From 8bdef8754e59a5a0408c4249f44b8cf3e7471c9d Mon Sep 17 00:00:00 2001
From: owenofbrien <86964623+owenofbrien@users.noreply.github.com>
Date: Fri, 24 Oct 2025 10:11:42 -0700
Subject: [PATCH 01/73] Stop logging session ids on extension events (#11941)

---
 .../clearcut-logger/clearcut-logger.test.ts   |  2 +-
 .../clearcut-logger/clearcut-logger.ts        | 68 +++++++++++--------
 2 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts
index 700e67591e..10705cd24f 100644
--- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts
+++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.test.ts
@@ -413,7 +413,7 @@ describe('ClearcutLogger', () => {
           vi.stubEnv('CURSOR_TRACE_ID', '');
         }
         const event = logger?.createLogEvent(EventNames.API_ERROR, []);
-        expect(event?.event_metadata[0][3]).toEqual({
+        expect(event?.event_metadata[0]).toContainEqual({
           gemini_cli_key: EventMetadataKey.GEMINI_CLI_SURFACE,
           value: expectedValue,
         });
diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
index 2ab3cf2441..93eec836ef 100644
--- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
+++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
@@ -250,6 +250,39 @@ export class ClearcutLogger {
     }
   }
 
+  createBasicLogEvent(
+    eventName: EventNames,
+    data: EventValue[] = [],
+  ): LogEvent {
+    const surface = determineSurface();
+    return {
+      console_type: 'GEMINI_CLI',
+      application: 102, // GEMINI_CLI
+      event_name: eventName as string,
+      event_metadata: [
+        [
+          ...data,
+          {
+            gemini_cli_key: EventMetadataKey.GEMINI_CLI_SURFACE,
+            value: surface,
+          },
+          {
+            gemini_cli_key: EventMetadataKey.GEMINI_CLI_VERSION,
+            value: CLI_VERSION,
+          },
+          {
+            gemini_cli_key: EventMetadataKey.GEMINI_CLI_GIT_COMMIT_HASH,
+            value: GIT_COMMIT_INFO,
+          },
+          {
+            gemini_cli_key: EventMetadataKey.GEMINI_CLI_OS,
+            value: process.platform,
+          },
+        ],
+      ],
+    };
+  }
+
   createLogEvent(eventName: EventNames, data: EventValue[] = []): LogEvent {
     const email = this.userAccountManager.getCachedGoogleAccount();
 
@@ -260,12 +293,7 @@ export class ClearcutLogger {
 
     data = this.addDefaultFields(data, totalAccounts);
 
-    const logEvent: LogEvent = {
-      console_type: 'GEMINI_CLI',
-      application: 102, // GEMINI_CLI
-      event_name: eventName as string,
-      event_metadata: [data],
-    };
+    const logEvent = this.createBasicLogEvent(eventName, data);
 
     // Should log either email or install ID, not both. See go/cloudmill-1p-oss-instrumentation#define-sessionable-id
     if (email) {
@@ -921,7 +949,7 @@ export class ClearcutLogger {
     ];
 
     this.enqueueLogEvent(
-      this.createLogEvent(EventNames.EXTENSION_INSTALL, data),
+      this.createBasicLogEvent(EventNames.EXTENSION_INSTALL, data),
     );
     this.flushToClearcut().catch((error) => {
       debugLogger.debug('Error flushing to Clearcut:', error);
@@ -945,7 +973,7 @@ export class ClearcutLogger {
     ];
 
     this.enqueueLogEvent(
-      this.createLogEvent(EventNames.EXTENSION_UNINSTALL, data),
+      this.createBasicLogEvent(EventNames.EXTENSION_UNINSTALL, data),
     );
     this.flushToClearcut().catch((error) => {
       debugLogger.debug('Error flushing to Clearcut:', error);
@@ -981,7 +1009,7 @@ export class ClearcutLogger {
     ];
 
     this.enqueueLogEvent(
-      this.createLogEvent(EventNames.EXTENSION_UPDATE, data),
+      this.createBasicLogEvent(EventNames.EXTENSION_UPDATE, data),
     );
     this.flushToClearcut().catch((error) => {
       debugLogger.debug('Error flushing to Clearcut:', error);
@@ -1070,7 +1098,7 @@ export class ClearcutLogger {
     ];
 
     this.enqueueLogEvent(
-      this.createLogEvent(EventNames.EXTENSION_ENABLE, data),
+      this.createBasicLogEvent(EventNames.EXTENSION_ENABLE, data),
     );
     this.flushToClearcut().catch((error) => {
       debugLogger.debug('Error flushing to Clearcut:', error);
@@ -1109,7 +1137,7 @@ export class ClearcutLogger {
     ];
 
     this.enqueueLogEvent(
-      this.createLogEvent(EventNames.EXTENSION_DISABLE, data),
+      this.createBasicLogEvent(EventNames.EXTENSION_DISABLE, data),
     );
     this.flushToClearcut().catch((error) => {
       debugLogger.debug('Error flushing to Clearcut:', error);
@@ -1207,8 +1235,6 @@ export class ClearcutLogger {
    * should exist on all log events.
    */
   addDefaultFields(data: EventValue[], totalAccounts: number): EventValue[] {
-    const surface = determineSurface();
-
     const defaultLogMetadata: EventValue[] = [
       {
         gemini_cli_key: EventMetadataKey.GEMINI_CLI_SESSION_ID,
@@ -1224,26 +1250,10 @@ export class ClearcutLogger {
         gemini_cli_key: EventMetadataKey.GEMINI_CLI_GOOGLE_ACCOUNTS_COUNT,
         value: `${totalAccounts}`,
       },
-      {
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_SURFACE,
-        value: surface,
-      },
-      {
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_VERSION,
-        value: CLI_VERSION,
-      },
-      {
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_GIT_COMMIT_HASH,
-        value: GIT_COMMIT_INFO,
-      },
       {
         gemini_cli_key: EventMetadataKey.GEMINI_CLI_PROMPT_ID,
         value: this.promptId,
       },
-      {
-        gemini_cli_key: EventMetadataKey.GEMINI_CLI_OS,
-        value: process.platform,
-      },
       {
         gemini_cli_key: EventMetadataKey.GEMINI_CLI_NODE_VERSION,
         value: process.versions.node,

From a123a813b25ae9f64a39c2d0033f3a9196106b0a Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@google.com>
Date: Fri, 24 Oct 2025 10:45:58 -0700
Subject: [PATCH 02/73] Fix(cli): Use the correct extensionPath (#11896)

---
 packages/cli/src/config/extension-manager.ts |  3 +--
 packages/cli/src/config/extension.test.ts    | 26 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/config/extension-manager.ts b/packages/cli/src/config/extension-manager.ts
index d175b8382c..9fb8263758 100644
--- a/packages/cli/src/config/extension-manager.ts
+++ b/packages/cli/src/config/extension-manager.ts
@@ -474,11 +474,10 @@ export class ExtensionManager {
           `Invalid configuration in ${configFilePath}: missing ${!rawConfig.name ? '"name"' : '"version"'}`,
         );
       }
-      const installDir = new ExtensionStorage(rawConfig.name).getExtensionDir();
       const config = recursivelyHydrateStrings(
         rawConfig as unknown as JsonObject,
         {
-          extensionPath: installDir,
+          extensionPath: extensionDir,
           workspacePath: this.workspaceDir,
           '/': path.sep,
           pathSeparator: path.sep,
diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index 9d81a26be2..e616246cce 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -285,6 +285,32 @@ describe('extension tests', () => {
       ]);
     });
 
+    it('should hydrate ${extensionPath} correctly for linked extensions', async () => {
+      const sourceExtDir = createExtension({
+        extensionsDir: tempWorkspaceDir,
+        name: 'my-linked-extension-with-path',
+        version: '1.0.0',
+        mcpServers: {
+          'test-server': {
+            command: 'node',
+            args: ['${extensionPath}/server/index.js'],
+            cwd: '${extensionPath}/server',
+          },
+        },
+      });
+
+      await extensionManager.installOrUpdateExtension({
+        source: sourceExtDir,
+        type: 'link',
+      });
+
+      const extensions = extensionManager.loadExtensions();
+      expect(extensions).toHaveLength(1);
+      expect(extensions[0].mcpServers?.['test-server'].cwd).toBe(
+        path.join(sourceExtDir, 'server'),
+      );
+    });
+
     it('should resolve environment variables in extension configuration', () => {
       process.env['TEST_API_KEY'] = 'test-api-key-123';
       process.env['TEST_DB_URL'] = 'postgresql://localhost:5432/testdb';

From 25996ae037c5d05a1cee515ae9f1c187986f6c4d Mon Sep 17 00:00:00 2001
From: shishu314 <shishu_1998@yahoo.com>
Date: Fri, 24 Oct 2025 13:52:07 -0400
Subject: [PATCH 03/73] fix(security) - Use emitFeedback instead of
 console.error in keychain token storage (#11961)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
---
 .../keychain-token-storage.test.ts            | 44 +++++++++++++++++--
 .../token-storage/keychain-token-storage.ts   | 20 ++++++---
 2 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts b/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts
index 5b34ed01b5..3b97902f19 100644
--- a/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts
+++ b/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts
@@ -7,6 +7,7 @@
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
 import type { KeychainTokenStorage } from './keychain-token-storage.js';
 import type { OAuthCredentials } from './types.js';
+import { coreEvents } from '../../utils/events.js';
 
 // Hoist the mock to be available in the vi.mock factory
 const mockKeytar = vi.hoisted(() => ({
@@ -30,6 +31,12 @@ vi.mock('node:crypto', () => ({
   })),
 }));
 
+vi.mock('../../utils/events.js', () => ({
+  coreEvents: {
+    emitFeedback: vi.fn(),
+  },
+}));
+
 describe('KeychainTokenStorage', () => {
   let storage: KeychainTokenStorage;
 
@@ -82,7 +89,8 @@ describe('KeychainTokenStorage', () => {
     });
 
     it('should return false if keytar fails to set password', async () => {
-      mockKeytar.setPassword.mockRejectedValue(new Error('write error'));
+      const error = new Error('write error');
+      mockKeytar.setPassword.mockRejectedValue(error);
       const isAvailable = await storage.checkKeychainAvailability();
       expect(isAvailable).toBe(false);
     });
@@ -265,14 +273,20 @@ describe('KeychainTokenStorage', () => {
       });
 
       it('should return an empty array on error', async () => {
-        mockKeytar.findCredentials.mockRejectedValue(new Error('find error'));
+        const error = new Error('find error');
+        mockKeytar.findCredentials.mockRejectedValue(error);
         const result = await storage.listServers();
         expect(result).toEqual([]);
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to list servers from keychain',
+          error,
+        );
       });
     });
 
     describe('getAllCredentials', () => {
-      it('should return a map of all valid credentials', async () => {
+      it('should return a map of all valid credentials and emit feedback for invalid ones', async () => {
         const creds2 = {
           ...validCredentials,
           serverName: 'server2',
@@ -310,6 +324,30 @@ describe('KeychainTokenStorage', () => {
         expect(result.has('expired-server')).toBe(false);
         expect(result.has('bad-server')).toBe(false);
         expect(result.has('invalid-server')).toBe(false);
+
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to parse credentials for bad-server',
+          expect.any(SyntaxError),
+        );
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to parse credentials for invalid-server',
+          expect.any(Error),
+        );
+      });
+
+      it('should emit feedback and return empty map if findCredentials fails', async () => {
+        const error = new Error('find all error');
+        mockKeytar.findCredentials.mockRejectedValue(error);
+
+        const result = await storage.getAllCredentials();
+        expect(result.size).toBe(0);
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to get all credentials from keychain',
+          error,
+        );
       });
     });
 
diff --git a/packages/core/src/mcp/token-storage/keychain-token-storage.ts b/packages/core/src/mcp/token-storage/keychain-token-storage.ts
index 70eccbadf5..aa8cee2e9d 100644
--- a/packages/core/src/mcp/token-storage/keychain-token-storage.ts
+++ b/packages/core/src/mcp/token-storage/keychain-token-storage.ts
@@ -7,6 +7,7 @@
 import * as crypto from 'node:crypto';
 import { BaseTokenStorage } from './base-token-storage.js';
 import type { OAuthCredentials } from './types.js';
+import { coreEvents } from '../../utils/events.js';
 
 interface Keytar {
   getPassword(service: string, account: string): Promise<string | null>;
@@ -42,7 +43,7 @@ export class KeychainTokenStorage extends BaseTokenStorage {
       const module = await import(moduleName);
       this.keytarModule = module.default || module;
     } catch (error) {
-      console.error(error);
+      coreEvents.emitFeedback('error', "Failed to load 'keytar' module", error);
     }
     return this.keytarModule;
   }
@@ -139,7 +140,11 @@ export class KeychainTokenStorage extends BaseTokenStorage {
         .filter((cred) => !cred.account.startsWith(KEYCHAIN_TEST_PREFIX))
         .map((cred: { account: string }) => cred.account);
     } catch (error) {
-      console.error('Failed to list servers from keychain:', error);
+      coreEvents.emitFeedback(
+        'error',
+        'Failed to list servers from keychain',
+        error,
+      );
       return [];
     }
   }
@@ -167,14 +172,19 @@ export class KeychainTokenStorage extends BaseTokenStorage {
             result.set(cred.account, data);
           }
         } catch (error) {
-          console.error(
-            `Failed to parse credentials for ${cred.account}:`,
+          coreEvents.emitFeedback(
+            'error',
+            `Failed to parse credentials for ${cred.account}`,
             error,
           );
         }
       }
     } catch (error) {
-      console.error('Failed to get all credentials from keychain:', error);
+      coreEvents.emitFeedback(
+        'error',
+        'Failed to get all credentials from keychain',
+        error,
+      );
     }
 
     return result;

From c2104a14fbd0de383a2ecd2e70889252bef36c33 Mon Sep 17 00:00:00 2001
From: shishu314 <shishu_1998@yahoo.com>
Date: Fri, 24 Oct 2025 14:07:11 -0400
Subject: [PATCH 04/73] fix(security) - Use emitFeedback instead of
 console.error (#11948)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
---
 .../core/src/mcp/oauth-token-storage.test.ts  | 47 +++++++++++++------
 packages/core/src/mcp/oauth-token-storage.ts  | 17 +++++--
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/packages/core/src/mcp/oauth-token-storage.test.ts b/packages/core/src/mcp/oauth-token-storage.test.ts
index cd8841aaee..16abf5a6ad 100644
--- a/packages/core/src/mcp/oauth-token-storage.test.ts
+++ b/packages/core/src/mcp/oauth-token-storage.test.ts
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+import { coreEvents } from '@google/gemini-cli-core';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { promises as fs } from 'node:fs';
 import * as path from 'node:path';
@@ -33,6 +34,12 @@ vi.mock('../config/storage.js', () => ({
   },
 }));
 
+vi.mock('@google/gemini-cli-core', () => ({
+  coreEvents: {
+    emitFeedback: vi.fn(),
+  },
+}));
+
 const mockHybridTokenStorage = {
   listServers: vi.fn(),
   setCredentials: vi.fn(),
@@ -72,7 +79,6 @@ describe('MCPOAuthTokenStorage', () => {
       tokenStorage = new MCPOAuthTokenStorage();
 
       vi.clearAllMocks();
-      vi.spyOn(console, 'error');
     });
 
     afterEach(() => {
@@ -87,7 +93,7 @@ describe('MCPOAuthTokenStorage', () => {
         const tokens = await tokenStorage.getAllCredentials();
 
         expect(tokens.size).toBe(0);
-        expect(console.error).not.toHaveBeenCalled();
+        expect(coreEvents.emitFeedback).not.toHaveBeenCalled();
       });
 
       it('should load tokens from file successfully', async () => {
@@ -110,8 +116,10 @@ describe('MCPOAuthTokenStorage', () => {
         const tokens = await tokenStorage.getAllCredentials();
 
         expect(tokens.size).toBe(0);
-        expect(console.error).toHaveBeenCalledWith(
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
           expect.stringContaining('Failed to load MCP OAuth tokens'),
+          expect.any(Error),
         );
       });
 
@@ -122,8 +130,10 @@ describe('MCPOAuthTokenStorage', () => {
         const tokens = await tokenStorage.getAllCredentials();
 
         expect(tokens.size).toBe(0);
-        expect(console.error).toHaveBeenCalledWith(
-          expect.stringContaining('Failed to load MCP OAuth tokens'),
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to load MCP OAuth tokens: Permission denied',
+          error,
         );
       });
     });
@@ -188,8 +198,10 @@ describe('MCPOAuthTokenStorage', () => {
           tokenStorage.saveToken('test-server', mockToken),
         ).rejects.toThrow('Disk full');
 
-        expect(console.error).toHaveBeenCalledWith(
-          expect.stringContaining('Failed to save MCP OAuth token'),
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to save MCP OAuth token: Disk full',
+          writeError,
         );
       });
     });
@@ -277,12 +289,15 @@ describe('MCPOAuthTokenStorage', () => {
         vi.mocked(fs.readFile).mockResolvedValue(
           JSON.stringify([mockCredentials]),
         );
-        vi.mocked(fs.unlink).mockRejectedValue(new Error('Permission denied'));
+        const unlinkError = new Error('Permission denied');
+        vi.mocked(fs.unlink).mockRejectedValue(unlinkError);
 
         await tokenStorage.deleteCredentials('test-server');
 
-        expect(console.error).toHaveBeenCalledWith(
-          expect.stringContaining('Failed to remove MCP OAuth token'),
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to remove MCP OAuth token: Permission denied',
+          unlinkError,
         );
       });
     });
@@ -347,16 +362,19 @@ describe('MCPOAuthTokenStorage', () => {
 
         await tokenStorage.clearAll();
 
-        expect(console.error).not.toHaveBeenCalled();
+        expect(coreEvents.emitFeedback).not.toHaveBeenCalled();
       });
 
       it('should handle other file errors gracefully', async () => {
-        vi.mocked(fs.unlink).mockRejectedValue(new Error('Permission denied'));
+        const unlinkError = new Error('Permission denied');
+        vi.mocked(fs.unlink).mockRejectedValue(unlinkError);
 
         await tokenStorage.clearAll();
 
-        expect(console.error).toHaveBeenCalledWith(
-          expect.stringContaining('Failed to clear MCP OAuth tokens'),
+        expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+          'error',
+          'Failed to clear MCP OAuth tokens: Permission denied',
+          unlinkError,
         );
       });
     });
@@ -368,7 +386,6 @@ describe('MCPOAuthTokenStorage', () => {
       tokenStorage = new MCPOAuthTokenStorage();
 
       vi.clearAllMocks();
-      vi.spyOn(console, 'error');
     });
 
     afterEach(() => {
diff --git a/packages/core/src/mcp/oauth-token-storage.ts b/packages/core/src/mcp/oauth-token-storage.ts
index d9d98ff417..66ccba29b6 100644
--- a/packages/core/src/mcp/oauth-token-storage.ts
+++ b/packages/core/src/mcp/oauth-token-storage.ts
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+import { coreEvents } from '@google/gemini-cli-core';
 import { promises as fs } from 'node:fs';
 import * as path from 'node:path';
 import { Storage } from '../config/storage.js';
@@ -68,8 +69,10 @@ export class MCPOAuthTokenStorage implements TokenStorage {
     } catch (error) {
       // File doesn't exist or is invalid, return empty map
       if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
-        console.error(
+        coreEvents.emitFeedback(
+          'error',
           `Failed to load MCP OAuth tokens: ${getErrorMessage(error)}`,
+          error,
         );
       }
     }
@@ -102,8 +105,10 @@ export class MCPOAuthTokenStorage implements TokenStorage {
         { mode: 0o600 }, // Restrict file permissions
       );
     } catch (error) {
-      console.error(
+      coreEvents.emitFeedback(
+        'error',
         `Failed to save MCP OAuth token: ${getErrorMessage(error)}`,
+        error,
       );
       throw error;
     }
@@ -181,8 +186,10 @@ export class MCPOAuthTokenStorage implements TokenStorage {
           });
         }
       } catch (error) {
-        console.error(
+        coreEvents.emitFeedback(
+          'error',
           `Failed to remove MCP OAuth token: ${getErrorMessage(error)}`,
+          error,
         );
       }
     }
@@ -216,8 +223,10 @@ export class MCPOAuthTokenStorage implements TokenStorage {
       await fs.unlink(tokenFile);
     } catch (error) {
       if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
-        console.error(
+        coreEvents.emitFeedback(
+          'error',
           `Failed to clear MCP OAuth tokens: ${getErrorMessage(error)}`,
+          error,
         );
       }
     }

From ee92db7533d33335f4146359a9338d451296105f Mon Sep 17 00:00:00 2001
From: Gaurav <39389231+gsquared94@users.noreply.github.com>
Date: Fri, 24 Oct 2025 11:09:06 -0700
Subject: [PATCH 05/73] fix: handle request retries and model fallback
 correctly (#11624)

---
 .../src/ui/hooks/useQuotaAndFallback.test.ts  |  99 +++--
 .../cli/src/ui/hooks/useQuotaAndFallback.ts   |  29 +-
 packages/core/index.ts                        |   2 +
 packages/core/src/index.ts                    |   1 +
 packages/core/src/utils/errorParsing.test.ts  | 244 ------------
 packages/core/src/utils/errorParsing.ts       |  91 +----
 packages/core/src/utils/flashFallback.test.ts |  76 ++--
 packages/core/src/utils/googleErrors.test.ts  | 356 ++++++++++++++++++
 packages/core/src/utils/googleErrors.ts       | 305 +++++++++++++++
 .../core/src/utils/googleQuotaErrors.test.ts  | 306 +++++++++++++++
 packages/core/src/utils/googleQuotaErrors.ts  | 192 ++++++++++
 .../core/src/utils/quotaErrorDetection.ts     |  65 ----
 packages/core/src/utils/retry.test.ts         | 181 +++------
 packages/core/src/utils/retry.ts              | 214 +++--------
 14 files changed, 1357 insertions(+), 804 deletions(-)
 create mode 100644 packages/core/src/utils/googleErrors.test.ts
 create mode 100644 packages/core/src/utils/googleErrors.ts
 create mode 100644 packages/core/src/utils/googleQuotaErrors.test.ts
 create mode 100644 packages/core/src/utils/googleQuotaErrors.ts

diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
index 6d7782694f..0e94a1874d 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
@@ -19,25 +19,15 @@ import {
   type FallbackModelHandler,
   UserTierId,
   AuthType,
-  isGenericQuotaExceededError,
-  isProQuotaExceededError,
+  TerminalQuotaError,
   makeFakeConfig,
+  type GoogleApiError,
+  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useQuotaAndFallback } from './useQuotaAndFallback.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
 import { AuthState, MessageType } from '../types.js';
 
-// Mock the error checking functions from the core package to control test scenarios
-vi.mock('@google/gemini-cli-core', async (importOriginal) => {
-  const original =
-    await importOriginal<typeof import('@google/gemini-cli-core')>();
-  return {
-    ...original,
-    isGenericQuotaExceededError: vi.fn(),
-    isProQuotaExceededError: vi.fn(),
-  };
-});
-
 // Use a type alias for SpyInstance as it's not directly exported
 type SpyInstance = ReturnType<typeof vi.spyOn>;
 
@@ -47,12 +37,15 @@ describe('useQuotaAndFallback', () => {
   let mockSetAuthState: Mock;
   let mockSetModelSwitchedFromQuotaError: Mock;
   let setFallbackHandlerSpy: SpyInstance;
-
-  const mockedIsGenericQuotaExceededError = isGenericQuotaExceededError as Mock;
-  const mockedIsProQuotaExceededError = isProQuotaExceededError as Mock;
+  let mockGoogleApiError: GoogleApiError;
 
   beforeEach(() => {
     mockConfig = makeFakeConfig();
+    mockGoogleApiError = {
+      code: 429,
+      message: 'mock error',
+      details: [],
+    };
 
     // Spy on the method that requires the private field and mock its return.
     // This is cleaner than modifying the config class for tests.
@@ -72,9 +65,6 @@ describe('useQuotaAndFallback', () => {
 
     setFallbackHandlerSpy = vi.spyOn(mockConfig, 'setFallbackModelHandler');
     vi.spyOn(mockConfig, 'setQuotaErrorOccurred');
-
-    mockedIsGenericQuotaExceededError.mockReturnValue(false);
-    mockedIsProQuotaExceededError.mockReturnValue(false);
   });
 
   afterEach(() => {
@@ -140,51 +130,62 @@ describe('useQuotaAndFallback', () => {
     describe('Automatic Fallback Scenarios', () => {
       const testCases = [
         {
-          errorType: 'generic',
+          description: 'other error for FREE tier',
           tier: UserTierId.FREE,
+          error: new Error('some error'),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
             'upgrade to a Gemini Code Assist Standard or Enterprise plan',
           ],
         },
         {
-          errorType: 'generic',
-          tier: UserTierId.STANDARD, // Paid tier
+          description: 'other error for LEGACY tier',
+          tier: UserTierId.LEGACY, // Paid tier
+          error: new Error('some error'),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B',
+            'Automatically switching from model-A to model-B for faster responses',
             'switch to using a paid API key from AI Studio',
           ],
         },
         {
-          errorType: 'other',
+          description: 'retryable quota error for FREE tier',
           tier: UserTierId.FREE,
+          error: new RetryableQuotaError(
+            'retryable quota',
+            mockGoogleApiError,
+            5,
+          ),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
-            'upgrade to a Gemini Code Assist Standard or Enterprise plan',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
+            'Automatically switching from model-A to model-B',
+            'upgrading to a Gemini Code Assist Standard or Enterprise plan',
           ],
         },
         {
-          errorType: 'other',
+          description: 'retryable quota error for LEGACY tier',
           tier: UserTierId.LEGACY, // Paid tier
+          error: new RetryableQuotaError(
+            'retryable quota',
+            mockGoogleApiError,
+            5,
+          ),
           expectedMessageSnippets: [
-            'Automatically switching from model-A to model-B for faster responses',
+            'Your requests are being throttled right now due to server being at capacity for model-A',
+            'Automatically switching from model-A to model-B',
             'switch to using a paid API key from AI Studio',
           ],
         },
       ];
 
-      for (const { errorType, tier, expectedMessageSnippets } of testCases) {
-        it(`should handle ${errorType} error for ${tier} tier correctly`, async () => {
-          mockedIsGenericQuotaExceededError.mockReturnValue(
-            errorType === 'generic',
-          );
-
+      for (const {
+        description,
+        tier,
+        error,
+        expectedMessageSnippets,
+      } of testCases) {
+        it(`should handle ${description} correctly`, async () => {
           const handler = getRegisteredHandler(tier);
-          const result = await handler(
-            'model-A',
-            'model-B',
-            new Error('quota exceeded'),
-          );
+          const result = await handler('model-A', 'model-B', error);
 
           // Automatic fallbacks should return 'stop'
           expect(result).toBe('stop');
@@ -207,10 +208,6 @@ describe('useQuotaAndFallback', () => {
     });
 
     describe('Interactive Fallback (Pro Quota Error)', () => {
-      beforeEach(() => {
-        mockedIsProQuotaExceededError.mockReturnValue(true);
-      });
-
       it('should set an interactive request and wait for user choice', async () => {
         const { result } = renderHook(() =>
           useQuotaAndFallback({
@@ -229,7 +226,7 @@ describe('useQuotaAndFallback', () => {
         const promise = handler(
           'gemini-pro',
           'gemini-flash',
-          new Error('pro quota'),
+          new TerminalQuotaError('pro quota', mockGoogleApiError),
         );
 
         await act(async () => {});
@@ -268,7 +265,7 @@ describe('useQuotaAndFallback', () => {
         const promise1 = handler(
           'gemini-pro',
           'gemini-flash',
-          new Error('pro quota 1'),
+          new TerminalQuotaError('pro quota 1', mockGoogleApiError),
         );
         await act(async () => {});
 
@@ -278,7 +275,7 @@ describe('useQuotaAndFallback', () => {
         const result2 = await handler(
           'gemini-pro',
           'gemini-flash',
-          new Error('pro quota 2'),
+          new TerminalQuotaError('pro quota 2', mockGoogleApiError),
         );
 
         // The lock should have stopped the second request
@@ -297,10 +294,6 @@ describe('useQuotaAndFallback', () => {
   });
 
   describe('handleProQuotaChoice', () => {
-    beforeEach(() => {
-      mockedIsProQuotaExceededError.mockReturnValue(true);
-    });
-
     it('should do nothing if there is no pending pro quota request', () => {
       const { result } = renderHook(() =>
         useQuotaAndFallback({
@@ -336,7 +329,7 @@ describe('useQuotaAndFallback', () => {
       const promise = handler(
         'gemini-pro',
         'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
       );
       await act(async () => {}); // Allow state to update
 
@@ -367,7 +360,7 @@ describe('useQuotaAndFallback', () => {
       const promise = handler(
         'gemini-pro',
         'gemini-flash',
-        new Error('pro quota'),
+        new TerminalQuotaError('pro quota', mockGoogleApiError),
       );
       await act(async () => {}); // Allow state to update
 
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
index a7eb77659a..194f5f27fc 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.ts
@@ -9,9 +9,9 @@ import {
   type Config,
   type FallbackModelHandler,
   type FallbackIntent,
-  isGenericQuotaExceededError,
-  isProQuotaExceededError,
+  TerminalQuotaError,
   UserTierId,
+  RetryableQuotaError,
 } from '@google/gemini-cli-core';
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { type UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -63,7 +63,7 @@ export function useQuotaAndFallback({
 
       let message: string;
 
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
         // Pro Quota specific messages (Interactive)
         if (isPaidTier) {
           message = `⚡ You have reached your daily ${failedModel} quota limit.
@@ -76,31 +76,30 @@ export function useQuotaAndFallback({
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
         }
-      } else if (error && isGenericQuotaExceededError(error)) {
-        // Generic Quota (Automatic fallback)
-        const actionMessage = `⚡ You have reached your daily quota limit.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
+      } else if (error instanceof RetryableQuotaError) {
+        // Short term quota retries exhausted (Automatic fallback)
+        const actionMessage = `⚡ Your requests are being throttled right now due to server being at capacity for ${failedModel}.\n⚡ Automatically switching from ${failedModel} to ${fallbackModel} for the remainder of this session.`;
 
         if (isPaidTier) {
           message = `${actionMessage}
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ To continue accessing the ${failedModel} model, retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
         } else {
           message = `${actionMessage}
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
-⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ Retry your requests after some time. Otherwise consider upgrading to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ You can switch authentication methods by typing /auth`;
         }
       } else {
-        // Consecutive 429s or other errors (Automatic fallback)
+        // Other errors (Automatic fallback)
         const actionMessage = `⚡ Automatically switching from ${failedModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
 
         if (isPaidTier) {
           message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
-⚡ To continue accessing the ${failedModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
+⚡ To continue accessing the ${failedModel} model, you can retry your request after some time or consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
         } else {
           message = `${actionMessage}
-⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${failedModel} quota limit
-⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Your requests are being throttled temporarily due to server being at capacity for ${failedModel} or there is a service outage.
+⚡ To avoid being throttled, you can retry your request after some time or upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
         }
@@ -119,7 +118,7 @@ export function useQuotaAndFallback({
       config.setQuotaErrorOccurred(true);
 
       // Interactive Fallback for Pro quota
-      if (error && isProQuotaExceededError(error)) {
+      if (error instanceof TerminalQuotaError) {
         if (isDialogPending.current) {
           return 'stop'; // A dialog is already active, so just stop this request.
         }
diff --git a/packages/core/index.ts b/packages/core/index.ts
index 729fcc8d48..acc9743e61 100644
--- a/packages/core/index.ts
+++ b/packages/core/index.ts
@@ -44,3 +44,5 @@ export { makeFakeConfig } from './src/test-utils/config.js';
 export * from './src/utils/pathReader.js';
 export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js';
 export { logModelSlashCommand } from './src/telemetry/loggers.js';
+export * from './src/utils/googleQuotaErrors.js';
+export type { GoogleApiError } from './src/utils/googleErrors.js';
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 42ced4457f..bc2eab2147 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -48,6 +48,7 @@ export * from './utils/gitIgnoreParser.js';
 export * from './utils/gitUtils.js';
 export * from './utils/editor.js';
 export * from './utils/quotaErrorDetection.js';
+export * from './utils/googleQuotaErrors.js';
 export * from './utils/fileUtils.js';
 export * from './utils/retry.js';
 export * from './utils/shell-utils.js';
diff --git a/packages/core/src/utils/errorParsing.test.ts b/packages/core/src/utils/errorParsing.test.ts
index 9c71f4d89b..291145d2e8 100644
--- a/packages/core/src/utils/errorParsing.test.ts
+++ b/packages/core/src/utils/errorParsing.test.ts
@@ -6,9 +6,7 @@
 
 import { describe, it, expect } from 'vitest';
 import { parseAndFormatApiError } from './errorParsing.js';
-import { isProQuotaExceededError } from './quotaErrorDetection.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
-import { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 import type { StructuredError } from '../core/turn.js';
 
@@ -40,22 +38,6 @@ describe('parseAndFormatApiError', () => {
     );
   });
 
-  it('should format a 429 API error with the personal message', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
-    );
-  });
-
   it('should format a 429 API error with the vertex message', () => {
     const errorMessage =
       'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
@@ -132,230 +114,4 @@ describe('parseAndFormatApiError', () => {
     const expected = '[API Error: An unknown error occurred.]';
     expect(parseAndFormatApiError(error)).toBe(expected);
   });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain('upgrade to get higher limits');
-  });
-
-  it('should format a regular 429 API error with standard message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
-    );
-    expect(result).not.toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-  });
-
-  it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
-    );
-    expect(result).toContain('You have reached your daily quota limit');
-    expect(result).not.toContain(
-      'You have reached your daily Gemini 2.5 Pro quota limit',
-    );
-  });
-
-  it('should prioritize Pro quota message over generic quota message for Google auth', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).not.toContain('You have reached your daily quota limit');
-  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.LEGACY,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
-    );
-    expect(result).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
-    const errorMessage25 =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const errorMessagePreview =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-
-    const result25 = parseAndFormatApiError(
-      errorMessage25,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    const resultPreview = parseAndFormatApiError(
-      errorMessagePreview,
-      AuthType.LOGIN_WITH_GOOGLE,
-      undefined,
-      'gemini-2.5-preview-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-
-    expect(result25).toContain(
-      'You have reached your daily gemini-2.5-pro quota limit',
-    );
-    expect(resultPreview).toContain(
-      'You have reached your daily gemini-2.5-preview-pro quota limit',
-    );
-    expect(result25).toContain('upgrade to get higher limits');
-    expect(resultPreview).toContain('upgrade to get higher limits');
-  });
-
-  it('should not match non-Pro models with similar version strings', () => {
-    // Test that Flash models with similar version strings don't match
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
-      ),
-    ).toBe(false);
-
-    // Test other model types
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
-      ),
-    ).toBe(false);
-
-    // Test generic quota messages
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'GenerationRequests' and limit",
-      ),
-    ).toBe(false);
-    expect(
-      isProQuotaExceededError(
-        "Quota exceeded for quota metric 'EmbeddingRequests' and limit",
-      ),
-    ).toBe(false);
-  });
-
-  it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain(
-      "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
-    );
-    expect(result).toContain('You have reached your daily quota limit');
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
-
-  it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
-    const errorMessage =
-      'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
-    const result = parseAndFormatApiError(
-      errorMessage,
-      AuthType.LOGIN_WITH_GOOGLE,
-      UserTierId.STANDARD,
-      'gemini-2.5-pro',
-      DEFAULT_GEMINI_FLASH_MODEL,
-    );
-    expect(result).toContain('[API Error: Rate limit exceeded');
-    expect(result).toContain(
-      'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
-    );
-    expect(result).not.toContain('upgrade to get higher limits');
-  });
 });
diff --git a/packages/core/src/utils/errorParsing.ts b/packages/core/src/utils/errorParsing.ts
index ecfc237573..bad61ea9e2 100644
--- a/packages/core/src/utils/errorParsing.ts
+++ b/packages/core/src/utils/errorParsing.ts
@@ -4,50 +4,11 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import {
-  isProQuotaExceededError,
-  isGenericQuotaExceededError,
-  isApiError,
-  isStructuredError,
-} from './quotaErrorDetection.js';
-import {
-  DEFAULT_GEMINI_MODEL,
-  DEFAULT_GEMINI_FLASH_MODEL,
-} from '../config/models.js';
-import { UserTierId } from '../code_assist/types.js';
+import { isApiError, isStructuredError } from './quotaErrorDetection.js';
+import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
+import type { UserTierId } from '../code_assist/types.js';
 import { AuthType } from '../core/contentGenerator.js';
 
-// Free Tier message functions
-const getRateLimitErrorMessageGoogleFree = (
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
-
-const getRateLimitErrorMessageGoogleProQuotaFree = (
-  currentModel: string = DEFAULT_GEMINI_MODEL,
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
-
-const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
-  `\nYou have reached your daily quota limit. To increase your limits, upgrade to get higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
-
-// Legacy/Standard Tier message functions
-const getRateLimitErrorMessageGooglePaid = (
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
-
-const getRateLimitErrorMessageGoogleProQuotaPaid = (
-  currentModel: string = DEFAULT_GEMINI_MODEL,
-  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
-) =>
-  `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
-
-const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
-  currentModel: string = DEFAULT_GEMINI_MODEL,
-) =>
-  `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
 const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
   '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
 const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
@@ -59,39 +20,9 @@ const getRateLimitErrorMessageDefault = (
 
 function getRateLimitMessage(
   authType?: AuthType,
-  error?: unknown,
-  userTier?: UserTierId,
-  currentModel?: string,
   fallbackModel?: string,
 ): string {
   switch (authType) {
-    case AuthType.LOGIN_WITH_GOOGLE: {
-      // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified
-      const isPaidTier =
-        userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
-
-      if (isProQuotaExceededError(error)) {
-        return isPaidTier
-          ? getRateLimitErrorMessageGoogleProQuotaPaid(
-              currentModel || DEFAULT_GEMINI_MODEL,
-              fallbackModel,
-            )
-          : getRateLimitErrorMessageGoogleProQuotaFree(
-              currentModel || DEFAULT_GEMINI_MODEL,
-              fallbackModel,
-            );
-      } else if (isGenericQuotaExceededError(error)) {
-        return isPaidTier
-          ? getRateLimitErrorMessageGoogleGenericQuotaPaid(
-              currentModel || DEFAULT_GEMINI_MODEL,
-            )
-          : getRateLimitErrorMessageGoogleGenericQuotaFree();
-      } else {
-        return isPaidTier
-          ? getRateLimitErrorMessageGooglePaid(fallbackModel)
-          : getRateLimitErrorMessageGoogleFree(fallbackModel);
-      }
-    }
     case AuthType.USE_GEMINI:
       return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
     case AuthType.USE_VERTEX_AI:
@@ -111,13 +42,7 @@ export function parseAndFormatApiError(
   if (isStructuredError(error)) {
     let text = `[API Error: ${error.message}]`;
     if (error.status === 429) {
-      text += getRateLimitMessage(
-        authType,
-        error,
-        userTier,
-        currentModel,
-        fallbackModel,
-      );
+      text += getRateLimitMessage(authType, fallbackModel);
     }
     return text;
   }
@@ -146,13 +71,7 @@ export function parseAndFormatApiError(
         }
         let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
         if (parsedError.error.code === 429) {
-          text += getRateLimitMessage(
-            authType,
-            parsedError,
-            userTier,
-            currentModel,
-            fallbackModel,
-          );
+          text += getRateLimitMessage(authType, fallbackModel);
         }
         return text;
       }
diff --git a/packages/core/src/utils/flashFallback.test.ts b/packages/core/src/utils/flashFallback.test.ts
index 8ef9665f42..a3f08f5df6 100644
--- a/packages/core/src/utils/flashFallback.test.ts
+++ b/packages/core/src/utils/flashFallback.test.ts
@@ -11,7 +11,6 @@ import {
   setSimulate429,
   disableSimulationAfterFallback,
   shouldSimulate429,
-  createSimulated429Error,
   resetRequestCounter,
 } from './testUtils.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
@@ -19,12 +18,15 @@ import { retryWithBackoff } from './retry.js';
 import { AuthType } from '../core/contentGenerator.js';
 // Import the new types (Assuming this test file is in packages/core/src/utils/)
 import type { FallbackModelHandler } from '../fallback/types.js';
+import type { GoogleApiError } from './googleErrors.js';
+import { TerminalQuotaError } from './googleQuotaErrors.js';
 
 vi.mock('node:fs');
 
 // Update the description to reflect that this tests the retry utility's integration
 describe('Retry Utility Fallback Integration', () => {
   let config: Config;
+  let mockGoogleApiError: GoogleApiError;
 
   beforeEach(() => {
     vi.mocked(fs.existsSync).mockReturnValue(true);
@@ -38,6 +40,11 @@ describe('Retry Utility Fallback Integration', () => {
       cwd: '/test',
       model: 'gemini-2.5-pro',
     });
+    mockGoogleApiError = {
+      code: 429,
+      message: 'mock error',
+      details: [],
+    };
 
     // Reset simulation state for each test
     setSimulate429(false);
@@ -56,6 +63,7 @@ describe('Retry Utility Fallback Integration', () => {
     const result = await config.fallbackModelHandler!(
       'gemini-2.5-pro',
       DEFAULT_GEMINI_FLASH_MODEL,
+      new Error('test'),
     );
 
     // Verify it returns the correct intent
@@ -63,81 +71,61 @@ describe('Retry Utility Fallback Integration', () => {
   });
 
   // This test validates the retry utility's logic for triggering the callback.
-  it('should trigger onPersistent429 after 2 consecutive 429 errors for OAuth users', async () => {
+  it('should trigger onPersistent429 on TerminalQuotaError for OAuth users', async () => {
     let fallbackCalled = false;
-    // Removed fallbackModel variable as it's no longer relevant here.
 
-    // Mock function that simulates exactly 2 429 errors, then succeeds after fallback
     const mockApiCall = vi
       .fn()
-      .mockRejectedValueOnce(createSimulated429Error())
-      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      )
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      )
       .mockResolvedValueOnce('success after fallback');
 
-    // Mock the onPersistent429 callback (this is what client.ts/geminiChat.ts provides)
     const mockPersistent429Callback = vi.fn(async (_authType?: string) => {
       fallbackCalled = true;
-      // Return true to signal retryWithBackoff to reset attempts and continue.
       return true;
     });
 
-    // Test with OAuth personal auth type, with maxAttempts = 2 to ensure fallback triggers
     const result = await retryWithBackoff(mockApiCall, {
       maxAttempts: 2,
       initialDelayMs: 1,
       maxDelayMs: 10,
-      shouldRetryOnError: (error: Error) => {
-        const status = (error as Error & { status?: number }).status;
-        return status === 429;
-      },
       onPersistent429: mockPersistent429Callback,
       authType: AuthType.LOGIN_WITH_GOOGLE,
     });
 
-    // Verify fallback mechanism was triggered
     expect(fallbackCalled).toBe(true);
     expect(mockPersistent429Callback).toHaveBeenCalledWith(
       AuthType.LOGIN_WITH_GOOGLE,
-      expect.any(Error),
+      expect.any(TerminalQuotaError),
     );
     expect(result).toBe('success after fallback');
-    // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
     expect(mockApiCall).toHaveBeenCalledTimes(3);
   });
 
   it('should not trigger onPersistent429 for API key users', async () => {
-    let fallbackCalled = false;
+    const fallbackCallback = vi.fn();
 
-    // Mock function that simulates 429 errors
-    const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
+    const mockApiCall = vi
+      .fn()
+      .mockRejectedValueOnce(
+        new TerminalQuotaError('Daily limit', mockGoogleApiError),
+      );
 
-    // Mock the callback
-    const mockPersistent429Callback = vi.fn(async () => {
-      fallbackCalled = true;
-      return true;
+    const promise = retryWithBackoff(mockApiCall, {
+      maxAttempts: 2,
+      initialDelayMs: 1,
+      maxDelayMs: 10,
+      onPersistent429: fallbackCallback,
+      authType: AuthType.USE_GEMINI, // API key auth type
     });
 
-    // Test with API key auth type - should not trigger fallback
-    try {
-      await retryWithBackoff(mockApiCall, {
-        maxAttempts: 5,
-        initialDelayMs: 10,
-        maxDelayMs: 100,
-        shouldRetryOnError: (error: Error) => {
-          const status = (error as Error & { status?: number }).status;
-          return status === 429;
-        },
-        onPersistent429: mockPersistent429Callback,
-        authType: AuthType.USE_GEMINI, // API key auth type
-      });
-    } catch (error) {
-      // Expected to throw after max attempts
-      expect((error as Error).message).toContain('Rate limit exceeded');
-    }
-
-    // Verify fallback was NOT triggered for API key users
-    expect(fallbackCalled).toBe(false);
-    expect(mockPersistent429Callback).not.toHaveBeenCalled();
+    await expect(promise).rejects.toThrow('Daily limit');
+    expect(fallbackCallback).not.toHaveBeenCalled();
+    expect(mockApiCall).toHaveBeenCalledTimes(1);
   });
 
   // This test validates the test utilities themselves.
diff --git a/packages/core/src/utils/googleErrors.test.ts b/packages/core/src/utils/googleErrors.test.ts
new file mode 100644
index 0000000000..c051fb0310
--- /dev/null
+++ b/packages/core/src/utils/googleErrors.test.ts
@@ -0,0 +1,356 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { parseGoogleApiError } from './googleErrors.js';
+import type { QuotaFailure } from './googleErrors.js';
+
+describe('parseGoogleApiError', () => {
+  it('should return null for non-gaxios errors', () => {
+    expect(parseGoogleApiError(new Error('vanilla error'))).toBeNull();
+    expect(parseGoogleApiError(null)).toBeNull();
+    expect(parseGoogleApiError({})).toBeNull();
+  });
+
+  it('should parse a standard gaxios error', () => {
+    const mockError = {
+      response: {
+        status: 429,
+        data: {
+          error: {
+            code: 429,
+            message: 'Quota exceeded',
+            details: [
+              {
+                '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+                violations: [{ subject: 'user', description: 'daily limit' }],
+              },
+            ],
+          },
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+    expect(parsed?.details).toHaveLength(1);
+    const detail = parsed?.details[0] as QuotaFailure;
+    expect(detail['@type']).toBe('type.googleapis.com/google.rpc.QuotaFailure');
+    expect(detail.violations[0].description).toBe('daily limit');
+  });
+
+  it('should parse an error with details stringified in the message', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Inner quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '10s',
+          },
+        ],
+      },
+    };
+
+    const mockError = {
+      response: {
+        status: 429,
+        data: {
+          error: {
+            code: 429,
+            message: JSON.stringify(innerError),
+            details: [], // Top-level details are empty
+          },
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Inner quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should return null if details are not in the expected format', () => {
+    const mockError = {
+      response: {
+        status: 400,
+        data: {
+          error: {
+            code: 400,
+            message: 'Bad Request',
+            details: 'just a string', // Invalid details format
+          },
+        },
+      },
+    };
+    expect(parseGoogleApiError(mockError)).toBeNull();
+  });
+
+  it('should return null if there are no valid details', () => {
+    const mockError = {
+      response: {
+        status: 400,
+        data: {
+          error: {
+            code: 400,
+            message: 'Bad Request',
+            details: [
+              {
+                // missing '@type'
+                reason: 'some reason',
+              },
+            ],
+          },
+        },
+      },
+    };
+    expect(parseGoogleApiError(mockError)).toBeNull();
+  });
+
+  it('should parse a doubly nested error in the message', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Innermost quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '20s',
+          },
+        ],
+      },
+    };
+
+    const middleError = {
+      error: {
+        code: 429,
+        message: JSON.stringify(innerError),
+        details: [],
+      },
+    };
+
+    const mockError = {
+      response: {
+        status: 429,
+        data: {
+          error: {
+            code: 429,
+            message: JSON.stringify(middleError),
+            details: [],
+          },
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Innermost quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should parse an error that is not in a response object', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Innermost quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '20s',
+          },
+        ],
+      },
+    };
+
+    const mockError = {
+      error: {
+        code: 429,
+        message: JSON.stringify(innerError),
+        details: [],
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Innermost quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should parse an error that is a JSON string', () => {
+    const innerError = {
+      error: {
+        code: 429,
+        message: 'Innermost quota message',
+        details: [
+          {
+            '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+            retryDelay: '20s',
+          },
+        ],
+      },
+    };
+
+    const mockError = {
+      error: {
+        code: 429,
+        message: JSON.stringify(innerError),
+        details: [],
+      },
+    };
+
+    const parsed = parseGoogleApiError(JSON.stringify(mockError));
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Innermost quota message');
+    expect(parsed?.details).toHaveLength(1);
+    expect(parsed?.details[0]['@type']).toBe(
+      'type.googleapis.com/google.rpc.RetryInfo',
+    );
+  });
+
+  it('should parse the user-provided nested error string', () => {
+    const userErrorString =
+      '{"error":{"message":"{\\n  \\"error\\": {\\n    \\"code\\": 429,\\n    \\"message\\": \\"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s.\\",\\n    \\"status\\": \\"RESOURCE_EXHAUSTED\\",\\n    \\"details\\": [\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.DebugInfo\\",\\n        \\"detail\\": \\"[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\nPlease retry in 40.025771073s. [google.rpc.error_details_ext] { message: \\\\\\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 10000\\\\\\\\nPlease retry in 40.025771073s.\\\\\\" }\\"\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.QuotaFailure\\",\\n        \\"violations\\": [\\n          {\\n            \\"quotaMetric\\": \\"generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count\\",\\n            \\"quotaId\\": \\"GenerateContentPaidTierInputTokensPerModelPerMinute\\",\\n            \\"quotaDimensions\\": {\\n              \\"location\\": \\"global\\",\\n              \\"model\\": \\"gemini-2.5-pro\\"\\n            },\\n            \\"quotaValue\\": \\"10000\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.Help\\",\\n        \\"links\\": [\\n          {\\n            \\"description\\": \\"Learn more about Gemini API quotas\\",\\n            \\"url\\": \\"https://ai.google.dev/gemini-api/docs/rate-limits\\"\\n          }\\n        ]\\n      },\\n      {\\n        \\"@type\\": \\"type.googleapis.com/google.rpc.RetryInfo\\",\\n        \\"retryDelay\\": \\"40s\\"\\n      }\\n    ]\\n  }\\n}\\n","code":429,"status":"Too Many Requests"}}';
+
+    const parsed = parseGoogleApiError(userErrorString);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toContain('You exceeded your current quota');
+    expect(parsed?.details).toHaveLength(4);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
+      ),
+    ).toBe(true);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
+      ),
+    ).toBe(true);
+  });
+
+  it('should parse an error that is an array', () => {
+    const mockError = [
+      {
+        error: {
+          code: 429,
+          message: 'Quota exceeded',
+          details: [
+            {
+              '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+              violations: [{ subject: 'user', description: 'daily limit' }],
+            },
+          ],
+        },
+      },
+    ];
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+  });
+
+  it('should parse a gaxios error where data is an array', () => {
+    const mockError = {
+      response: {
+        status: 429,
+        data: [
+          {
+            error: {
+              code: 429,
+              message: 'Quota exceeded',
+              details: [
+                {
+                  '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+                  violations: [{ subject: 'user', description: 'daily limit' }],
+                },
+              ],
+            },
+          },
+        ],
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+  });
+
+  it('should parse a gaxios error where data is a stringified array', () => {
+    const mockError = {
+      response: {
+        status: 429,
+        data: JSON.stringify([
+          {
+            error: {
+              code: 429,
+              message: 'Quota exceeded',
+              details: [
+                {
+                  '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+                  violations: [{ subject: 'user', description: 'daily limit' }],
+                },
+              ],
+            },
+          },
+        ]),
+      },
+    };
+
+    const parsed = parseGoogleApiError(mockError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toBe('Quota exceeded');
+  });
+
+  it('should parse an error with a malformed @type key (returned by Gemini API)', () => {
+    const malformedError = {
+      name: 'API Error',
+      message: {
+        error: {
+          message:
+            '{\n  "error": {\n    "code": 429,\n    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 54.887755558s.",\n    "status": "RESOURCE_EXHAUSTED",\n    "details": [\n      {\n        " @type": "type.googleapis.com/google.rpc.DebugInfo",\n        "detail": "[ORIGINAL ERROR] generic::resource_exhausted: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\nPlease retry in 54.887755558s. [google.rpc.error_details_ext] { message: \\"You exceeded your current quota, please check your plan and billing details. 
For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\\\\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\\\\nPlease retry in 54.887755558s.\\" }"\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.QuotaFailure",\n        "violations": [\n          {\n            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",\n            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",\n            "quotaDimensions": {\n              "location": "global",\n"model": "gemini-2.5-pro"\n            },\n            "quotaValue": "2"\n          }\n        ]\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.Help",\n        "links": [\n          {\n            "description": "Learn more about Gemini API quotas",\n            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"\n          }\n        ]\n      },\n      {\n" @type": "type.googleapis.com/google.rpc.RetryInfo",\n        "retryDelay": "54s"\n      }\n    ]\n  }\n}\n',
+          code: 429,
+          status: 'Too Many Requests',
+        },
+      },
+    };
+
+    const parsed = parseGoogleApiError(malformedError);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.code).toBe(429);
+    expect(parsed?.message).toContain('You exceeded your current quota');
+    expect(parsed?.details).toHaveLength(4);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
+      ),
+    ).toBe(true);
+    expect(
+      parsed?.details.some(
+        (d) => d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
+      ),
+    ).toBe(true);
+  });
+});
diff --git a/packages/core/src/utils/googleErrors.ts b/packages/core/src/utils/googleErrors.ts
new file mode 100644
index 0000000000..d7c15ac0b6
--- /dev/null
+++ b/packages/core/src/utils/googleErrors.ts
@@ -0,0 +1,305 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @fileoverview
+ * This file contains types and functions for parsing structured Google API errors.
+ */
+
+/**
+ * Based on google/rpc/error_details.proto
+ */
+
+export interface ErrorInfo {
+  '@type': 'type.googleapis.com/google.rpc.ErrorInfo';
+  reason: string;
+  domain: string;
+  metadata: { [key: string]: string };
+}
+
+export interface RetryInfo {
+  '@type': 'type.googleapis.com/google.rpc.RetryInfo';
+  retryDelay: string; // e.g. "51820.638305887s"
+}
+
+export interface DebugInfo {
+  '@type': 'type.googleapis.com/google.rpc.DebugInfo';
+  stackEntries: string[];
+  detail: string;
+}
+
+export interface QuotaFailure {
+  '@type': 'type.googleapis.com/google.rpc.QuotaFailure';
+  violations: Array<{
+    subject?: string;
+    description?: string;
+    apiService?: string;
+    quotaMetric?: string;
+    quotaId?: string;
+    quotaDimensions?: { [key: string]: string };
+    quotaValue?: string | number;
+    futureQuotaValue?: number;
+  }>;
+}
+
+export interface PreconditionFailure {
+  '@type': 'type.googleapis.com/google.rpc.PreconditionFailure';
+  violations: Array<{
+    type: string;
+    subject: string;
+    description: string;
+  }>;
+}
+
+export interface LocalizedMessage {
+  '@type': 'type.googleapis.com/google.rpc.LocalizedMessage';
+  locale: string;
+  message: string;
+}
+
+export interface BadRequest {
+  '@type': 'type.googleapis.com/google.rpc.BadRequest';
+  fieldViolations: Array<{
+    field: string;
+    description: string;
+    reason?: string;
+    localizedMessage?: LocalizedMessage;
+  }>;
+}
+
+export interface RequestInfo {
+  '@type': 'type.googleapis.com/google.rpc.RequestInfo';
+  requestId: string;
+  servingData: string;
+}
+
+export interface ResourceInfo {
+  '@type': 'type.googleapis.com/google.rpc.ResourceInfo';
+  resourceType: string;
+  resourceName: string;
+  owner: string;
+  description: string;
+}
+
+export interface Help {
+  '@type': 'type.googleapis.com/google.rpc.Help';
+  links: Array<{
+    description: string;
+    url: string;
+  }>;
+}
+
+export type GoogleApiErrorDetail =
+  | ErrorInfo
+  | RetryInfo
+  | DebugInfo
+  | QuotaFailure
+  | PreconditionFailure
+  | BadRequest
+  | RequestInfo
+  | ResourceInfo
+  | Help
+  | LocalizedMessage;
+
+export interface GoogleApiError {
+  code: number;
+  message: string;
+  details: GoogleApiErrorDetail[];
+}
+
+type ErrorShape = {
+  message?: string;
+  details?: unknown[];
+  code?: number;
+};
+
+/**
+ * Parses an error value to check if it's a structured Google API error
+ * and extracts every detail entry that carries an `@type` key.
+ *
+ * Accepts a JSON string, an array (first element only), a gaxios-style
+ * object (`response.data.error`), or an object with `error` / `message.error`.
+ * A `message` that is itself a stringified `{ error }` object is unwrapped
+ * (up to 10 levels). A code, a message and >= 1 typed detail are required.
+ *
+ * @param error The error value to inspect.
+ * @returns A GoogleApiError object if the error matches, otherwise null.
+ */
+export function parseGoogleApiError(error: unknown): GoogleApiError | null {
+  if (!error) {
+    return null;
+  }
+
+  let errorObj: unknown = error;
+
+  // If error is a string, try to parse it.
+  if (typeof errorObj === 'string') {
+    try {
+      errorObj = JSON.parse(errorObj);
+    } catch (_) {
+      // Not a JSON string, can't parse.
+      return null;
+    }
+  }
+
+  if (Array.isArray(errorObj) && errorObj.length > 0) {
+    errorObj = errorObj[0];
+  }
+
+  if (typeof errorObj !== 'object' || errorObj === null) {
+    return null;
+  }
+
+  let currentError: ErrorShape | undefined =
+    fromGaxiosError(errorObj) ?? fromApiError(errorObj);
+
+  let depth = 0;
+  const maxDepth = 10;
+  // Handle cases where the actual error object is stringified inside the message
+  // by drilling down until we find an error that doesn't have a stringified message.
+  while (
+    currentError &&
+    typeof currentError.message === 'string' &&
+    depth < maxDepth
+  ) {
+    try {
+      const parsedMessage = JSON.parse(
+        currentError.message.replace(/\u00A0/g, '').replace(/\n/g, ' '),
+      );
+      if (parsedMessage.error) {
+        currentError = parsedMessage.error;
+        depth++;
+      } else {
+        // The message is a JSON string, but not a nested error object.
+        break;
+      }
+    } catch (_error) {
+      // It wasn't a JSON string, so we've drilled down as far as we can.
+      break;
+    }
+  }
+
+  if (!currentError) {
+    return null;
+  }
+
+  const code = currentError.code;
+  const message = currentError.message;
+  const errorDetails = currentError.details;
+
+  if (Array.isArray(errorDetails) && code && message) {
+    const details: GoogleApiErrorDetail[] = [];
+    for (const detail of errorDetails) {
+      if (detail && typeof detail === 'object') {
+        const detailObj = detail as Record<string, unknown>;
+        const typeKey = Object.keys(detailObj).find(
+          (key) => key.trim() === '@type',
+        );
+        if (typeKey) {
+          if (typeKey !== '@type') {
+            detailObj['@type'] = detailObj[typeKey];
+            delete detailObj[typeKey];
+          }
+          // Key was normalized in place above; cast it and let the consumer switch on @type
+          details.push(detailObj as unknown as GoogleApiErrorDetail);
+        }
+      }
+    }
+
+    if (details.length > 0) {
+      return {
+        code,
+        message,
+        details,
+      };
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Locates the raw error payload on a gaxios-style error object.
+ *
+ * `response.data` is consulted first; it may be an object, a JSON string, or
+ * an array (first element is used). When it yields no truthy `error`, the
+ * object's own top-level `error` property is used instead.
+ *
+ * The payload is returned as found: its fields are not validated here.
+ *
+ * @param errorObj The candidate error object.
+ * @returns The payload found, or undefined when neither location has one.
+ */
+function fromGaxiosError(errorObj: object): ErrorShape | undefined {
+  // Structural view only; none of these fields are guaranteed to exist.
+  const candidate = errorObj as {
+    response?: {
+      status?: number;
+      data?: { error?: ErrorShape } | string;
+    };
+    error?: ErrorShape;
+    code?: number;
+  };
+
+  let payload: unknown = candidate.response?.data;
+
+  // A string body may be serialized JSON; if it is not, keep it untouched.
+  if (typeof payload === 'string') {
+    try {
+      payload = JSON.parse(payload);
+    } catch (_) {
+      // Not a JSON string, can't parse.
+    }
+  }
+
+  // An array payload is represented by its first element.
+  if (Array.isArray(payload) && payload.length > 0) {
+    payload = payload[0];
+  }
+
+  let nested: ErrorShape | undefined;
+  if (typeof payload === 'object' && payload !== null && 'error' in payload) {
+    nested = (payload as { error: ErrorShape }).error;
+  }
+
+  // Fall back to a top-level `error` when the gaxios structure isn't there.
+  return nested || candidate.error || undefined;
+}
+
+/**
+ * Locates the raw error payload carried in an error object's `message`.
+ * `message` may be an object, a JSON string, or an array (first element is
+ * used); the payload is whatever sits under its `error` key.
+ *
+ * @param errorObj The candidate error object.
+ * @returns The value of `message.error`, or undefined when there is none.
+ */
+function fromApiError(errorObj: object): ErrorShape | undefined {
+  const candidate = errorObj as {
+    message?: { error?: ErrorShape } | string;
+    code?: number;
+  };
+
+  let payload: unknown = candidate.message;
+  // A string message may be serialized JSON; if it is not, keep it untouched.
+  if (typeof payload === 'string') {
+    try {
+      payload = JSON.parse(payload);
+    } catch (_) {
+      // Not a JSON string, can't parse.
+    }
+  }
+
+  // An array payload is represented by its first element.
+  if (Array.isArray(payload) && payload.length > 0) {
+    payload = payload[0];
+  }
+
+  if (typeof payload === 'object' && payload !== null && 'error' in payload) {
+    return (payload as { error: ErrorShape }).error;
+  }
+  return undefined;
+}
diff --git a/packages/core/src/utils/googleQuotaErrors.test.ts b/packages/core/src/utils/googleQuotaErrors.test.ts
new file mode 100644
index 0000000000..cc5e5de43a
--- /dev/null
+++ b/packages/core/src/utils/googleQuotaErrors.test.ts
@@ -0,0 +1,306 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import {
+  classifyGoogleError,
+  RetryableQuotaError,
+  TerminalQuotaError,
+} from './googleQuotaErrors.js';
+import * as errorParser from './googleErrors.js';
+import type { GoogleApiError } from './googleErrors.js';
+
+describe('classifyGoogleError', () => {
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('should return original error if not a Google API error', () => {
+    const regularError = new Error('Something went wrong');
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(null);
+    const result = classifyGoogleError(regularError);
+    expect(result).toBe(regularError);
+  });
+
+  it('should return original error if code is not 429', () => {
+    const apiError: GoogleApiError = {
+      code: 500,
+      message: 'Server error',
+      details: [],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const originalError = new Error();
+    const result = classifyGoogleError(originalError);
+    expect(result).toBe(originalError);
+    expect(result).not.toBeInstanceOf(TerminalQuotaError);
+    expect(result).not.toBeInstanceOf(RetryableQuotaError);
+  });
+
+  it('should return TerminalQuotaError for daily quota violations in QuotaFailure', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              subject: 'user',
+              description: 'daily limit',
+              quotaId: 'RequestsPerDay-limit',
+            },
+          ],
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+    expect((result as TerminalQuotaError).cause).toBe(apiError);
+  });
+
+  it('should return TerminalQuotaError for daily quota violations in ErrorInfo', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'QUOTA_EXCEEDED',
+          domain: 'googleapis.com',
+          metadata: {
+            quota_limit: 'RequestsPerDay_PerProject_PerUser',
+          },
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should return TerminalQuotaError for long retry delays', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Too many requests',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '301s', // > 5 minutes
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should return RetryableQuotaError for short retry delays', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Too many requests',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '45.123s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(45123);
+  });
+
+  it('should return RetryableQuotaError for per-minute quota violations in QuotaFailure', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              subject: 'user',
+              description: 'per minute limit',
+              quotaId: 'RequestsPerMinute-limit',
+            },
+          ],
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(60000);
+  });
+
+  it('should return RetryableQuotaError for per-minute quota violations in ErrorInfo', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'QUOTA_EXCEEDED',
+          domain: 'googleapis.com',
+          metadata: {
+            quota_limit: 'RequestsPerMinute_PerProject_PerUser',
+          },
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(60000);
+  });
+
+  it('should return RetryableQuotaError for another short retry delay', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message:
+        'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 2\nPlease retry in 56.185908122s.',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              quotaMetric:
+                'generativelanguage.googleapis.com/generate_content_free_tier_requests',
+              quotaId: 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier',
+              quotaDimensions: {
+                location: 'global',
+                model: 'gemini-2.5-pro',
+              },
+              quotaValue: '2',
+            },
+          ],
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.Help',
+          links: [
+            {
+              description: 'Learn more about Gemini API quotas',
+              url: 'https://ai.google.dev/gemini-api/docs/rate-limits',
+            },
+          ],
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '56s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBe(56000);
+  });
+
+  it('should return RetryableQuotaError for Cloud Code RATE_LIMIT_EXCEEDED with retry delay', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message:
+        'You have exhausted your capacity on this model. Your quota will reset after 0s.',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'RATE_LIMIT_EXCEEDED',
+          domain: 'cloudcode-pa.googleapis.com',
+          metadata: {
+            uiMessage: 'true',
+            model: 'gemini-2.5-pro',
+            quotaResetDelay: '539.477544ms',
+            quotaResetTimeStamp: '2025-10-20T19:14:08Z',
+          },
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '0.539477544s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(RetryableQuotaError);
+    expect((result as RetryableQuotaError).retryDelayMs).toBeCloseTo(
+      539.477544,
+    );
+  });
+
+  it('should return TerminalQuotaError for Cloud Code QUOTA_EXHAUSTED', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message:
+        'You have exhausted your capacity on this model. Your quota will reset after 0s.',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.ErrorInfo',
+          reason: 'QUOTA_EXHAUSTED',
+          domain: 'cloudcode-pa.googleapis.com',
+          metadata: {
+            uiMessage: 'true',
+            model: 'gemini-2.5-pro',
+            quotaResetDelay: '539.477544ms',
+            quotaResetTimeStamp: '2025-10-20T19:14:08Z',
+          },
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '0.539477544s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should prioritize daily limit over retry info', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Quota exceeded',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.QuotaFailure',
+          violations: [
+            {
+              subject: 'user',
+              description: 'daily limit',
+              quotaId: 'RequestsPerDay-limit',
+            },
+          ],
+        },
+        {
+          '@type': 'type.googleapis.com/google.rpc.RetryInfo',
+          retryDelay: '10s',
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const result = classifyGoogleError(new Error());
+    expect(result).toBeInstanceOf(TerminalQuotaError);
+  });
+
+  it('should return original error for 429 without specific details', () => {
+    const apiError: GoogleApiError = {
+      code: 429,
+      message: 'Too many requests',
+      details: [
+        {
+          '@type': 'type.googleapis.com/google.rpc.DebugInfo',
+          detail: 'some debug info',
+          stackEntries: [],
+        },
+      ],
+    };
+    vi.spyOn(errorParser, 'parseGoogleApiError').mockReturnValue(apiError);
+    const originalError = new Error();
+    const result = classifyGoogleError(originalError);
+    expect(result).toBe(originalError);
+  });
+});
diff --git a/packages/core/src/utils/googleQuotaErrors.ts b/packages/core/src/utils/googleQuotaErrors.ts
new file mode 100644
index 0000000000..4de1a81710
--- /dev/null
+++ b/packages/core/src/utils/googleQuotaErrors.ts
@@ -0,0 +1,192 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  ErrorInfo,
+  GoogleApiError,
+  QuotaFailure,
+  RetryInfo,
+} from './googleErrors.js';
+import { parseGoogleApiError } from './googleErrors.js';
+
+/**
+ * A non-retryable error for a hard quota limit (e.g., daily limit); `cause` carries the parsed Google API error.
+ */
+export class TerminalQuotaError extends Error {
+  constructor(
+    message: string,
+    override readonly cause: GoogleApiError,
+  ) {
+    super(message);
+    this.name = 'TerminalQuotaError';
+  }
+}
+
+/**
+ * A retryable error for a temporary quota issue (e.g., per-minute limit); the delay given in seconds is stored as `retryDelayMs`.
+ */
+export class RetryableQuotaError extends Error {
+  retryDelayMs: number;
+
+  constructor(
+    message: string,
+    override readonly cause: GoogleApiError,
+    retryDelaySeconds: number,
+  ) {
+    super(message);
+    this.name = 'RetryableQuotaError';
+    this.retryDelayMs = retryDelaySeconds * 1000;
+  }
+}
+
+/**
+ * Parses a duration string (e.g., "34.074824224s", "60s", "900ms") and returns the time in seconds.
+ * @param duration The duration string to parse; a trailing "s" means seconds, "ms" means milliseconds.
+ * @returns The duration in seconds, or null if parsing fails.
+ */
+function parseDurationInSeconds(duration: string): number | null {
+  // Test 'ms' before 's': "900ms" also ends in 's', and parseFloat("900m") is 900.
+  const isMillis = duration.endsWith('ms');
+  if (!isMillis && !duration.endsWith('s')) return null;
+  const value = parseFloat(duration.slice(0, isMillis ? -2 : -1));
+  return isNaN(value) ? null : isMillis ? value / 1000 : value;
+}
+
+/**
+ * Analyzes a caught error and classifies it as a specific quota-related error if applicable.
+ *
+ * Only parsed Google API errors with code 429 are classified; checks run in this order:
+ * - A `PerDay`/`Daily` quota id in `QuotaFailure` is a `TerminalQuotaError`.
+ * - Cloud Code `ErrorInfo`: `RATE_LIMIT_EXCEEDED` is retryable, `QUOTA_EXHAUSTED` is terminal.
+ * - A `PerDay`/`Daily` `quota_limit` in `ErrorInfo` metadata is a `TerminalQuotaError`.
+ * - A `RetryInfo` delay over 2 minutes is terminal; a shorter one is retryable after that delay.
+ * - A `PerMinute` quota id or `quota_limit` is a `RetryableQuotaError` retried after 60s.
+ *
+ * @param error The error to classify.
+ * @returns A `TerminalQuotaError`, `RetryableQuotaError`, or the original `unknown` error.
+ */
+export function classifyGoogleError(error: unknown): unknown {
+  const googleApiError = parseGoogleApiError(error);
+
+  if (!googleApiError || googleApiError.code !== 429) {
+    return error; // Not a 429 error we can handle.
+  }
+
+  const quotaFailure = googleApiError.details.find(
+    (d): d is QuotaFailure =>
+      d['@type'] === 'type.googleapis.com/google.rpc.QuotaFailure',
+  );
+
+  const errorInfo = googleApiError.details.find(
+    (d): d is ErrorInfo =>
+      d['@type'] === 'type.googleapis.com/google.rpc.ErrorInfo',
+  );
+
+  const retryInfo = googleApiError.details.find(
+    (d): d is RetryInfo =>
+      d['@type'] === 'type.googleapis.com/google.rpc.RetryInfo',
+  );
+
+  // 1. Check for long-term limits in QuotaFailure or ErrorInfo
+  if (quotaFailure) {
+    for (const violation of quotaFailure.violations) {
+      const quotaId = violation.quotaId ?? '';
+      if (quotaId.includes('PerDay') || quotaId.includes('Daily')) {
+        return new TerminalQuotaError(
+          `${googleApiError.message}\nExpected quota reset within 24h.`,
+          googleApiError,
+        );
+      }
+    }
+  }
+
+  if (errorInfo) {
+    // New Cloud Code API quota handling
+    if (errorInfo.domain) {
+      const validDomains = [
+        'cloudcode-pa.googleapis.com',
+        'staging-cloudcode-pa.googleapis.com',
+        'autopush-cloudcode-pa.googleapis.com',
+      ];
+      if (validDomains.includes(errorInfo.domain)) {
+        if (errorInfo.reason === 'RATE_LIMIT_EXCEEDED') {
+          let delaySeconds = 10; // Default retry of 10s
+          if (retryInfo?.retryDelay) {
+            const parsedDelay = parseDurationInSeconds(retryInfo.retryDelay);
+            if (parsedDelay) {
+              delaySeconds = parsedDelay;
+            }
+          }
+          return new RetryableQuotaError(
+            `${googleApiError.message}`,
+            googleApiError,
+            delaySeconds,
+          );
+        }
+        if (errorInfo.reason === 'QUOTA_EXHAUSTED') {
+          return new TerminalQuotaError(
+            `${googleApiError.message}`,
+            googleApiError,
+          );
+        }
+      }
+    }
+
+    // Existing Cloud Code API quota handling
+    const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? '';
+    if (quotaLimit.includes('PerDay') || quotaLimit.includes('Daily')) {
+      return new TerminalQuotaError(
+        `${googleApiError.message}\nExpected quota reset within 24h.`,
+        googleApiError,
+      );
+    }
+  }
+
+  // 2. Check for long delays in RetryInfo
+  if (retryInfo?.retryDelay) {
+    const delaySeconds = parseDurationInSeconds(retryInfo.retryDelay);
+    if (delaySeconds) {
+      if (delaySeconds > 120) {
+        return new TerminalQuotaError(
+          `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`,
+          googleApiError,
+        );
+      }
+      // This is a retryable error with a specific delay.
+      return new RetryableQuotaError(
+        `${googleApiError.message}\nSuggested retry after ${retryInfo.retryDelay}.`,
+        googleApiError,
+        delaySeconds,
+      );
+    }
+  }
+
+  // 3. Check for short-term limits in QuotaFailure or ErrorInfo
+  if (quotaFailure) {
+    for (const violation of quotaFailure.violations) {
+      const quotaId = violation.quotaId ?? '';
+      if (quotaId.includes('PerMinute')) {
+        return new RetryableQuotaError(
+          `${googleApiError.message}\nSuggested retry after 60s.`,
+          googleApiError,
+          60,
+        );
+      }
+    }
+  }
+
+  if (errorInfo) {
+    const quotaLimit = errorInfo.metadata?.['quota_limit'] ?? '';
+    if (quotaLimit.includes('PerMinute')) {
+      return new RetryableQuotaError(
+        `${googleApiError.message}\nSuggested retry after 60s.`,
+        googleApiError,
+        60,
+      );
+    }
+  }
+  return error; // Fallback to original error if no specific classification fits.
+}
diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts
index 6417e0db57..893e48b0f2 100644
--- a/packages/core/src/utils/quotaErrorDetection.ts
+++ b/packages/core/src/utils/quotaErrorDetection.ts
@@ -33,68 +33,3 @@ export function isStructuredError(error: unknown): error is StructuredError {
     typeof (error as StructuredError).message === 'string'
   );
 }
-
-export function isProQuotaExceededError(error: unknown): boolean {
-  // Check for Pro quota exceeded errors by looking for the specific pattern
-  // This will match patterns like:
-  // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
-  // - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'"
-  // We use string methods instead of regex to avoid ReDoS vulnerabilities
-
-  const checkMessage = (message: string): boolean =>
-    message.includes("Quota exceeded for quota metric 'Gemini") &&
-    message.includes("Pro Requests'");
-
-  if (typeof error === 'string') {
-    return checkMessage(error);
-  }
-
-  if (isStructuredError(error)) {
-    return checkMessage(error.message);
-  }
-
-  if (isApiError(error)) {
-    return checkMessage(error.error.message);
-  }
-
-  // Check if it's a Gaxios error with response data
-  if (error && typeof error === 'object' && 'response' in error) {
-    const gaxiosError = error as {
-      response?: {
-        data?: unknown;
-      };
-    };
-    if (gaxiosError.response && gaxiosError.response.data) {
-      if (typeof gaxiosError.response.data === 'string') {
-        return checkMessage(gaxiosError.response.data);
-      }
-      if (
-        typeof gaxiosError.response.data === 'object' &&
-        gaxiosError.response.data !== null &&
-        'error' in gaxiosError.response.data
-      ) {
-        const errorData = gaxiosError.response.data as {
-          error?: { message?: string };
-        };
-        return checkMessage(errorData.error?.message || '');
-      }
-    }
-  }
-  return false;
-}
-
-export function isGenericQuotaExceededError(error: unknown): boolean {
-  if (typeof error === 'string') {
-    return error.includes('Quota exceeded for quota metric');
-  }
-
-  if (isStructuredError(error)) {
-    return error.message.includes('Quota exceeded for quota metric');
-  }
-
-  if (isApiError(error)) {
-    return error.error.message.includes('Quota exceeded for quota metric');
-  }
-
-  return false;
-}
diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts
index 13af50b475..e0297e8903 100644
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -7,10 +7,15 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { ApiError } from '@google/genai';
+import { AuthType } from '../core/contentGenerator.js';
 import type { HttpError } from './retry.js';
 import { retryWithBackoff } from './retry.js';
 import { setSimulate429 } from './testUtils.js';
 import { debugLogger } from './debugLogger.js';
+import {
+  TerminalQuotaError,
+  RetryableQuotaError,
+} from './googleQuotaErrors.js';
 
 // Helper to create a mock function that fails a certain number of times
 const createFailingFunction = (
@@ -100,26 +105,26 @@ describe('retryWithBackoff', () => {
 
-    // Expect it to fail with the error from the 5th attempt.
+    // Expect it to fail with the error from the 3rd (final) attempt.
     await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 5'),
+      expect(promise).rejects.toThrow('Simulated error attempt 3'),
       vi.runAllTimersAsync(),
     ]);
 
-    expect(mockFn).toHaveBeenCalledTimes(5);
+    expect(mockFn).toHaveBeenCalledTimes(3);
   });
 
-  it('should default to 5 maxAttempts if options.maxAttempts is undefined', async () => {
-    // This function will fail more than 5 times to ensure all retries are used.
+  it('should default to 3 maxAttempts if options.maxAttempts is undefined', async () => {
+    // This function will fail more than 3 times to ensure all retries are used.
     const mockFn = createFailingFunction(10);
 
     const promise = retryWithBackoff(mockFn, { maxAttempts: undefined });
 
-    // Expect it to fail with the error from the 5th attempt.
+    // Expect it to fail with the error from the 3rd (final) attempt.
     await Promise.all([
-      expect(promise).rejects.toThrow('Simulated error attempt 5'),
+      expect(promise).rejects.toThrow('Simulated error attempt 3'),
       vi.runAllTimersAsync(),
     ]);
 
-    expect(mockFn).toHaveBeenCalledTimes(5);
+    expect(mockFn).toHaveBeenCalledTimes(3);
   });
 
   it('should not retry if shouldRetry returns false', async () => {
@@ -336,15 +341,13 @@ describe('retryWithBackoff', () => {
   });
 
   describe('Flash model fallback for OAuth users', () => {
-    it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
+    it('should trigger fallback for OAuth personal users on TerminalQuotaError', async () => {
       const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
 
       let fallbackOccurred = false;
       const mockFn = vi.fn().mockImplementation(async () => {
         if (!fallbackOccurred) {
-          const error: HttpError = new Error('Rate limit exceeded');
-          error.status = 429;
-          throw error;
+          throw new TerminalQuotaError('Daily limit reached', {} as any);
         }
         return 'success';
       });
@@ -352,154 +355,62 @@ describe('retryWithBackoff', () => {
       const promise = retryWithBackoff(mockFn, {
         maxAttempts: 3,
         initialDelayMs: 100,
-        onPersistent429: async (authType?: string) => {
+        onPersistent429: async (authType?: string, error?: unknown) => {
           fallbackOccurred = true;
-          return await fallbackCallback(authType);
+          return await fallbackCallback(authType, error);
         },
         authType: 'oauth-personal',
       });
 
-      // Advance all timers to complete retries
-      await vi.runAllTimersAsync();
-
-      // Should succeed after fallback
-      await expect(promise).resolves.toBe('success');
-
-      // Verify callback was called with correct auth type
-      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
-
-      // Should retry again after fallback
-      expect(mockFn).toHaveBeenCalledTimes(3); // 2 initial attempts + 1 after fallback
-    });
-
-    it('should NOT trigger fallback for API key users', async () => {
-      const fallbackCallback = vi.fn();
-
-      const mockFn = vi.fn(async () => {
-        const error: HttpError = new Error('Rate limit exceeded');
-        error.status = 429;
-        throw error;
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 3,
-        initialDelayMs: 100,
-        onPersistent429: fallbackCallback,
-        authType: 'gemini-api-key',
-      });
-
-      // Handle the promise properly to avoid unhandled rejections
-      const resultPromise = promise.catch((error) => error);
-      await vi.runAllTimersAsync();
-      const result = await resultPromise;
-
-      // Should fail after all retries without fallback
-      expect(result).toBeInstanceOf(Error);
-      expect(result.message).toBe('Rate limit exceeded');
-
-      // Callback should not be called for API key users
-      expect(fallbackCallback).not.toHaveBeenCalled();
-    });
-
-    it('should reset attempt counter and continue after successful fallback', async () => {
-      let fallbackCalled = false;
-      const fallbackCallback = vi.fn().mockImplementation(async () => {
-        fallbackCalled = true;
-        return 'gemini-2.5-flash';
-      });
-
-      const mockFn = vi.fn().mockImplementation(async () => {
-        if (!fallbackCalled) {
-          const error: HttpError = new Error('Rate limit exceeded');
-          error.status = 429;
-          throw error;
-        }
-        return 'success';
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 3,
-        initialDelayMs: 100,
-        onPersistent429: fallbackCallback,
-        authType: 'oauth-personal',
-      });
-
       await vi.runAllTimersAsync();
 
       await expect(promise).resolves.toBe('success');
-      expect(fallbackCallback).toHaveBeenCalledOnce();
-    });
-
-    it('should continue with original error if fallback is rejected', async () => {
-      const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
-
-      const mockFn = vi.fn(async () => {
-        const error: HttpError = new Error('Rate limit exceeded');
-        error.status = 429;
-        throw error;
-      });
-
-      const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 3,
-        initialDelayMs: 100,
-        onPersistent429: fallbackCallback,
-        authType: 'oauth-personal',
-      });
-
-      // Handle the promise properly to avoid unhandled rejections
-      const resultPromise = promise.catch((error) => error);
-      await vi.runAllTimersAsync();
-      const result = await resultPromise;
-
-      // Should fail with original error when fallback is rejected
-      expect(result).toBeInstanceOf(Error);
-      expect(result.message).toBe('Rate limit exceeded');
       expect(fallbackCallback).toHaveBeenCalledWith(
         'oauth-personal',
-        expect.any(Error),
+        expect.any(TerminalQuotaError),
       );
+      expect(mockFn).toHaveBeenCalledTimes(2);
     });
 
-    it('should handle mixed error types (only count consecutive 429s)', async () => {
-      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
-      let attempts = 0;
-      let fallbackOccurred = false;
-
+    it('should use retryDelayMs from RetryableQuotaError', async () => {
+      const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
       const mockFn = vi.fn().mockImplementation(async () => {
-        attempts++;
-        if (fallbackOccurred) {
-          return 'success';
-        }
-        if (attempts === 1) {
-          // First attempt: 500 error (resets consecutive count)
-          const error: HttpError = new Error('Server error');
-          error.status = 500;
-          throw error;
-        } else {
-          // Remaining attempts: 429 errors
-          const error: HttpError = new Error('Rate limit exceeded');
-          error.status = 429;
-          throw error;
-        }
+        throw new RetryableQuotaError('Per-minute limit', {} as any, 12.345);
       });
 
       const promise = retryWithBackoff(mockFn, {
-        maxAttempts: 5,
+        maxAttempts: 2,
         initialDelayMs: 100,
-        onPersistent429: async (authType?: string) => {
-          fallbackOccurred = true;
-          return await fallbackCallback(authType);
-        },
-        authType: 'oauth-personal',
       });
 
+      // Attach the rejection expectation *before* running timers
+      // eslint-disable-next-line vitest/valid-expect
+      const assertionPromise = expect(promise).rejects.toThrow();
       await vi.runAllTimersAsync();
+      await assertionPromise;
 
-      await expect(promise).resolves.toBe('success');
-
-      // Should trigger fallback after 2 consecutive 429s (attempts 2-3)
-      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+      expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 12345);
     });
+
+    it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])(
+      'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError',
+      async (authType) => {
+        const fallbackCallback = vi.fn();
+        const mockFn = vi.fn().mockImplementation(async () => {
+          throw new TerminalQuotaError('Daily limit reached', {} as any);
+        });
+
+        const promise = retryWithBackoff(mockFn, {
+          maxAttempts: 3,
+          onPersistent429: fallbackCallback,
+          authType,
+        });
+
+        await expect(promise).rejects.toThrow('Daily limit reached');
+        expect(fallbackCallback).not.toHaveBeenCalled();
+        expect(mockFn).toHaveBeenCalledTimes(1);
+      },
+    );
   });
   it('should abort the retry loop when the signal is aborted', async () => {
     const abortController = new AbortController();
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index 70afe42f5d..edb8f9bb85 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -8,9 +8,10 @@ import type { GenerateContentResponse } from '@google/genai';
 import { ApiError } from '@google/genai';
 import { AuthType } from '../core/contentGenerator.js';
 import {
-  isProQuotaExceededError,
-  isGenericQuotaExceededError,
-} from './quotaErrorDetection.js';
+  classifyGoogleError,
+  RetryableQuotaError,
+  TerminalQuotaError,
+} from './googleQuotaErrors.js';
 import { delay, createAbortError } from './delay.js';
 import { debugLogger } from './debugLogger.js';
 
@@ -37,7 +38,7 @@ export interface RetryOptions {
 }
 
 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
-  maxAttempts: 5,
+  maxAttempts: 3,
   initialDelayMs: 5000,
   maxDelayMs: 30000, // 30 seconds
   shouldRetryOnError: defaultShouldRetry,
@@ -118,7 +119,6 @@ export async function retryWithBackoff<T>(
 
   let attempt = 0;
   let currentDelay = initialDelayMs;
-  let consecutive429Count = 0;
 
   while (attempt < maxAttempts) {
     if (signal?.aborted) {
@@ -145,94 +145,54 @@ export async function retryWithBackoff<T>(
         throw error;
       }
 
-      const errorStatus = getErrorStatus(error);
+      const classifiedError = classifyGoogleError(error);
 
-      // Check for Pro quota exceeded error first - immediate fallback for OAuth users
-      if (
-        errorStatus === 429 &&
-        authType === AuthType.LOGIN_WITH_GOOGLE &&
-        isProQuotaExceededError(error) &&
-        onPersistent429
-      ) {
-        try {
-          const fallbackModel = await onPersistent429(authType, error);
-          if (fallbackModel !== false && fallbackModel !== null) {
-            // Reset attempt counter and try with new model
-            attempt = 0;
-            consecutive429Count = 0;
-            currentDelay = initialDelayMs;
-            // With the model updated, we continue to the next attempt
-            continue;
-          } else {
-            // Fallback handler returned null/false, meaning don't continue - stop retry process
-            throw error;
+      if (classifiedError instanceof TerminalQuotaError) {
+        if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+          try {
+            const fallbackModel = await onPersistent429(
+              authType,
+              classifiedError,
+            );
+            if (fallbackModel) {
+              attempt = 0; // Reset attempts and retry with the new model.
+              currentDelay = initialDelayMs;
+              continue;
+            }
+          } catch (fallbackError) {
+            debugLogger.warn('Fallback to Flash model failed:', fallbackError);
           }
-        } catch (fallbackError) {
-          // If fallback fails, continue with original error
-          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
         }
+        throw classifiedError; // Throw if no fallback or fallback failed.
       }
 
-      // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
-      if (
-        errorStatus === 429 &&
-        authType === AuthType.LOGIN_WITH_GOOGLE &&
-        !isProQuotaExceededError(error) &&
-        isGenericQuotaExceededError(error) &&
-        onPersistent429
-      ) {
-        try {
-          const fallbackModel = await onPersistent429(authType, error);
-          if (fallbackModel !== false && fallbackModel !== null) {
-            // Reset attempt counter and try with new model
-            attempt = 0;
-            consecutive429Count = 0;
-            currentDelay = initialDelayMs;
-            // With the model updated, we continue to the next attempt
-            continue;
-          } else {
-            // Fallback handler returned null/false, meaning don't continue - stop retry process
-            throw error;
+      if (classifiedError instanceof RetryableQuotaError) {
+        if (attempt >= maxAttempts) {
+          if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+            try {
+              const fallbackModel = await onPersistent429(
+                authType,
+                classifiedError,
+              );
+              if (fallbackModel) {
+                attempt = 0; // Reset attempts and retry with the new model.
+                currentDelay = initialDelayMs;
+                continue;
+              }
+            } catch (fallbackError) {
+              debugLogger.warn('Model fallback failed:', fallbackError);
+            }
           }
-        } catch (fallbackError) {
-          // If fallback fails, continue with original error
-          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
+          throw classifiedError;
         }
+        debugLogger.warn(
+          `Attempt ${attempt} failed: ${classifiedError.message}. Retrying after ${classifiedError.retryDelayMs}ms...`,
+        );
+        await delay(classifiedError.retryDelayMs, signal);
+        continue;
       }
 
-      // Track consecutive 429 errors
-      if (errorStatus === 429) {
-        consecutive429Count++;
-      } else {
-        consecutive429Count = 0;
-      }
-
-      // If we have persistent 429s and a fallback callback for OAuth
-      if (
-        consecutive429Count >= 2 &&
-        onPersistent429 &&
-        authType === AuthType.LOGIN_WITH_GOOGLE
-      ) {
-        try {
-          const fallbackModel = await onPersistent429(authType, error);
-          if (fallbackModel !== false && fallbackModel !== null) {
-            // Reset attempt counter and try with new model
-            attempt = 0;
-            consecutive429Count = 0;
-            currentDelay = initialDelayMs;
-            // With the model updated, we continue to the next attempt
-            continue;
-          } else {
-            // Fallback handler returned null/false, meaning don't continue - stop retry process
-            throw error;
-          }
-        } catch (fallbackError) {
-          // If fallback fails, continue with original error
-          debugLogger.warn('Fallback to Flash model failed:', fallbackError);
-        }
-      }
-
-      // Check if we've exhausted retries or shouldn't retry
+      // Generic retry logic for other errors
       if (
         attempt >= maxAttempts ||
         !shouldRetryOnError(error as Error, retryFetchErrors)
@@ -240,31 +200,17 @@ export async function retryWithBackoff<T>(
         throw error;
       }
 
-      const { delayDurationMs, errorStatus: delayErrorStatus } =
-        getDelayDurationAndStatus(error);
+      const errorStatus = getErrorStatus(error);
+      logRetryAttempt(attempt, error, errorStatus);
 
-      if (delayDurationMs > 0) {
-        // Respect Retry-After header if present and parsed
-        debugLogger.warn(
-          `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
-          error,
-        );
-        await delay(delayDurationMs, signal);
-        // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time
-        currentDelay = initialDelayMs;
-      } else {
-        // Fall back to exponential backoff with jitter
-        logRetryAttempt(attempt, error, errorStatus);
-        // Add jitter: +/- 30% of currentDelay
-        const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
-        const delayWithJitter = Math.max(0, currentDelay + jitter);
-        await delay(delayWithJitter, signal);
-        currentDelay = Math.min(maxDelayMs, currentDelay * 2);
-      }
+      // Exponential backoff with jitter for non-quota errors
+      const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
+      const delayWithJitter = Math.max(0, currentDelay + jitter);
+      await delay(delayWithJitter, signal);
+      currentDelay = Math.min(maxDelayMs, currentDelay * 2);
     }
   }
-  // This line should theoretically be unreachable due to the throw in the catch block.
-  // Added for type safety and to satisfy the compiler that a promise is always returned.
+
   throw new Error('Retry attempts exhausted');
 }
 
@@ -295,62 +241,6 @@ export function getErrorStatus(error: unknown): number | undefined {
   return undefined;
 }
 
-/**
- * Extracts the Retry-After delay from an error object's headers.
- * @param error The error object.
- * @returns The delay in milliseconds, or 0 if not found or invalid.
- */
-function getRetryAfterDelayMs(error: unknown): number {
-  if (typeof error === 'object' && error !== null) {
-    // Check for error.response.headers (common in axios errors)
-    if (
-      'response' in error &&
-      typeof (error as { response?: unknown }).response === 'object' &&
-      (error as { response?: unknown }).response !== null
-    ) {
-      const response = (error as { response: { headers?: unknown } }).response;
-      if (
-        'headers' in response &&
-        typeof response.headers === 'object' &&
-        response.headers !== null
-      ) {
-        const headers = response.headers as { 'retry-after'?: unknown };
-        const retryAfterHeader = headers['retry-after'];
-        if (typeof retryAfterHeader === 'string') {
-          const retryAfterSeconds = parseInt(retryAfterHeader, 10);
-          if (!isNaN(retryAfterSeconds)) {
-            return retryAfterSeconds * 1000;
-          }
-          // It might be an HTTP date
-          const retryAfterDate = new Date(retryAfterHeader);
-          if (!isNaN(retryAfterDate.getTime())) {
-            return Math.max(0, retryAfterDate.getTime() - Date.now());
-          }
-        }
-      }
-    }
-  }
-  return 0;
-}
-
-/**
- * Determines the delay duration based on the error, prioritizing Retry-After header.
- * @param error The error object.
- * @returns An object containing the delay duration in milliseconds and the error status.
- */
-function getDelayDurationAndStatus(error: unknown): {
-  delayDurationMs: number;
-  errorStatus: number | undefined;
-} {
-  const errorStatus = getErrorStatus(error);
-  let delayDurationMs = 0;
-
-  if (errorStatus === 429) {
-    delayDurationMs = getRetryAfterDelayMs(error);
-  }
-  return { delayDurationMs, errorStatus };
-}
-
 /**
  * Logs a message for a retry attempt when using exponential backoff.
  * @param attempt The current attempt number.

From 4960c472571ac737a3e4287745ecfdae0ac5cb7b Mon Sep 17 00:00:00 2001
From: shishu314 <shishu_1998@yahoo.com>
Date: Fri, 24 Oct 2025 14:23:50 -0400
Subject: [PATCH 06/73] fix(infra) - Simplify cancel in progress and add
 permission to set status step (#11835)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
---
 .github/workflows/test_chained_e2e.yml | 6 ++++--
 .github/workflows/trigger_e2e.yml      | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test_chained_e2e.yml b/.github/workflows/test_chained_e2e.yml
index adb77ffa03..8ded1a7591 100644
--- a/.github/workflows/test_chained_e2e.yml
+++ b/.github/workflows/test_chained_e2e.yml
@@ -18,9 +18,9 @@ on:
         required: true
 
 concurrency:
-  group: '${{ github.workflow }}-${{ github.head_ref || github.ref }}'
+  group: '${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.ref }}'
   cancel-in-progress: |-
-    ${{ github.ref != 'refs/heads/main' && !startsWith(github.ref, 'refs/heads/release/') }}
+    ${{ github.event_name != 'push' && github.event_name != 'merge_group' }}
 
 permissions:
   contents: 'read'
@@ -99,6 +99,7 @@ jobs:
 
   set_pending_status:
     runs-on: 'gemini-cli-ubuntu-16-core'
+    permissions: 'write-all'
     if: "github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run'"
     needs:
       - 'parse_run_context'
@@ -286,6 +287,7 @@ jobs:
 
   set_workflow_status:
     runs-on: 'gemini-cli-ubuntu-16-core'
+    permissions: 'write-all'
     if: "github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run'"
     needs:
       - 'parse_run_context'
diff --git a/.github/workflows/trigger_e2e.yml b/.github/workflows/trigger_e2e.yml
index dd6079cee2..c8cfe5d744 100644
--- a/.github/workflows/trigger_e2e.yml
+++ b/.github/workflows/trigger_e2e.yml
@@ -15,7 +15,7 @@ jobs:
     steps:
       - name: 'Save Repo name'
         env:
-          # Replace with github.event.pull_request.base.repo.full_name when switched to listen on pull request events. This repo name does not contain the org which is needed for checkout.
+          # Replace with github.event.pull_request.head.repo.full_name when switched to listen on pull request events. This repo name does not contain the org which is needed for checkout.
           REPO_NAME: '${{ github.event.repository.name }}'
         run: |
           mkdir -p ./pr

From 31b7c010d028e0548d3b0756a7eeaa100b258368 Mon Sep 17 00:00:00 2001
From: cornmander <shikhman@google.com>
Date: Fri, 24 Oct 2025 14:25:54 -0400
Subject: [PATCH 07/73] Add regression tests for shell command parsing (#11962)

---
 integration-tests/run_shell_command.test.ts |   3 +-
 packages/core/src/utils/shell-utils.test.ts | 115 ++++++++++++++++++++
 2 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts
index c71f6239ed..472bbbccd5 100644
--- a/integration-tests/run_shell_command.test.ts
+++ b/integration-tests/run_shell_command.test.ts
@@ -427,7 +427,8 @@ describe('run_shell_command', () => {
     expect(failureLog!.toolRequest.success).toBe(false);
   });
 
-  it('should reject chained commands when only the first segment is allowlisted in non-interactive mode', async () => {
+  // TODO(#11966): Deflake this test and re-enable once the underlying race is resolved.
+  it.skip('should reject chained commands when only the first segment is allowlisted in non-interactive mode', async () => {
     const rig = new TestRig();
     await rig.setup(
       'should reject chained commands when only the first segment is allowlisted',
diff --git a/packages/core/src/utils/shell-utils.test.ts b/packages/core/src/utils/shell-utils.test.ts
index e2c80bc9a2..c178d20d6a 100644
--- a/packages/core/src/utils/shell-utils.test.ts
+++ b/packages/core/src/utils/shell-utils.test.ts
@@ -156,6 +156,121 @@ describe('isCommandAllowed', () => {
     );
   });
 
+  it('should block a command that redefines an allowed function to run an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed(
+      'echo () (curl google.com) ; echo Hello Wolrd',
+      config,
+    );
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block a multi-line function body that runs an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed(
+      `echo () {
+  curl google.com
+} ; echo ok`,
+      config,
+    );
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block a function keyword declaration that runs an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed(
+      'function echo { curl google.com; } ; echo hi',
+      config,
+    );
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block command substitution that invokes an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed('echo $(curl google.com)', config);
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block pipelines that invoke an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed('echo hi | curl google.com', config);
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block background jobs that invoke an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed('echo hi & curl google.com', config);
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block command substitution inside a here-document when the inner command is unlisted', () => {
+    config.getCoreTools = () => [
+      'run_shell_command(echo)',
+      'run_shell_command(cat)',
+    ];
+    const result = isCommandAllowed(
+      `cat <<EOF
+$(rm -rf /)
+EOF`,
+      config,
+    );
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "rm -rf /"`,
+    );
+  });
+
+  it('should block backtick substitution that invokes an unlisted command', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed('echo `curl google.com`', config);
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block process substitution using <() when the inner command is unlisted', () => {
+    config.getCoreTools = () => [
+      'run_shell_command(diff)',
+      'run_shell_command(echo)',
+    ];
+    const result = isCommandAllowed(
+      'diff <(curl google.com) <(echo safe)',
+      config,
+    );
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
+  it('should block process substitution using >() when the inner command is unlisted', () => {
+    config.getCoreTools = () => ['run_shell_command(echo)'];
+    const result = isCommandAllowed('echo "data" > >(curl google.com)', config);
+    expect(result.allowed).toBe(false);
+    expect(result.reason).toBe(
+      `Command(s) not in the allowed commands list. Disallowed commands: "curl google.com"`,
+    );
+  });
+
   describe('command substitution', () => {
     it('should allow command substitution using `$(...)`', () => {
       const result = isCommandAllowed('echo $(goodCommand --safe)', config);

From ca94dabd4f84bcf2399a7b90799fe6c89491f6d9 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@google.com>
Date: Fri, 24 Oct 2025 11:42:49 -0700
Subject: [PATCH 08/73] Fix(cli): Use cross-platform path separators in
 extension tests (#11970)

---
 packages/cli/src/config/extension.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index e616246cce..3243aff0d5 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -293,8 +293,8 @@ describe('extension tests', () => {
         mcpServers: {
           'test-server': {
             command: 'node',
-            args: ['${extensionPath}/server/index.js'],
-            cwd: '${extensionPath}/server',
+            args: ['${extensionPath}${/}server${/}index.js'],
+            cwd: '${extensionPath}${/}server',
           },
         },
       });

From 63a90836fe6a9a2539dade85f303ab461bf82cf6 Mon Sep 17 00:00:00 2001
From: Jacob MacDonald <jakemac@google.com>
Date: Fri, 24 Oct 2025 11:55:31 -0700
Subject: [PATCH 09/73] fix linked extension test on windows (#11973)

---
 packages/cli/src/config/extension.test.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index 3243aff0d5..7f0e4e2f02 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -309,6 +309,9 @@ describe('extension tests', () => {
       expect(extensions[0].mcpServers?.['test-server'].cwd).toBe(
         path.join(sourceExtDir, 'server'),
       );
+      expect(extensions[0].mcpServers?.['test-server'].args).toEqual([
+        path.join(sourceExtDir, 'server', 'index.js'),
+      ]);
     });
 
     it('should resolve environment variables in extension configuration', () => {

From b188a51c32322f7167943ebe06023c0bc11dd4fa Mon Sep 17 00:00:00 2001
From: Allen Hutchison <adh@google.com>
Date: Fri, 24 Oct 2025 13:04:40 -0700
Subject: [PATCH 10/73] feat(core): Introduce message bus for tool execution
 confirmation (#11544)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 integration-tests/run_shell_command.test.ts   |  4 +-
 packages/core/src/core/coreToolScheduler.ts   | 58 +++++++++++--------
 .../src/core/nonInteractiveToolExecutor.ts    | 10 ++--
 packages/core/src/tools/edit.ts               | 45 +++++++++++---
 packages/core/src/tools/mcp-tool.ts           |  4 ++
 packages/core/src/tools/memoryTool.ts         | 34 +++++++++--
 packages/core/src/tools/shell.ts              | 20 +++++--
 packages/core/src/tools/smart-edit.ts         | 36 +++++++++---
 packages/core/src/tools/tool-registry.ts      |  4 ++
 packages/core/src/tools/tools.ts              | 46 +++++++++------
 packages/core/src/tools/web-fetch.test.ts     |  4 +-
 packages/core/src/tools/web-fetch.ts          | 16 +----
 packages/core/src/tools/write-file.ts         | 24 ++++++--
 packages/core/src/tools/write-todos.ts        |  4 ++
 packages/core/src/utils/errors.ts             |  7 +++
 15 files changed, 224 insertions(+), 92 deletions(-)

diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts
index 472bbbccd5..d643437eac 100644
--- a/integration-tests/run_shell_command.test.ts
+++ b/integration-tests/run_shell_command.test.ts
@@ -144,7 +144,7 @@ describe('run_shell_command', () => {
     validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
   });
 
-  it('should run allowed sub-command in non-interactive mode', async () => {
+  it.skip('should run allowed sub-command in non-interactive mode', async () => {
     const rig = new TestRig();
     await rig.setup('should run allowed sub-command in non-interactive mode');
 
@@ -262,7 +262,7 @@ describe('run_shell_command', () => {
     expect(toolCall.toolRequest.success).toBe(true);
   });
 
-  it('should work with ShellTool alias', async () => {
+  it.skip('should work with ShellTool alias', async () => {
     const rig = new TestRig();
     await rig.setup('should work with ShellTool alias');
 
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
index 6c76f4aa5c..5c1cb58fb7 100644
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -46,6 +46,7 @@ import levenshtein from 'fast-levenshtein';
 import { ShellToolInvocation } from '../tools/shell.js';
 import type { ToolConfirmationRequest } from '../confirmation-bus/types.js';
 import { MessageBusType } from '../confirmation-bus/types.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 
 export type ValidatingToolCall = {
   status: 'validating';
@@ -331,6 +332,13 @@ interface CoreToolSchedulerOptions {
 }
 
 export class CoreToolScheduler {
+  // Static WeakMap tracking which MessageBus instances already have a handler subscribed.
+  // This prevents duplicate subscriptions when multiple CoreToolScheduler instances are created.
+  private static subscribedMessageBuses = new WeakMap<
+    MessageBus,
+    (request: ToolConfirmationRequest) => void
+  >();
+
   private toolCalls: ToolCall[] = [];
   private outputUpdateHandler?: OutputUpdateHandler;
   private onAllToolCallsComplete?: AllToolCallsCompleteHandler;
@@ -356,12 +364,34 @@ export class CoreToolScheduler {
     this.onEditorClose = options.onEditorClose;
 
     // Subscribe to message bus for ASK_USER policy decisions
+    // Use a static WeakMap to ensure we only subscribe ONCE per MessageBus instance
+    // This prevents memory leaks when multiple CoreToolScheduler instances are created
+    // (e.g., on every React render, or for each non-interactive tool call)
     if (this.config.getEnableMessageBusIntegration()) {
       const messageBus = this.config.getMessageBus();
-      messageBus.subscribe(
-        MessageBusType.TOOL_CONFIRMATION_REQUEST,
-        this.handleToolConfirmationRequest.bind(this),
-      );
+
+      // Check if we've already subscribed a handler to this message bus
+      if (!CoreToolScheduler.subscribedMessageBuses.has(messageBus)) {
+        // Create a shared handler that will be used for this message bus
+        const sharedHandler = (request: ToolConfirmationRequest) => {
+          // When an ASK_USER policy decision is made, respond with requiresUserConfirmation=true
+          // to tell tools to use their legacy confirmation flow.
+          messageBus.publish({
+            type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
+            correlationId: request.correlationId,
+            confirmed: false,
+            requiresUserConfirmation: true,
+          });
+        };
+
+        messageBus.subscribe(
+          MessageBusType.TOOL_CONFIRMATION_REQUEST,
+          sharedHandler,
+        );
+
+        // Store the handler in the WeakMap so we don't subscribe again
+        CoreToolScheduler.subscribedMessageBuses.set(messageBus, sharedHandler);
+      }
     }
   }
 
@@ -1170,26 +1200,6 @@ export class CoreToolScheduler {
     });
   }
 
-  /**
-   * Handle tool confirmation requests from the message bus when policy decision is ASK_USER.
-   * This publishes a response with requiresUserConfirmation=true to signal the tool
-   * that it should fall back to its legacy confirmation UI.
-   */
-  private handleToolConfirmationRequest(
-    request: ToolConfirmationRequest,
-  ): void {
-    // When ASK_USER policy decision is made, the message bus emits the request here.
-    // We respond with requiresUserConfirmation=true to tell the tool to use its
-    // legacy confirmation flow (which will show diffs, URLs, etc in the UI).
-    const messageBus = this.config.getMessageBus();
-    messageBus.publish({
-      type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
-      correlationId: request.correlationId,
-      confirmed: false, // Not auto-approved
-      requiresUserConfirmation: true, // Use legacy UI confirmation
-    });
-  }
-
   private isAutoApproved(toolCall: ValidatingToolCall): boolean {
     if (this.config.getApprovalMode() === ApprovalMode.YOLO) {
       return true;
diff --git a/packages/core/src/core/nonInteractiveToolExecutor.ts b/packages/core/src/core/nonInteractiveToolExecutor.ts
index e10988cfa6..52100e6ea0 100644
--- a/packages/core/src/core/nonInteractiveToolExecutor.ts
+++ b/packages/core/src/core/nonInteractiveToolExecutor.ts
@@ -19,15 +19,17 @@ export async function executeToolCall(
   abortSignal: AbortSignal,
 ): Promise<CompletedToolCall> {
   return new Promise<CompletedToolCall>((resolve, reject) => {
-    new CoreToolScheduler({
+    const scheduler = new CoreToolScheduler({
       config,
       getPreferredEditor: () => undefined,
       onEditorClose: () => {},
       onAllToolCallsComplete: async (completedToolCalls) => {
         resolve(completedToolCalls[0]);
       },
-    })
-      .schedule(toolCallRequest, abortSignal)
-      .catch(reject);
+    });
+
+    scheduler.schedule(toolCallRequest, abortSignal).catch((error) => {
+      reject(error);
+    });
   });
 }
diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts
index 40b58145f1..749dffe813 100644
--- a/packages/core/src/tools/edit.ts
+++ b/packages/core/src/tools/edit.ts
@@ -14,7 +14,13 @@ import type {
   ToolLocation,
   ToolResult,
 } from './tools.js';
-import { BaseDeclarativeTool, Kind, ToolConfirmationOutcome } from './tools.js';
+import {
+  BaseDeclarativeTool,
+  BaseToolInvocation,
+  Kind,
+  ToolConfirmationOutcome,
+} from './tools.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import { ToolErrorType } from './tool-error.js';
 import { makeRelative, shortenPath } from '../utils/paths.js';
 import { isNodeError } from '../utils/errors.js';
@@ -102,13 +108,21 @@ interface CalculatedEdit {
   isNewFile: boolean;
 }
 
-class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
+class EditToolInvocation
+  extends BaseToolInvocation<EditToolParams, ToolResult>
+  implements ToolInvocation<EditToolParams, ToolResult>
+{
   constructor(
     private readonly config: Config,
-    public params: EditToolParams,
-  ) {}
+    params: EditToolParams,
+    messageBus?: MessageBus,
+    toolName?: string,
+    displayName?: string,
+  ) {
+    super(params, messageBus, toolName, displayName);
+  }
 
-  toolLocations(): ToolLocation[] {
+  override toolLocations(): ToolLocation[] {
     return [{ path: this.params.file_path }];
   }
 
@@ -241,7 +255,7 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
    * Handles the confirmation prompt for the Edit tool in the CLI.
    * It needs to calculate the diff to show the user.
    */
-  async shouldConfirmExecute(
+  protected override async getConfirmationDetails(
     abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
     if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
@@ -467,7 +481,10 @@ export class EditTool
 {
   static readonly Name = EDIT_TOOL_NAME;
 
-  constructor(private readonly config: Config) {
+  constructor(
+    private readonly config: Config,
+    messageBus?: MessageBus,
+  ) {
     super(
       EditTool.Name,
       'Edit',
@@ -510,6 +527,9 @@ Expectation for required parameters:
         required: ['file_path', 'old_string', 'new_string'],
         type: 'object',
       },
+      true, // isOutputMarkdown
+      false, // canUpdateOutput
+      messageBus,
     );
   }
 
@@ -540,8 +560,17 @@ Expectation for required parameters:
 
   protected createInvocation(
     params: EditToolParams,
+    messageBus?: MessageBus,
+    toolName?: string,
+    displayName?: string,
   ): ToolInvocation<EditToolParams, ToolResult> {
-    return new EditToolInvocation(this.config, params);
+    return new EditToolInvocation(
+      this.config,
+      params,
+      messageBus ?? this.messageBus,
+      toolName ?? this.name,
+      displayName ?? this.displayName,
+    );
   }
 
   getModifyContext(_: AbortSignal): ModifyContext<EditToolParams> {
diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts
index d6d71ad600..822a41f24f 100644
--- a/packages/core/src/tools/mcp-tool.ts
+++ b/packages/core/src/tools/mcp-tool.ts
@@ -20,6 +20,7 @@ import {
 import type { CallableTool, FunctionCall, Part } from '@google/genai';
 import { ToolErrorType } from './tool-error.js';
 import type { Config } from '../config/config.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 
 type ToolParams = Record<string, unknown>;
 
@@ -244,6 +245,9 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool<
 
   protected createInvocation(
     params: ToolParams,
+    _messageBus?: MessageBus,
+    _toolName?: string,
+    _displayName?: string,
   ): ToolInvocation<ToolParams, ToolResult> {
     return new DiscoveredMCPToolInvocation(
       this.mcpTool,
diff --git a/packages/core/src/tools/memoryTool.ts b/packages/core/src/tools/memoryTool.ts
index 05b6c886d8..bdd2656e5b 100644
--- a/packages/core/src/tools/memoryTool.ts
+++ b/packages/core/src/tools/memoryTool.ts
@@ -24,6 +24,7 @@ import type {
 } from './modifiable-tool.js';
 import { ToolErrorType } from './tool-error.js';
 import { MEMORY_TOOL_NAME } from './tool-names.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 
 const memoryToolSchemaData: FunctionDeclaration = {
   name: MEMORY_TOOL_NAME,
@@ -58,8 +59,7 @@ Do NOT use this tool:
 
 ## Parameters
 
-- \`fact\` (string, required): The specific fact or piece of information to remember. This should be a clear, self-contained statement. For example, if the user says "My favorite color is blue", the fact would be "My favorite color is blue".
-`;
+- \`fact\` (string, required): The specific fact or piece of information to remember. This should be a clear, self-contained statement. For example, if the user says "My favorite color is blue", the fact would be "My favorite color is blue".`;
 
 export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md';
 export const MEMORY_SECTION_HEADER = '## Gemini Added Memories';
@@ -177,12 +177,21 @@ class MemoryToolInvocation extends BaseToolInvocation<
 > {
   private static readonly allowlist: Set<string> = new Set();
 
+  constructor(
+    params: SaveMemoryParams,
+    messageBus?: MessageBus,
+    toolName?: string,
+    displayName?: string,
+  ) {
+    super(params, messageBus, toolName, displayName);
+  }
+
   getDescription(): string {
     const memoryFilePath = getGlobalMemoryFilePath();
     return `in ${tildeifyPath(memoryFilePath)}`;
   }
 
-  override async shouldConfirmExecute(
+  protected override async getConfirmationDetails(
     _abortSignal: AbortSignal,
   ): Promise<ToolEditConfirmationDetails | false> {
     const memoryFilePath = getGlobalMemoryFilePath();
@@ -291,13 +300,16 @@ export class MemoryTool
 {
   static readonly Name = MEMORY_TOOL_NAME;
 
-  constructor() {
+  constructor(messageBus?: MessageBus) {
     super(
       MemoryTool.Name,
       'Save Memory',
       memoryToolDescription,
       Kind.Think,
       memoryToolSchemaData.parametersJsonSchema as Record<string, unknown>,
+      true,
+      false,
+      messageBus,
     );
   }
 
@@ -311,8 +323,18 @@ export class MemoryTool
     return null;
   }
 
-  protected createInvocation(params: SaveMemoryParams) {
-    return new MemoryToolInvocation(params);
+  protected createInvocation(
+    params: SaveMemoryParams,
+    messageBus?: MessageBus,
+    toolName?: string,
+    displayName?: string,
+  ) {
+    return new MemoryToolInvocation(
+      params,
+      messageBus ?? this.messageBus,
+      toolName ?? this.name,
+      displayName ?? this.displayName,
+    );
   }
 
   static async performAddMemoryEntry(
diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts
index ed7269cec7..ba67c8adcf 100644
--- a/packages/core/src/tools/shell.ts
+++ b/packages/core/src/tools/shell.ts
@@ -41,6 +41,7 @@ import {
   stripShellWrapper,
 } from '../utils/shell-utils.js';
 import { SHELL_TOOL_NAME } from './tool-names.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 
 export const OUTPUT_UPDATE_INTERVAL_MS = 1000;
 
@@ -58,8 +59,9 @@ export class ShellToolInvocation extends BaseToolInvocation<
     private readonly config: Config,
     params: ShellToolParams,
     private readonly allowlist: Set<string>,
+    messageBus?: MessageBus,
   ) {
-    super(params);
+    super(params, messageBus);
   }
 
   getDescription(): string {
@@ -76,7 +78,7 @@ export class ShellToolInvocation extends BaseToolInvocation<
     return description;
   }
 
-  override async shouldConfirmExecute(
+  protected override async getConfirmationDetails(
     _abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
     const command = stripShellWrapper(this.params.command);
@@ -372,7 +374,10 @@ export class ShellTool extends BaseDeclarativeTool<
 
   private allowlist: Set<string> = new Set();
 
-  constructor(private readonly config: Config) {
+  constructor(
+    private readonly config: Config,
+    messageBus?: MessageBus,
+  ) {
     void initializeShellParsers().catch(() => {
       // Errors are surfaced when parsing commands.
     });
@@ -403,6 +408,7 @@ export class ShellTool extends BaseDeclarativeTool<
       },
       false, // output is not markdown
       true, // output can be updated
+      messageBus,
     );
   }
 
@@ -444,7 +450,13 @@ export class ShellTool extends BaseDeclarativeTool<
 
   protected createInvocation(
     params: ShellToolParams,
+    messageBus?: MessageBus,
   ): ToolInvocation<ShellToolParams, ToolResult> {
-    return new ShellToolInvocation(this.config, params, this.allowlist);
+    return new ShellToolInvocation(
+      this.config,
+      params,
+      this.allowlist,
+      messageBus,
+    );
   }
 }
diff --git a/packages/core/src/tools/smart-edit.ts b/packages/core/src/tools/smart-edit.ts
index 113263ac0f..8c826292a8 100644
--- a/packages/core/src/tools/smart-edit.ts
+++ b/packages/core/src/tools/smart-edit.ts
@@ -10,6 +10,7 @@ import * as crypto from 'node:crypto';
 import * as Diff from 'diff';
 import {
   BaseDeclarativeTool,
+  BaseToolInvocation,
   Kind,
   type ToolCallConfirmationDetails,
   ToolConfirmationOutcome,
@@ -19,6 +20,7 @@ import {
   type ToolResult,
   type ToolResultDisplay,
 } from './tools.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import { ToolErrorType } from './tool-error.js';
 import { makeRelative, shortenPath } from '../utils/paths.js';
 import { isNodeError } from '../utils/errors.js';
@@ -369,13 +371,21 @@ interface CalculatedEdit {
   originalLineEnding: '\r\n' | '\n';
 }
 
-class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
+class EditToolInvocation
+  extends BaseToolInvocation<EditToolParams, ToolResult>
+  implements ToolInvocation<EditToolParams, ToolResult>
+{
   constructor(
     private readonly config: Config,
-    public params: EditToolParams,
-  ) {}
+    params: EditToolParams,
+    messageBus?: MessageBus,
+    toolName?: string,
+    displayName?: string,
+  ) {
+    super(params, messageBus, toolName, displayName);
+  }
 
-  toolLocations(): ToolLocation[] {
+  override toolLocations(): ToolLocation[] {
     return [{ path: this.params.file_path }];
   }
 
@@ -602,7 +612,7 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
    * Handles the confirmation prompt for the Edit tool in the CLI.
    * It needs to calculate the diff to show the user.
    */
-  async shouldConfirmExecute(
+  protected override async getConfirmationDetails(
     abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
     if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
@@ -818,7 +828,10 @@ export class SmartEditTool
 {
   static readonly Name = EDIT_TOOL_NAME;
 
-  constructor(private readonly config: Config) {
+  constructor(
+    private readonly config: Config,
+    messageBus?: MessageBus,
+  ) {
     super(
       SmartEditTool.Name,
       'Edit',
@@ -875,6 +888,9 @@ A good instruction should concisely answer:
         required: ['file_path', 'instruction', 'old_string', 'new_string'],
         type: 'object',
       },
+      true, // isOutputMarkdown
+      false, // canUpdateOutput
+      messageBus,
     );
   }
 
@@ -914,7 +930,13 @@ A good instruction should concisely answer:
   protected createInvocation(
     params: EditToolParams,
   ): ToolInvocation<EditToolParams, ToolResult> {
-    return new EditToolInvocation(this.config, params);
+    return new EditToolInvocation(
+      this.config,
+      params,
+      this.messageBus,
+      this.name,
+      this.displayName,
+    );
   }
 
   getModifyContext(_: AbortSignal): ModifyContext<EditToolParams> {
diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts
index efd647c2bf..f24365913e 100644
--- a/packages/core/src/tools/tool-registry.ts
+++ b/packages/core/src/tools/tool-registry.ts
@@ -21,6 +21,7 @@ import { parse } from 'shell-quote';
 import { ToolErrorType } from './tool-error.js';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';
 import type { EventEmitter } from 'node:events';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import { debugLogger } from '../utils/debugLogger.js';
 
 type ToolParams = Record<string, unknown>;
@@ -162,6 +163,9 @@ Signal: Signal number or \`(none)\` if no signal was received.
 
   protected createInvocation(
     params: ToolParams,
+    _messageBus?: MessageBus,
+    _toolName?: string,
+    _displayName?: string,
   ): ToolInvocation<ToolParams, ToolResult> {
     return new DiscoveredToolInvocation(this.config, this.name, params);
   }
diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts
index 1f4f3db3da..4ea20de673 100644
--- a/packages/core/src/tools/tools.ts
+++ b/packages/core/src/tools/tools.ts
@@ -104,25 +104,37 @@ export abstract class BaseToolInvocation<
       }
 
       if (decision === 'ASK_USER') {
-        const confirmationDetails: ToolCallConfirmationDetails = {
-          type: 'info',
-          title: `Confirm: ${this._toolDisplayName || this._toolName}`,
-          prompt: this.getDescription(),
-          onConfirm: async (outcome: ToolConfirmationOutcome) => {
-            if (outcome === ToolConfirmationOutcome.ProceedAlways) {
-              if (this.messageBus && this._toolName) {
-                this.messageBus.publish({
-                  type: MessageBusType.UPDATE_POLICY,
-                  toolName: this._toolName,
-                });
-              }
-            }
-          },
-        };
-        return confirmationDetails;
+        return this.getConfirmationDetails(abortSignal);
       }
     }
-    return false;
+    // Without a message bus, fall back to the default confirmation flow.
+    return this.getConfirmationDetails(abortSignal);
+  }
+
+  /**
+   * Subclasses should override this method to provide custom confirmation UI.
+   * It is called when the policy engine's decision is 'ASK_USER', and as the default flow when no message bus is used.
+   * The base implementation provides a generic confirmation prompt.
+   */
+  protected async getConfirmationDetails(
+    _abortSignal: AbortSignal,
+  ): Promise<ToolCallConfirmationDetails | false> {
+    const confirmationDetails: ToolCallConfirmationDetails = {
+      type: 'info',
+      title: `Confirm: ${this._toolDisplayName || this._toolName}`,
+      prompt: this.getDescription(),
+      onConfirm: async (outcome: ToolConfirmationOutcome) => {
+        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
+          if (this.messageBus && this._toolName) {
+            this.messageBus.publish({
+              type: MessageBusType.UPDATE_POLICY,
+              toolName: this._toolName,
+            });
+          }
+        }
+      },
+    };
+    return confirmationDetails;
   }
 
   protected getMessageBusDecision(
diff --git a/packages/core/src/tools/web-fetch.test.ts b/packages/core/src/tools/web-fetch.test.ts
index 69adeb23ac..f8d9d1cfe8 100644
--- a/packages/core/src/tools/web-fetch.test.ts
+++ b/packages/core/src/tools/web-fetch.test.ts
@@ -521,7 +521,7 @@ describe('WebFetchTool', () => {
 
       // Should reject with error when denied
       await expect(confirmationPromise).rejects.toThrow(
-        'Tool execution denied by policy',
+        'Tool execution for "WebFetch" denied by policy.',
       );
     });
 
@@ -559,7 +559,7 @@ describe('WebFetchTool', () => {
       abortController.abort();
 
       await expect(confirmationPromise).rejects.toThrow(
-        'Tool execution denied by policy.',
+        'Tool execution for "WebFetch" denied by policy.',
       );
     });
 
diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts
index 3e6c529f95..c914885af9 100644
--- a/packages/core/src/tools/web-fetch.ts
+++ b/packages/core/src/tools/web-fetch.ts
@@ -205,21 +205,9 @@ ${textContent}
     return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
   }
 
-  override async shouldConfirmExecute(
-    abortSignal: AbortSignal,
+  protected override async getConfirmationDetails(
+    _abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
-    // Try message bus confirmation first if available
-    if (this.messageBus) {
-      const decision = await this.getMessageBusDecision(abortSignal);
-      if (decision === 'ALLOW') {
-        return false; // No confirmation needed
-      }
-      if (decision === 'DENY') {
-        throw new Error('Tool execution denied by policy.');
-      }
-      // if 'ASK_USER', fall through to legacy logic
-    }
-
     // Legacy confirmation flow (no message bus OR policy decision was ASK_USER)
     if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
       return false;
diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts
index d18e2b6939..c22165dbb0 100644
--- a/packages/core/src/tools/write-file.ts
+++ b/packages/core/src/tools/write-file.ts
@@ -42,6 +42,7 @@ import { FileOperationEvent } from '../telemetry/types.js';
 import { FileOperation } from '../telemetry/metrics.js';
 import { getSpecificMimeType } from '../utils/fileUtils.js';
 import { getLanguageFromFilePath } from '../utils/language-detection.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 
 /**
  * Parameters for the WriteFile tool
@@ -144,8 +145,11 @@ class WriteFileToolInvocation extends BaseToolInvocation<
   constructor(
     private readonly config: Config,
     params: WriteFileToolParams,
+    messageBus?: MessageBus,
+    toolName?: string,
+    displayName?: string,
   ) {
-    super(params);
+    super(params, messageBus, toolName, displayName);
   }
 
   override toolLocations(): ToolLocation[] {
@@ -160,7 +164,7 @@ class WriteFileToolInvocation extends BaseToolInvocation<
     return `Writing to ${shortenPath(relativePath)}`;
   }
 
-  override async shouldConfirmExecute(
+  protected override async getConfirmationDetails(
     abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
     if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
@@ -392,7 +396,10 @@ export class WriteFileTool
 {
   static readonly Name = WRITE_FILE_TOOL_NAME;
 
-  constructor(private readonly config: Config) {
+  constructor(
+    private readonly config: Config,
+    messageBus?: MessageBus,
+  ) {
     super(
       WriteFileTool.Name,
       'WriteFile',
@@ -415,6 +422,9 @@ export class WriteFileTool
         required: ['file_path', 'content'],
         type: 'object',
       },
+      true,
+      false,
+      messageBus,
     );
   }
 
@@ -458,7 +468,13 @@ export class WriteFileTool
   protected createInvocation(
     params: WriteFileToolParams,
   ): ToolInvocation<WriteFileToolParams, ToolResult> {
-    return new WriteFileToolInvocation(this.config, params);
+    return new WriteFileToolInvocation(
+      this.config,
+      params,
+      this.messageBus,
+      this.name,
+      this.displayName,
+    );
   }
 
   getModifyContext(
diff --git a/packages/core/src/tools/write-todos.ts b/packages/core/src/tools/write-todos.ts
index 896861613d..8f80904c85 100644
--- a/packages/core/src/tools/write-todos.ts
+++ b/packages/core/src/tools/write-todos.ts
@@ -12,6 +12,7 @@ import {
   type Todo,
   type ToolResult,
 } from './tools.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import { WRITE_TODOS_TOOL_NAME } from './tool-names.js';
 
 const TODO_STATUSES = [
@@ -204,6 +205,9 @@ export class WriteTodosTool extends BaseDeclarativeTool<
 
   protected createInvocation(
     params: WriteTodosToolParams,
+    _messageBus?: MessageBus,
+    _toolName?: string,
+    _displayName?: string,
   ): ToolInvocation<WriteTodosToolParams, ToolResult> {
     return new WriteTodosToolInvocation(params);
   }
diff --git a/packages/core/src/utils/errors.ts b/packages/core/src/utils/errors.ts
index 030910ce88..fa5d8bf6d3 100644
--- a/packages/core/src/utils/errors.ts
+++ b/packages/core/src/utils/errors.ts
@@ -70,6 +70,13 @@ export class FatalCancellationError extends FatalError {
   }
 }
 
+export class CanceledError extends Error {
+  constructor(message = 'The operation was canceled.') {
+    super(message);
+    this.name = 'CanceledError';
+  }
+}
+
 export class ForbiddenError extends Error {}
 export class UnauthorizedError extends Error {}
 export class BadRequestError extends Error {}

From 40057b55f0c725458b4f3291e85985fcf1716bd8 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@google.com>
Date: Fri, 24 Oct 2025 13:20:17 -0700
Subject: [PATCH 11/73] fix(cli): Use correct defaults for file filtering
 (#11426)

---
 packages/cli/src/config/config.test.ts | 14 ++++++++++++++
 packages/cli/src/config/config.ts      | 10 ++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index b935d4a696..6b36235be4 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -8,6 +8,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import * as os from 'node:os';
 import * as path from 'node:path';
 import {
+  DEFAULT_FILE_FILTERING_OPTIONS,
   DEFAULT_GEMINI_MODEL,
   DEFAULT_GEMINI_MODEL_AUTO,
   OutputFormat,
@@ -583,6 +584,19 @@ describe('loadCliConfig', () => {
       });
     });
   });
+
+  it('should use default fileFilter options when unconfigured', async () => {
+    process.argv = ['node', 'script.js'];
+    const argv = await parseArguments({} as Settings);
+    const settings: Settings = {};
+    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    expect(config.getFileFilteringRespectGitIgnore()).toBe(
+      DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore,
+    );
+    expect(config.getFileFilteringRespectGeminiIgnore()).toBe(
+      DEFAULT_FILE_FILTERING_OPTIONS.respectGeminiIgnore,
+    );
+  });
 });
 
 describe('Hierarchical Memory Loading (config.ts) - Placeholder Suite', () => {
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index f6ae37a0b6..760b8c4097 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -27,6 +27,7 @@ import {
   DEFAULT_GEMINI_MODEL,
   DEFAULT_GEMINI_MODEL_AUTO,
   DEFAULT_GEMINI_EMBEDDING_MODEL,
+  DEFAULT_FILE_FILTERING_OPTIONS,
   DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
   FileDiscoveryService,
   WRITE_FILE_TOOL_NAME,
@@ -394,11 +395,16 @@ export async function loadCliConfig(
 
   const fileService = new FileDiscoveryService(cwd);
 
-  const fileFiltering = {
+  const memoryFileFiltering = {
     ...DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
     ...settings.context?.fileFiltering,
   };
 
+  const fileFiltering = {
+    ...DEFAULT_FILE_FILTERING_OPTIONS,
+    ...settings.context?.fileFiltering,
+  };
+
   const includeDirectories = (settings.context?.includeDirectories || [])
     .map(resolvePath)
     .concat((argv.includeDirectories || []).map(resolvePath));
@@ -416,7 +422,7 @@ export async function loadCliConfig(
       allExtensions,
       trustedFolder,
       memoryImportFormat,
-      fileFiltering,
+      memoryFileFiltering,
     );
 
   let mcpServers = mergeMcpServers(settings, allExtensions);

From 7e2642b9f109b1ddc64b3fadbe2a66da9489157d Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Fri, 24 Oct 2025 14:00:05 -0700
Subject: [PATCH 12/73] fix(core): use debugLogger.warn for loop detection
 errors (#11986)

---
 packages/core/src/services/loopDetectionService.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts
index f8e9216398..d2fbb3746d 100644
--- a/packages/core/src/services/loopDetectionService.ts
+++ b/packages/core/src/services/loopDetectionService.ts
@@ -432,7 +432,7 @@ export class LoopDetectionService {
       });
     } catch (e) {
       // Do nothing, treat it as a non-loop.
-      this.config.getDebugMode() ? console.error(e) : debugLogger.debug(e);
+      this.config.getDebugMode() ? debugLogger.warn(e) : debugLogger.debug(e);
       return false;
     }
 

From 810d940e578c160e7c6e6da6a03d951c00114f1e Mon Sep 17 00:00:00 2001
From: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Date: Fri, 24 Oct 2025 14:23:39 -0700
Subject: [PATCH 13/73] fix(update): replace update-notifier with
 latest-version  (#11989)

---
 package-lock.json                             | 358 +++---------------
 package.json                                  |   2 +-
 packages/cli/package.json                     |   2 +-
 packages/cli/src/config/config.ts             |   4 +
 packages/cli/src/ui/utils/updateCheck.test.ts |  69 +---
 packages/cli/src/ui/utils/updateCheck.ts      |  75 ++--
 packages/cli/src/utils/handleAutoUpdate.ts    |   7 +
 7 files changed, 114 insertions(+), 403 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index f30a484e63..a0e554676c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -12,6 +12,7 @@
       ],
       "dependencies": {
         "@testing-library/dom": "^10.4.1",
+        "latest-version": "^9.0.0",
         "simple-git": "^3.28.0"
       },
       "bin": {
@@ -3891,6 +3892,16 @@
         "text-table": "^0.2.0"
       }
     },
+    "node_modules/@textlint/linter-formatter/node_modules/ansi-regex": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
+      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/@textlint/linter-formatter/node_modules/argparse": {
       "version": "1.0.10",
       "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
@@ -5481,6 +5492,15 @@
         "string-width": "^4.1.0"
       }
     },
+    "node_modules/ansi-align/node_modules/ansi-regex": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
+      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/ansi-align/node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
@@ -5967,15 +5987,6 @@
       "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
       "license": "MIT"
     },
-    "node_modules/atomically": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/atomically/-/atomically-2.0.3.tgz",
-      "integrity": "sha512-kU6FmrwZ3Lx7/7y3hPS5QnbJfaohcIul5fGqf7ok+4KklIEk9tJ0C2IQPdacSbVUWv6zVHXEBWoWd6NrVMT7Cw==",
-      "dependencies": {
-        "stubborn-fs": "^1.2.5",
-        "when-exit": "^2.1.1"
-      }
-    },
     "node_modules/auto-bind": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/auto-bind/-/auto-bind-5.0.1.tgz",
@@ -6987,30 +6998,6 @@
         "proto-list": "~1.2.1"
       }
     },
-    "node_modules/config-chain/node_modules/ini": {
-      "version": "1.3.8",
-      "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
-      "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
-      "license": "ISC"
-    },
-    "node_modules/configstore": {
-      "version": "7.0.0",
-      "resolved": "https://registry.npmjs.org/configstore/-/configstore-7.0.0.tgz",
-      "integrity": "sha512-yk7/5PN5im4qwz0WFZW3PXnzHgPu9mX29Y8uZ3aefe2lBPC1FYttWZRcaW9fKkT0pBCJyuQ2HfbmPVaODi9jcQ==",
-      "license": "BSD-2-Clause",
-      "dependencies": {
-        "atomically": "^2.0.3",
-        "dot-prop": "^9.0.0",
-        "graceful-fs": "^4.2.11",
-        "xdg-basedir": "^5.1.0"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/yeoman/configstore?sponsor=1"
-      }
-    },
     "node_modules/content-disposition": {
       "version": "0.5.4",
       "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
@@ -7630,33 +7617,6 @@
         "url": "https://github.com/fb55/domutils?sponsor=1"
       }
     },
-    "node_modules/dot-prop": {
-      "version": "9.0.0",
-      "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-9.0.0.tgz",
-      "integrity": "sha512-1gxPBJpI/pcjQhKgIU91II6Wkay+dLcN3M6rf2uwP8hRur3HtQXjVrdAK3sjC0piaEuxzMwjXChcETiJl47lAQ==",
-      "license": "MIT",
-      "dependencies": {
-        "type-fest": "^4.18.2"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/dot-prop/node_modules/type-fest": {
-      "version": "4.41.0",
-      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz",
-      "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==",
-      "license": "(MIT OR CC0-1.0)",
-      "engines": {
-        "node": ">=16"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/dotenv": {
       "version": "17.1.0",
       "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.1.0.tgz",
@@ -8066,18 +8026,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/escape-goat": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/escape-goat/-/escape-goat-4.0.0.tgz",
-      "integrity": "sha512-2Sd4ShcWxbx6OY1IHyla/CVNwvg7XwZVoXZHcSu9w9SReNP1EzzD5T8NWKIR38fIqEns9kDWKUQTXXAmlDrdPg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/escape-html": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
@@ -9477,21 +9425,6 @@
         "url": "https://github.com/sponsors/isaacs"
       }
     },
-    "node_modules/global-directory": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz",
-      "integrity": "sha512-wHTUcDUoZ1H5/0iVqEudYW4/kAlN5cZ3j/bXn0Dpbizl9iaUVeWSHqiOjsgk6OW2bkLclbBjzewBz6weQ1zA2Q==",
-      "license": "MIT",
-      "dependencies": {
-        "ini": "4.1.1"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/globals": {
       "version": "16.3.0",
       "resolved": "https://registry.npmjs.org/globals/-/globals-16.3.0.tgz",
@@ -10200,13 +10133,10 @@
       "license": "ISC"
     },
     "node_modules/ini": {
-      "version": "4.1.1",
-      "resolved": "https://registry.npmjs.org/ini/-/ini-4.1.1.tgz",
-      "integrity": "sha512-QQnnxNyfvmHFIsj7gkPcYymR8Jdw/o7mp5ZFihxn6h8Ci6fh3Dx4E1gPjpQEpIuPo9XVNY/ZUwh4BPMjGyL01g==",
-      "license": "ISC",
-      "engines": {
-        "node": "^14.17.0 || ^16.13.0 || >=18.0.0"
-      }
+      "version": "1.3.8",
+      "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
+      "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
+      "license": "ISC"
     },
     "node_modules/ink": {
       "version": "6.2.3",
@@ -10652,21 +10582,6 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/is-in-ci": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/is-in-ci/-/is-in-ci-1.0.0.tgz",
-      "integrity": "sha512-eUuAjybVTHMYWm/U+vBO1sY/JOCgoPCXRxzdju0K+K0BiGW0SChEL1MLC0PoCIR1OlPo5YAp8HuQoUlsWEICwg==",
-      "license": "MIT",
-      "bin": {
-        "is-in-ci": "cli.js"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/is-inside-container": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/is-inside-container/-/is-inside-container-1.0.0.tgz",
@@ -10685,22 +10600,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/is-installed-globally": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/is-installed-globally/-/is-installed-globally-1.0.0.tgz",
-      "integrity": "sha512-K55T22lfpQ63N4KEN57jZUAaAYqYHEe8veb/TycJRk9DdSCLLcovXz/mL6mOnhQaZsQGwPhuFopdQIlqGSEjiQ==",
-      "license": "MIT",
-      "dependencies": {
-        "global-directory": "^4.0.1",
-        "is-path-inside": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/is-map": {
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz",
@@ -10734,18 +10633,6 @@
       "dev": true,
       "license": "MIT"
     },
-    "node_modules/is-npm": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/is-npm/-/is-npm-6.0.0.tgz",
-      "integrity": "sha512-JEjxbSmtPSt1c8XTkVrlujcXdKV1/tvuQ7GwKcAlyiVLeYFQ2VHat8xfrDJsIkhCdF/tZ7CiIR3sy141c6+gPQ==",
-      "license": "MIT",
-      "engines": {
-        "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/is-number": {
       "version": "7.0.0",
       "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
@@ -10782,18 +10669,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/is-path-inside": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/is-path-inside/-/is-path-inside-4.0.0.tgz",
-      "integrity": "sha512-lJJV/5dYS+RcL8uQdBDW9c9uWFLLBNRyFhnAKXw5tVqLlKZ4RMGZKv+YQ/IA3OhD+RpbJa1LLFM1FQPGyIXvOA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/is-plain-obj": {
       "version": "4.1.0",
       "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz",
@@ -11413,9 +11288,9 @@
       "license": "MIT"
     },
     "node_modules/ky": {
-      "version": "1.8.1",
-      "resolved": "https://registry.npmjs.org/ky/-/ky-1.8.1.tgz",
-      "integrity": "sha512-7Bp3TpsE+L+TARSnnDpk3xg8Idi8RwSLdj6CMbNWoOARIrGrbuLGusV0dYwbZOm4bB3jHNxSw8Wk/ByDqJEnDw==",
+      "version": "1.13.0",
+      "resolved": "https://registry.npmjs.org/ky/-/ky-1.13.0.tgz",
+      "integrity": "sha512-JeNNGs44hVUp2XxO3FY9WV28ymG7LgO4wju4HL/dCq1A8eKDcFgVrdCn1ssn+3Q/5OQilv5aYsL0DMt5mmAV9w==",
       "license": "MIT",
       "engines": {
         "node": ">=18"
@@ -13805,21 +13680,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/pupa": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/pupa/-/pupa-3.1.0.tgz",
-      "integrity": "sha512-FLpr4flz5xZTSJxSeaheeMKN/EDzMdK7b8PTOC6a5PYFKTucWbdqjgqaEyH0shFiSJrVB1+Qqi4Tk19ccU6Aug==",
-      "license": "MIT",
-      "dependencies": {
-        "escape-goat": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=12.20"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/qs": {
       "version": "6.13.0",
       "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
@@ -13940,12 +13800,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/rc/node_modules/ini": {
-      "version": "1.3.8",
-      "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
-      "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
-      "license": "ISC"
-    },
     "node_modules/rc/node_modules/strip-json-comments": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
@@ -15481,15 +15335,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/strip-ansi/node_modules/ansi-regex": {
-      "version": "5.0.1",
-      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
-      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=8"
-      }
-    },
     "node_modules/strip-bom": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz",
@@ -15559,11 +15404,6 @@
         "boundary": "^2.0.0"
       }
     },
-    "node_modules/stubborn-fs": {
-      "version": "1.2.5",
-      "resolved": "https://registry.npmjs.org/stubborn-fs/-/stubborn-fs-1.2.5.tgz",
-      "integrity": "sha512-H2N9c26eXjzL/S/K+i/RHHcFanE74dptvvjM8iwzwbVcWY/zjBbgRqF3K0DY4+OD+uTTASTBvDoxPDaPN02D7g=="
-    },
     "node_modules/stubs": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/stubs/-/stubs-3.0.0.tgz",
@@ -15724,6 +15564,16 @@
         "url": "https://github.com/sponsors/epoberezkin"
       }
     },
+    "node_modules/table/node_modules/ansi-regex": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
+      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/table/node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
@@ -16589,126 +16439,6 @@
         "node": ">= 0.8"
       }
     },
-    "node_modules/update-notifier": {
-      "version": "7.3.1",
-      "resolved": "https://registry.npmjs.org/update-notifier/-/update-notifier-7.3.1.tgz",
-      "integrity": "sha512-+dwUY4L35XFYEzE+OAL3sarJdUioVovq+8f7lcIJ7wnmnYQV5UD1Y/lcwaMSyaQ6Bj3JMj1XSTjZbNLHn/19yA==",
-      "license": "BSD-2-Clause",
-      "dependencies": {
-        "boxen": "^8.0.1",
-        "chalk": "^5.3.0",
-        "configstore": "^7.0.0",
-        "is-in-ci": "^1.0.0",
-        "is-installed-globally": "^1.0.0",
-        "is-npm": "^6.0.0",
-        "latest-version": "^9.0.0",
-        "pupa": "^3.1.0",
-        "semver": "^7.6.3",
-        "xdg-basedir": "^5.1.0"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/yeoman/update-notifier?sponsor=1"
-      }
-    },
-    "node_modules/update-notifier/node_modules/boxen": {
-      "version": "8.0.1",
-      "resolved": "https://registry.npmjs.org/boxen/-/boxen-8.0.1.tgz",
-      "integrity": "sha512-F3PH5k5juxom4xktynS7MoFY+NUWH5LC4CnH11YB8NPew+HLpmBLCybSAEyb2F+4pRXhuhWqFesoQd6DAyc2hw==",
-      "license": "MIT",
-      "dependencies": {
-        "ansi-align": "^3.0.1",
-        "camelcase": "^8.0.0",
-        "chalk": "^5.3.0",
-        "cli-boxes": "^3.0.0",
-        "string-width": "^7.2.0",
-        "type-fest": "^4.21.0",
-        "widest-line": "^5.0.0",
-        "wrap-ansi": "^9.0.0"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/update-notifier/node_modules/camelcase": {
-      "version": "8.0.0",
-      "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-8.0.0.tgz",
-      "integrity": "sha512-8WB3Jcas3swSvjIeA2yvCJ+Miyz5l1ZmB6HFb9R1317dt9LCQoswg/BGrmAmkWVEszSrrg4RwmO46qIm2OEnSA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=16"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/update-notifier/node_modules/chalk": {
-      "version": "5.4.1",
-      "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.4.1.tgz",
-      "integrity": "sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==",
-      "license": "MIT",
-      "engines": {
-        "node": "^12.17.0 || ^14.13 || >=16.0.0"
-      },
-      "funding": {
-        "url": "https://github.com/chalk/chalk?sponsor=1"
-      }
-    },
-    "node_modules/update-notifier/node_modules/emoji-regex": {
-      "version": "10.4.0",
-      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.4.0.tgz",
-      "integrity": "sha512-EC+0oUMY1Rqm4O6LLrgjtYDvcVYTy7chDnM4Q7030tP4Kwj3u/pR6gP9ygnp2CJMK5Gq+9Q2oqmrFJAz01DXjw==",
-      "license": "MIT"
-    },
-    "node_modules/update-notifier/node_modules/string-width": {
-      "version": "7.2.0",
-      "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
-      "integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==",
-      "license": "MIT",
-      "dependencies": {
-        "emoji-regex": "^10.3.0",
-        "get-east-asian-width": "^1.0.0",
-        "strip-ansi": "^7.1.0"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/update-notifier/node_modules/type-fest": {
-      "version": "4.41.0",
-      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz",
-      "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==",
-      "license": "(MIT OR CC0-1.0)",
-      "engines": {
-        "node": ">=16"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/update-notifier/node_modules/widest-line": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/widest-line/-/widest-line-5.0.0.tgz",
-      "integrity": "sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==",
-      "license": "MIT",
-      "dependencies": {
-        "string-width": "^7.0.0"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/uri-js": {
       "version": "4.4.1",
       "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
@@ -17092,12 +16822,6 @@
         "node": ">=18"
       }
     },
-    "node_modules/when-exit": {
-      "version": "2.1.4",
-      "resolved": "https://registry.npmjs.org/when-exit/-/when-exit-2.1.4.tgz",
-      "integrity": "sha512-4rnvd3A1t16PWzrBUcSDZqcAmsUIy4minDXT/CZ8F2mVDgd65i4Aalimgz1aQkRGU0iH5eT5+6Rx2TK8o443Pg==",
-      "license": "MIT"
-    },
     "node_modules/which": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
@@ -18003,6 +17727,7 @@
         "ink": "^6.2.3",
         "ink-gradient": "^3.0.0",
         "ink-spinner": "^5.0.0",
+        "latest-version": "^9.0.0",
         "lowlight": "^3.3.0",
         "mnemonist": "^0.40.3",
         "open": "^10.1.2",
@@ -18016,7 +17741,6 @@
         "strip-json-comments": "^3.1.1",
         "tar": "^7.5.1",
         "undici": "^7.10.0",
-        "update-notifier": "^7.3.1",
         "wrap-ansi": "9.0.2",
         "yargs": "^17.7.2",
         "zod": "^3.23.8"
@@ -18079,6 +17803,12 @@
         }
       }
     },
+    "packages/cli/node_modules/emoji-regex": {
+      "version": "10.6.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.6.0.tgz",
+      "integrity": "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==",
+      "license": "MIT"
+    },
     "packages/cli/node_modules/string-width": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
diff --git a/package.json b/package.json
index c0a3885231..ae3bdfa852 100644
--- a/package.json
+++ b/package.json
@@ -59,7 +59,6 @@
   },
   "overrides": {
     "wrap-ansi": "9.0.2",
-    "ansi-regex": "5.0.1",
     "cliui": {
       "wrap-ansi": "7.0.0"
     }
@@ -113,6 +112,7 @@
   },
   "dependencies": {
     "@testing-library/dom": "^10.4.1",
+    "latest-version": "^9.0.0",
     "simple-git": "^3.28.0"
   },
   "optionalDependencies": {
diff --git a/packages/cli/package.json b/packages/cli/package.json
index a2e62e4a33..df73c1496b 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -45,6 +45,7 @@
     "ink": "^6.2.3",
     "ink-gradient": "^3.0.0",
     "ink-spinner": "^5.0.0",
+    "latest-version": "^9.0.0",
     "lowlight": "^3.3.0",
     "mnemonist": "^0.40.3",
     "open": "^10.1.2",
@@ -58,7 +59,6 @@
     "strip-json-comments": "^3.1.1",
     "tar": "^7.5.1",
     "undici": "^7.10.0",
-    "update-notifier": "^7.3.1",
     "wrap-ansi": "9.0.2",
     "yargs": "^17.7.2",
     "zod": "^3.23.8"
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 760b8c4097..7617770b79 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -375,6 +375,10 @@ export async function loadCliConfig(
 ): Promise<Config> {
   const debugMode = isDebugMode(argv);
 
+  if (argv.sandbox) {
+    process.env['GEMINI_SANDBOX'] = 'true';
+  }
+
   const memoryImportFormat = settings.context?.importFormat || 'tree';
 
   const ideMode = settings.ide?.enabled ?? false;
diff --git a/packages/cli/src/ui/utils/updateCheck.test.ts b/packages/cli/src/ui/utils/updateCheck.test.ts
index 4a2a74c83a..085fd2ea28 100644
--- a/packages/cli/src/ui/utils/updateCheck.test.ts
+++ b/packages/cli/src/ui/utils/updateCheck.test.ts
@@ -13,9 +13,9 @@ vi.mock('../../utils/package.js', () => ({
   getPackageJson,
 }));
 
-const updateNotifier = vi.hoisted(() => vi.fn());
-vi.mock('update-notifier', () => ({
-  default: updateNotifier,
+const latestVersion = vi.hoisted(() => vi.fn());
+vi.mock('latest-version', () => ({
+  default: latestVersion,
 }));
 
 describe('checkForUpdates', () => {
@@ -46,7 +46,7 @@ describe('checkForUpdates', () => {
     const result = await checkForUpdates(mockSettings);
     expect(result).toBeNull();
     expect(getPackageJson).not.toHaveBeenCalled();
-    expect(updateNotifier).not.toHaveBeenCalled();
+    expect(latestVersion).not.toHaveBeenCalled();
   });
 
   it('should return null when running from source (DEV=true)', async () => {
@@ -55,15 +55,11 @@ describe('checkForUpdates', () => {
       name: 'test-package',
       version: '1.0.0',
     });
-    updateNotifier.mockReturnValue({
-      fetchInfo: vi
-        .fn()
-        .mockResolvedValue({ current: '1.0.0', latest: '1.1.0' }),
-    });
+    latestVersion.mockResolvedValue('1.1.0');
     const result = await checkForUpdates(mockSettings);
     expect(result).toBeNull();
     expect(getPackageJson).not.toHaveBeenCalled();
-    expect(updateNotifier).not.toHaveBeenCalled();
+    expect(latestVersion).not.toHaveBeenCalled();
   });
 
   it('should return null if package.json is missing', async () => {
@@ -77,9 +73,7 @@ describe('checkForUpdates', () => {
       name: 'test-package',
       version: '1.0.0',
     });
-    updateNotifier.mockReturnValue({
-      fetchInfo: vi.fn().mockResolvedValue(null),
-    });
+    latestVersion.mockResolvedValue('1.0.0');
     const result = await checkForUpdates(mockSettings);
     expect(result).toBeNull();
   });
@@ -89,15 +83,13 @@ describe('checkForUpdates', () => {
       name: 'test-package',
       version: '1.0.0',
     });
-    updateNotifier.mockReturnValue({
-      fetchInfo: vi
-        .fn()
-        .mockResolvedValue({ current: '1.0.0', latest: '1.1.0' }),
-    });
+    latestVersion.mockResolvedValue('1.1.0');
 
     const result = await checkForUpdates(mockSettings);
     expect(result?.message).toContain('1.0.0 → 1.1.0');
-    expect(result?.update).toEqual({ current: '1.0.0', latest: '1.1.0' });
+    expect(result?.update.current).toEqual('1.0.0');
+    expect(result?.update.latest).toEqual('1.1.0');
+    expect(result?.update.name).toEqual('test-package');
   });
 
   it('should return null if the latest version is the same as the current version', async () => {
@@ -105,11 +97,7 @@ describe('checkForUpdates', () => {
       name: 'test-package',
       version: '1.0.0',
     });
-    updateNotifier.mockReturnValue({
-      fetchInfo: vi
-        .fn()
-        .mockResolvedValue({ current: '1.0.0', latest: '1.0.0' }),
-    });
+    latestVersion.mockResolvedValue('1.0.0');
     const result = await checkForUpdates(mockSettings);
     expect(result).toBeNull();
   });
@@ -119,23 +107,17 @@ describe('checkForUpdates', () => {
       name: 'test-package',
       version: '1.1.0',
     });
-    updateNotifier.mockReturnValue({
-      fetchInfo: vi
-        .fn()
-        .mockResolvedValue({ current: '1.1.0', latest: '1.0.0' }),
-    });
+    latestVersion.mockResolvedValue('1.0.0');
     const result = await checkForUpdates(mockSettings);
     expect(result).toBeNull();
   });
 
-  it('should return null if fetchInfo rejects', async () => {
+  it('should return null if latestVersion rejects', async () => {
     getPackageJson.mockResolvedValue({
       name: 'test-package',
       version: '1.0.0',
     });
-    updateNotifier.mockReturnValue({
-      fetchInfo: vi.fn().mockRejectedValue(new Error('Timeout')),
-    });
+    latestVersion.mockRejectedValue(new Error('Timeout'));
 
     const result = await checkForUpdates(mockSettings);
     expect(result).toBeNull();
@@ -154,26 +136,13 @@ describe('checkForUpdates', () => {
         version: '1.2.3-nightly.1',
       });
 
-      const fetchInfoMock = vi.fn().mockImplementation(({ distTag }) => {
-        if (distTag === 'nightly') {
-          return Promise.resolve({
-            latest: '1.2.3-nightly.2',
-            current: '1.2.3-nightly.1',
-          });
+      latestVersion.mockImplementation(async (name, options) => {
+        if (options?.version === 'nightly') {
+          return '1.2.3-nightly.2';
         }
-        if (distTag === 'latest') {
-          return Promise.resolve({
-            latest: '1.2.3',
-            current: '1.2.3-nightly.1',
-          });
-        }
-        return Promise.resolve(null);
+        return '1.2.3';
       });
 
-      updateNotifier.mockImplementation(({ pkg, distTag }) => ({
-        fetchInfo: () => fetchInfoMock({ pkg, distTag }),
-      }));
-
       const result = await checkForUpdates(mockSettings);
       expect(result?.message).toContain('1.2.3-nightly.1 → 1.2.3-nightly.2');
       expect(result?.update.latest).toBe('1.2.3-nightly.2');
diff --git a/packages/cli/src/ui/utils/updateCheck.ts b/packages/cli/src/ui/utils/updateCheck.ts
index f924964370..6a6de8518d 100644
--- a/packages/cli/src/ui/utils/updateCheck.ts
+++ b/packages/cli/src/ui/utils/updateCheck.ts
@@ -4,8 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { UpdateInfo } from 'update-notifier';
-import updateNotifier from 'update-notifier';
+import latestVersion from 'latest-version';
 import semver from 'semver';
 import { getPackageJson } from '../../utils/package.js';
 import type { LoadedSettings } from '../../config/settings.js';
@@ -13,32 +12,35 @@ import { debugLogger } from '@google/gemini-cli-core';
 
 export const FETCH_TIMEOUT_MS = 2000;
 
+// Replicating the bits of UpdateInfo we need from update-notifier
+export interface UpdateInfo {
+  latest: string;
+  current: string;
+  name: string;
+  type?: semver.ReleaseType;
+}
+
 export interface UpdateObject {
   message: string;
   update: UpdateInfo;
 }
 
 /**
- * From a nightly and stable update, determines which is the "best" one to offer.
+ * From a nightly and stable version, determines which is the "best" one to offer.
  * The rule is to always prefer nightly if the base versions are the same.
  */
 function getBestAvailableUpdate(
-  nightly?: UpdateInfo,
-  stable?: UpdateInfo,
-): UpdateInfo | null {
+  nightly?: string,
+  stable?: string,
+): string | null {
   if (!nightly) return stable || null;
   if (!stable) return nightly || null;
 
-  const nightlyVer = nightly.latest;
-  const stableVer = stable.latest;
-
-  if (
-    semver.coerce(stableVer)?.version === semver.coerce(nightlyVer)?.version
-  ) {
+  if (semver.coerce(stable)?.version === semver.coerce(nightly)?.version) {
     return nightly;
   }
 
-  return semver.gt(stableVer, nightlyVer) ? stable : nightly;
+  return semver.gt(stable, nightly) ? stable : nightly;
 }
 
 export async function checkForUpdates(
@@ -59,43 +61,42 @@ export async function checkForUpdates(
 
     const { name, version: currentVersion } = packageJson;
     const isNightly = currentVersion.includes('nightly');
-    const createNotifier = (distTag: 'latest' | 'nightly') =>
-      updateNotifier({
-        pkg: {
-          name,
-          version: currentVersion,
-        },
-        updateCheckInterval: 0,
-        shouldNotifyInNpmScript: true,
-        distTag,
-      });
 
     if (isNightly) {
-      const [nightlyUpdateInfo, latestUpdateInfo] = await Promise.all([
-        createNotifier('nightly').fetchInfo(),
-        createNotifier('latest').fetchInfo(),
+      const [nightlyUpdate, latestUpdate] = await Promise.all([
+        latestVersion(name, { version: 'nightly' }),
+        latestVersion(name),
       ]);
 
-      const bestUpdate = getBestAvailableUpdate(
-        nightlyUpdateInfo,
-        latestUpdateInfo,
-      );
+      const bestUpdate = getBestAvailableUpdate(nightlyUpdate, latestUpdate);
 
-      if (bestUpdate && semver.gt(bestUpdate.latest, currentVersion)) {
-        const message = `A new version of Gemini CLI is available! ${currentVersion} → ${bestUpdate.latest}`;
+      if (bestUpdate && semver.gt(bestUpdate, currentVersion)) {
+        const message = `A new version of Gemini CLI is available! ${currentVersion} → ${bestUpdate}`;
+        const type = semver.diff(bestUpdate, currentVersion) || undefined;
         return {
           message,
-          update: { ...bestUpdate, current: currentVersion },
+          update: {
+            latest: bestUpdate,
+            current: currentVersion,
+            name,
+            type,
+          },
         };
       }
     } else {
-      const updateInfo = await createNotifier('latest').fetchInfo();
+      const latestUpdate = await latestVersion(name);
 
-      if (updateInfo && semver.gt(updateInfo.latest, currentVersion)) {
-        const message = `Gemini CLI update available! ${currentVersion} → ${updateInfo.latest}`;
+      if (latestUpdate && semver.gt(latestUpdate, currentVersion)) {
+        const message = `Gemini CLI update available! ${currentVersion} → ${latestUpdate}`;
+        const type = semver.diff(latestUpdate, currentVersion) || undefined;
         return {
           message,
-          update: { ...updateInfo, current: currentVersion },
+          update: {
+            latest: latestUpdate,
+            current: currentVersion,
+            name,
+            type,
+          },
         };
       }
     }
diff --git a/packages/cli/src/utils/handleAutoUpdate.ts b/packages/cli/src/utils/handleAutoUpdate.ts
index a41ddc3592..e546b0c6fc 100644
--- a/packages/cli/src/utils/handleAutoUpdate.ts
+++ b/packages/cli/src/utils/handleAutoUpdate.ts
@@ -23,6 +23,13 @@ export function handleAutoUpdate(
     return;
   }
 
+  if (settings.merged.tools?.sandbox || process.env['GEMINI_SANDBOX']) {
+    updateEventEmitter.emit('update-info', {
+      message: `${info.message}\nAutomatic update is not available in sandbox mode.`,
+    });
+    return;
+  }
+
   if (settings.merged.general?.disableUpdateNag) {
     return;
   }

From c20b88cee2ed488ad611878e7c96716fb12ed071 Mon Sep 17 00:00:00 2001
From: Jacob MacDonald <jakemac@google.com>
Date: Fri, 24 Oct 2025 14:47:13 -0700
Subject: [PATCH 14/73] use coreEvents.emitFeedback in extension enablement
 (#11985)

---
 .../extensions/extensionEnablement.test.ts    | 26 ++++++++++++-------
 .../config/extensions/extensionEnablement.ts  | 10 ++++---
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/packages/cli/src/config/extensions/extensionEnablement.test.ts b/packages/cli/src/config/extensions/extensionEnablement.test.ts
index c42374acac..e26ebdbf66 100644
--- a/packages/cli/src/config/extensions/extensionEnablement.test.ts
+++ b/packages/cli/src/config/extensions/extensionEnablement.test.ts
@@ -10,7 +10,11 @@ import * as os from 'node:os';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { ExtensionEnablementManager, Override } from './extensionEnablement.js';
 
-import { GEMINI_DIR, type GeminiCLIExtension } from '@google/gemini-cli-core';
+import {
+  coreEvents,
+  GEMINI_DIR,
+  type GeminiCLIExtension,
+} from '@google/gemini-cli-core';
 
 vi.mock('os', async (importOriginal) => {
   const mockedOs = await importOriginal<typeof os>();
@@ -272,20 +276,20 @@ describe('ExtensionEnablementManager', () => {
   });
 
   describe('validateExtensionOverrides', () => {
-    let consoleErrorSpy: ReturnType<typeof vi.spyOn>;
+    let coreEventsEmitSpy: ReturnType<typeof vi.spyOn>;
 
     beforeEach(() => {
-      consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+      coreEventsEmitSpy = vi.spyOn(coreEvents, 'emitFeedback');
     });
 
     afterEach(() => {
-      consoleErrorSpy.mockRestore();
+      coreEventsEmitSpy.mockRestore();
     });
 
     it('should not log an error if enabledExtensionNamesOverride is empty', () => {
       const manager = new ExtensionEnablementManager([]);
       manager.validateExtensionOverrides([]);
-      expect(consoleErrorSpy).not.toHaveBeenCalled();
+      expect(coreEventsEmitSpy).not.toHaveBeenCalled();
     });
 
     it('should not log an error if all enabledExtensionNamesOverride are valid', () => {
@@ -295,7 +299,7 @@ describe('ExtensionEnablementManager', () => {
         { name: 'ext-two' },
       ] as GeminiCLIExtension[];
       manager.validateExtensionOverrides(extensions);
-      expect(consoleErrorSpy).not.toHaveBeenCalled();
+      expect(coreEventsEmitSpy).not.toHaveBeenCalled();
     });
 
     it('should log an error for each invalid extension name in enabledExtensionNamesOverride', () => {
@@ -309,11 +313,13 @@ describe('ExtensionEnablementManager', () => {
         { name: 'ext-two' },
       ] as GeminiCLIExtension[];
       manager.validateExtensionOverrides(extensions);
-      expect(consoleErrorSpy).toHaveBeenCalledTimes(2);
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
+      expect(coreEventsEmitSpy).toHaveBeenCalledTimes(2);
+      expect(coreEventsEmitSpy).toHaveBeenCalledWith(
+        'error',
         'Extension not found: ext-invalid',
       );
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
+      expect(coreEventsEmitSpy).toHaveBeenCalledWith(
+        'error',
         'Extension not found: ext-another-invalid',
       );
     });
@@ -321,7 +327,7 @@ describe('ExtensionEnablementManager', () => {
     it('should not log an error if "none" is in enabledExtensionNamesOverride', () => {
       const manager = new ExtensionEnablementManager(['none']);
       manager.validateExtensionOverrides([]);
-      expect(consoleErrorSpy).not.toHaveBeenCalled();
+      expect(coreEventsEmitSpy).not.toHaveBeenCalled();
     });
   });
 });
diff --git a/packages/cli/src/config/extensions/extensionEnablement.ts b/packages/cli/src/config/extensions/extensionEnablement.ts
index 9994a4ecff..a619587342 100644
--- a/packages/cli/src/config/extensions/extensionEnablement.ts
+++ b/packages/cli/src/config/extensions/extensionEnablement.ts
@@ -6,7 +6,7 @@
 
 import fs from 'node:fs';
 import path from 'node:path';
-import type { GeminiCLIExtension } from '@google/gemini-cli-core';
+import { coreEvents, type GeminiCLIExtension } from '@google/gemini-cli-core';
 import { ExtensionStorage } from './storage.js';
 
 export interface ExtensionEnablementConfig {
@@ -129,7 +129,7 @@ export class ExtensionEnablementManager {
       if (
         !extensions.some((ext) => ext.name.toLowerCase() === name.toLowerCase())
       ) {
-        console.error(`Extension not found: ${name}`);
+        coreEvents.emitFeedback('error', `Extension not found: ${name}`);
       }
     }
   }
@@ -188,7 +188,11 @@ export class ExtensionEnablementManager {
       ) {
         return {};
       }
-      console.error('Error reading extension enablement config:', error);
+      coreEvents.emitFeedback(
+        'error',
+        'Failed to read extension enablement config.',
+        error,
+      );
       return {};
     }
   }

From d91484eb4dc276e9ccfbeec71e85e1a304f1d950 Mon Sep 17 00:00:00 2001
From: Abhi <43648792+abhipatel12@users.noreply.github.com>
Date: Fri, 24 Oct 2025 17:49:42 -0400
Subject: [PATCH 15/73] Fix tests (#11998)

---
 packages/cli/src/config/config.test.ts      | 28 ++++++++++-----------
 packages/cli/src/config/settings.test.ts    |  4 +--
 packages/core/src/telemetry/metrics.test.ts |  4 +--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index 6b36235be4..a4cd313034 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -235,13 +235,13 @@ describe('parseArguments', () => {
       '@path',
       './file.md',
       '--model',
-      'gemini-1.5-pro',
+      'gemini-2.5-pro',
     ];
     const argv = await parseArguments({} as Settings);
     expect(argv.query).toBe('@path ./file.md');
     expect(argv.prompt).toBe('@path ./file.md'); // Should map to one-shot
     expect(argv.promptInteractive).toBeUndefined();
-    expect(argv.model).toBe('gemini-1.5-pro');
+    expect(argv.model).toBe('gemini-2.5-pro');
   });
 
   it('maps unquoted positional @path + arg to prompt (one-shot)', async () => {
@@ -1347,7 +1347,7 @@ describe('loadCliConfig model selection', () => {
     const config = await loadCliConfig(
       {
         model: {
-          name: 'gemini-9001-ultra',
+          name: 'gemini-2.5-pro',
         },
       },
       [],
@@ -1355,7 +1355,7 @@ describe('loadCliConfig model selection', () => {
       argv,
     );
 
-    expect(config.getModel()).toBe('gemini-9001-ultra');
+    expect(config.getModel()).toBe('gemini-2.5-pro');
   });
 
   it('uses the default gemini model if nothing is set', async () => {
@@ -1374,12 +1374,12 @@ describe('loadCliConfig model selection', () => {
   });
 
   it('always prefers model from argv', async () => {
-    process.argv = ['node', 'script.js', '--model', 'gemini-8675309-ultra'];
+    process.argv = ['node', 'script.js', '--model', 'gemini-2.5-flash-preview'];
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       {
         model: {
-          name: 'gemini-9001-ultra',
+          name: 'gemini-2.5-pro',
         },
       },
       [],
@@ -1387,11 +1387,11 @@ describe('loadCliConfig model selection', () => {
       argv,
     );
 
-    expect(config.getModel()).toBe('gemini-8675309-ultra');
+    expect(config.getModel()).toBe('gemini-2.5-flash-preview');
   });
 
   it('selects the model from argv if provided', async () => {
-    process.argv = ['node', 'script.js', '--model', 'gemini-8675309-ultra'];
+    process.argv = ['node', 'script.js', '--model', 'gemini-2.5-flash-preview'];
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       {
@@ -1402,7 +1402,7 @@ describe('loadCliConfig model selection', () => {
       argv,
     );
 
-    expect(config.getModel()).toBe('gemini-8675309-ultra');
+    expect(config.getModel()).toBe('gemini-2.5-flash-preview');
   });
 });
 
@@ -1923,7 +1923,7 @@ describe('loadCliConfig interactive', () => {
 
   it('should not be interactive if positional prompt words are provided with other flags', async () => {
     process.stdin.isTTY = true;
-    process.argv = ['node', 'script.js', '--model', 'gemini-1.5-pro', 'Hello'];
+    process.argv = ['node', 'script.js', '--model', 'gemini-2.5-pro', 'Hello'];
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig({}, [], 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
@@ -1935,7 +1935,7 @@ describe('loadCliConfig interactive', () => {
       'node',
       'script.js',
       '--model',
-      'gemini-1.5-pro',
+      'gemini-2.5-pro',
       '--yolo',
       'Hello world',
     ];
@@ -1973,7 +1973,7 @@ describe('loadCliConfig interactive', () => {
       'node',
       'script.js',
       '--model',
-      'gemini-1.5-pro',
+      'gemini-2.5-pro',
       'write',
       'a',
       'function',
@@ -1985,7 +1985,7 @@ describe('loadCliConfig interactive', () => {
     const config = await loadCliConfig({}, [], 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
     expect(argv.query).toBe('write a function to sort array');
-    expect(argv.model).toBe('gemini-1.5-pro');
+    expect(argv.model).toBe('gemini-2.5-pro');
   });
 
   it('should handle empty positional arguments', async () => {
@@ -2019,7 +2019,7 @@ describe('loadCliConfig interactive', () => {
 
   it('should be interactive if no positional prompt words are provided with flags', async () => {
     process.stdin.isTTY = true;
-    process.argv = ['node', 'script.js', '--model', 'gemini-1.5-pro'];
+    process.argv = ['node', 'script.js', '--model', 'gemini-2.5-pro'];
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig({}, [], 'test-session', argv);
     expect(config.isInteractive()).toBe(true);
diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts
index 3c79657b14..a0e3b5196e 100644
--- a/packages/cli/src/config/settings.test.ts
+++ b/packages/cli/src/config/settings.test.ts
@@ -2159,7 +2159,7 @@ describe('Settings Loading and Merging', () => {
         },
         ui: {},
         model: {
-          name: 'gemini-1.5-pro',
+          name: 'gemini-2.5-pro',
         },
         unrecognized: 'value',
       };
@@ -2168,7 +2168,7 @@ describe('Settings Loading and Merging', () => {
 
       expect(v1Settings).toEqual({
         vimMode: false,
-        model: 'gemini-1.5-pro',
+        model: 'gemini-2.5-pro',
         unrecognized: 'value',
       });
     });
diff --git a/packages/core/src/telemetry/metrics.test.ts b/packages/core/src/telemetry/metrics.test.ts
index ee97a8771c..63355cd542 100644
--- a/packages/core/src/telemetry/metrics.test.ts
+++ b/packages/core/src/telemetry/metrics.test.ts
@@ -335,14 +335,14 @@ describe('Telemetry Metrics', () => {
       mockCounterAddFn.mockClear();
 
       recordTokenUsageMetricsModule(mockConfig, 200, {
-        model: 'gemini-ultra',
+        model: 'gemini-different-model',
         type: 'input',
       });
       expect(mockCounterAddFn).toHaveBeenCalledWith(200, {
         'session.id': 'test-session-id',
         'installation.id': 'test-installation-id',
         'user.email': 'test@example.com',
-        model: 'gemini-ultra',
+        model: 'gemini-different-model',
         type: 'input',
       });
     });

From cdff69b7b255b8ce1df0c4a7fc09a1d5342e2da2 Mon Sep 17 00:00:00 2001
From: Jacob MacDonald <jakemac@google.com>
Date: Fri, 24 Oct 2025 15:35:09 -0700
Subject: [PATCH 16/73] Support redirects in fetchJson, add tests for it
 (#11993)

---
 .../config/extensions/github_fetch.test.ts    | 199 ++++++++++++++++++
 .../cli/src/config/extensions/github_fetch.ts |  17 +-
 2 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 packages/cli/src/config/extensions/github_fetch.test.ts

diff --git a/packages/cli/src/config/extensions/github_fetch.test.ts b/packages/cli/src/config/extensions/github_fetch.test.ts
new file mode 100644
index 0000000000..fe6edbedb2
--- /dev/null
+++ b/packages/cli/src/config/extensions/github_fetch.test.ts
@@ -0,0 +1,199 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
+import * as https from 'node:https';
+import { EventEmitter } from 'node:events';
+import { fetchJson, getGitHubToken } from './github_fetch.js';
+import type { ClientRequest, IncomingMessage } from 'node:http';
+
+vi.mock('node:https');
+
+describe('getGitHubToken', () => {
+  const originalToken = process.env['GITHUB_TOKEN'];
+
+  afterEach(() => {
+    if (originalToken) {
+      process.env['GITHUB_TOKEN'] = originalToken;
+    } else {
+      delete process.env['GITHUB_TOKEN'];
+    }
+  });
+
+  it('should return the token if GITHUB_TOKEN is set', () => {
+    process.env['GITHUB_TOKEN'] = 'test-token';
+    expect(getGitHubToken()).toBe('test-token');
+  });
+
+  it('should return undefined if GITHUB_TOKEN is not set', () => {
+    delete process.env['GITHUB_TOKEN'];
+    expect(getGitHubToken()).toBeUndefined();
+  });
+});
+
+describe('fetchJson', () => {
+  const getMock = vi.mocked(https.get);
+
+  afterEach(() => {
+    vi.resetAllMocks();
+  });
+
+  it('should fetch and parse JSON successfully', async () => {
+    getMock.mockImplementationOnce((_url, _options, callback) => {
+      const res = new EventEmitter() as IncomingMessage;
+      res.statusCode = 200;
+      (callback as (res: IncomingMessage) => void)(res);
+      res.emit('data', Buffer.from('{"foo":'));
+      res.emit('data', Buffer.from('"bar"}'));
+      res.emit('end');
+      return new EventEmitter() as ClientRequest;
+    });
+    await expect(fetchJson('https://example.com/data.json')).resolves.toEqual({
+      foo: 'bar',
+    });
+  });
+
+  it('should handle redirects (301 and 302)', async () => {
+    // Test 302
+    getMock.mockImplementationOnce((_url, _options, callback) => {
+      const res = new EventEmitter() as IncomingMessage;
+      res.statusCode = 302;
+      res.headers = { location: 'https://example.com/final' };
+      (callback as (res: IncomingMessage) => void)(res);
+      res.emit('end');
+      return new EventEmitter() as ClientRequest;
+    });
+    getMock.mockImplementationOnce((url, _options, callback) => {
+      expect(url).toBe('https://example.com/final');
+      const res = new EventEmitter() as IncomingMessage;
+      res.statusCode = 200;
+      (callback as (res: IncomingMessage) => void)(res);
+      res.emit('data', Buffer.from('{"success": true}'));
+      res.emit('end');
+      return new EventEmitter() as ClientRequest;
+    });
+
+    await expect(fetchJson('https://example.com/redirect')).resolves.toEqual({
+      success: true,
+    });
+
+    // Test 301
+    getMock.mockImplementationOnce((_url, _options, callback) => {
+      const res = new EventEmitter() as IncomingMessage;
+      res.statusCode = 301;
+      res.headers = { location: 'https://example.com/final-permanent' };
+      (callback as (res: IncomingMessage) => void)(res);
+      res.emit('end');
+      return new EventEmitter() as ClientRequest;
+    });
+    getMock.mockImplementationOnce((url, _options, callback) => {
+      expect(url).toBe('https://example.com/final-permanent');
+      const res = new EventEmitter() as IncomingMessage;
+      res.statusCode = 200;
+      (callback as (res: IncomingMessage) => void)(res);
+      res.emit('data', Buffer.from('{"permanent": true}'));
+      res.emit('end');
+      return new EventEmitter() as ClientRequest;
+    });
+
+    await expect(
+      fetchJson('https://example.com/redirect-perm'),
+    ).resolves.toEqual({ permanent: true });
+  });
+
+  it('should reject on non-200/30x status code', async () => {
+    getMock.mockImplementationOnce((_url, _options, callback) => {
+      const res = new EventEmitter() as IncomingMessage;
+      res.statusCode = 404;
+      (callback as (res: IncomingMessage) => void)(res);
+      res.emit('end');
+      return new EventEmitter() as ClientRequest;
+    });
+
+    await expect(fetchJson('https://example.com/error')).rejects.toThrow(
+      'Request failed with status code 404',
+    );
+  });
+
+  it('should reject on request error', async () => {
+    const error = new Error('Network error');
+    getMock.mockImplementationOnce(() => {
+      const req = new EventEmitter() as ClientRequest;
+      req.emit('error', error);
+      return req;
+    });
+
+    await expect(fetchJson('https://example.com/error')).rejects.toThrow(
+      'Network error',
+    );
+  });
+
+  describe('with GITHUB_TOKEN', () => {
+    const originalToken = process.env['GITHUB_TOKEN'];
+
+    beforeEach(() => {
+      process.env['GITHUB_TOKEN'] = 'my-secret-token';
+    });
+
+    afterEach(() => {
+      if (originalToken) {
+        process.env['GITHUB_TOKEN'] = originalToken;
+      } else {
+        delete process.env['GITHUB_TOKEN'];
+      }
+    });
+
+    it('should include Authorization header if token is present', async () => {
+      getMock.mockImplementationOnce((_url, options, callback) => {
+        expect(options.headers).toEqual({
+          'User-Agent': 'gemini-cli',
+          Authorization: 'token my-secret-token',
+        });
+        const res = new EventEmitter() as IncomingMessage;
+        res.statusCode = 200;
+        (callback as (res: IncomingMessage) => void)(res);
+        res.emit('data', Buffer.from('{"foo": "bar"}'));
+        res.emit('end');
+        return new EventEmitter() as ClientRequest;
+      });
+      await expect(fetchJson('https://api.github.com/user')).resolves.toEqual({
+        foo: 'bar',
+      });
+    });
+  });
+
+  describe('without GITHUB_TOKEN', () => {
+    const originalToken = process.env['GITHUB_TOKEN'];
+
+    beforeEach(() => {
+      delete process.env['GITHUB_TOKEN'];
+    });
+
+    afterEach(() => {
+      if (originalToken) {
+        process.env['GITHUB_TOKEN'] = originalToken;
+      }
+    });
+
+    it('should not include Authorization header if token is not present', async () => {
+      getMock.mockImplementationOnce((_url, options, callback) => {
+        expect(options.headers).toEqual({
+          'User-Agent': 'gemini-cli',
+        });
+        const res = new EventEmitter() as IncomingMessage;
+        res.statusCode = 200;
+        (callback as (res: IncomingMessage) => void)(res);
+        res.emit('data', Buffer.from('{"foo": "bar"}'));
+        res.emit('end');
+        return new EventEmitter() as ClientRequest;
+      });
+
+      await expect(fetchJson('https://api.github.com/user')).resolves.toEqual({
+        foo: 'bar',
+      });
+    });
+  });
+});
diff --git a/packages/cli/src/config/extensions/github_fetch.ts b/packages/cli/src/config/extensions/github_fetch.ts
index 3940275699..a4f9d29b70 100644
--- a/packages/cli/src/config/extensions/github_fetch.ts
+++ b/packages/cli/src/config/extensions/github_fetch.ts
@@ -10,7 +10,10 @@ export function getGitHubToken(): string | undefined {
   return process.env['GITHUB_TOKEN'];
 }
 
-export async function fetchJson<T>(url: string): Promise<T> {
+export async function fetchJson<T>(
+  url: string,
+  redirectCount: number = 0,
+): Promise<T> {
   const headers: { 'User-Agent': string; Authorization?: string } = {
     'User-Agent': 'gemini-cli',
   };
@@ -21,6 +24,18 @@ export async function fetchJson<T>(url: string): Promise<T> {
   return new Promise((resolve, reject) => {
     https
       .get(url, { headers }, (res) => {
+        if (res.statusCode === 302 || res.statusCode === 301) {
+          if (redirectCount >= 10) {
+            return reject(new Error('Too many redirects'));
+          }
+          if (!res.headers.location) {
+            return reject(new Error('No location header in redirect response'));
+          }
+          // Pass redirectCount + 1 (not redirectCount++) so the cap above can trigger.
+          fetchJson<T>(res.headers.location, redirectCount + 1)
+            .then(resolve, reject);
+          return;
+        }
         if (res.statusCode !== 200) {
           return reject(
             new Error(`Request failed with status code ${res.statusCode}`),

From f934f018818f3f66e0a141fe9bbccdd03254f191 Mon Sep 17 00:00:00 2001
From: Allen Hutchison <adh@google.com>
Date: Fri, 24 Oct 2025 16:22:02 -0700
Subject: [PATCH 17/73] fix(tools): ReadFile no longer shows confirmation when
 message bus is off (#12003)

---
 packages/core/src/tools/tools.ts | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts
index 4ea20de673..a69856cd72 100644
--- a/packages/core/src/tools/tools.ts
+++ b/packages/core/src/tools/tools.ts
@@ -114,27 +114,13 @@ export abstract class BaseToolInvocation<
   /**
    * Subclasses should override this method to provide custom confirmation UI
    * when the policy engine's decision is 'ASK_USER'.
-   * The base implementation provides a generic confirmation prompt.
+   * The base implementation returns false (no confirmation needed).
+   * Only tools that need confirmation (e.g., write, execute tools) should override this.
    */
   protected async getConfirmationDetails(
     _abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
-    const confirmationDetails: ToolCallConfirmationDetails = {
-      type: 'info',
-      title: `Confirm: ${this._toolDisplayName || this._toolName}`,
-      prompt: this.getDescription(),
-      onConfirm: async (outcome: ToolConfirmationOutcome) => {
-        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
-          if (this.messageBus && this._toolName) {
-            this.messageBus.publish({
-              type: MessageBusType.UPDATE_POLICY,
-              toolName: this._toolName,
-            });
-          }
-        }
-      },
-    };
-    return confirmationDetails;
+    return false;
   }
 
   protected getMessageBusDecision(

From 81006605c82becc468b3dc4a6707993e214c4ac8 Mon Sep 17 00:00:00 2001
From: Jacob MacDonald <jakemac@google.com>
Date: Fri, 24 Oct 2025 16:31:25 -0700
Subject: [PATCH 18/73] use debugLogger instead of console.error (#11990)

---
 packages/cli/src/ui/components/views/ExtensionsList.tsx | 6 ++++--
 packages/cli/src/ui/hooks/useExtensionUpdates.ts        | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/packages/cli/src/ui/components/views/ExtensionsList.tsx b/packages/cli/src/ui/components/views/ExtensionsList.tsx
index e1ddf270f3..b37648d78c 100644
--- a/packages/cli/src/ui/components/views/ExtensionsList.tsx
+++ b/packages/cli/src/ui/components/views/ExtensionsList.tsx
@@ -8,7 +8,7 @@ import type React from 'react';
 import { Box, Text } from 'ink';
 import { useUIState } from '../../contexts/UIStateContext.js';
 import { ExtensionUpdateState } from '../../state/extensions.js';
-import type { GeminiCLIExtension } from '@google/gemini-cli-core';
+import { debugLogger, type GeminiCLIExtension } from '@google/gemini-cli-core';
 
 interface ExtensionsList {
   extensions: readonly GeminiCLIExtension[];
@@ -50,8 +50,10 @@ export const ExtensionsList: React.FC<ExtensionsList> = ({ extensions }) => {
             case ExtensionUpdateState.NOT_UPDATABLE:
               stateColor = 'green';
               break;
+            case undefined:
+              break;
             default:
-              console.error(`Unhandled ExtensionUpdateState ${state}`);
+              debugLogger.warn(`Unhandled ExtensionUpdateState ${state}`);
               break;
           }
 
diff --git a/packages/cli/src/ui/hooks/useExtensionUpdates.ts b/packages/cli/src/ui/hooks/useExtensionUpdates.ts
index a4e9e2598e..3bad4f771b 100644
--- a/packages/cli/src/ui/hooks/useExtensionUpdates.ts
+++ b/packages/cli/src/ui/hooks/useExtensionUpdates.ts
@@ -4,7 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { GeminiCLIExtension } from '@google/gemini-cli-core';
+import { debugLogger, type GeminiCLIExtension } from '@google/gemini-cli-core';
 import { getErrorMessage } from '../../utils/errors.js';
 import {
   ExtensionUpdateState,
@@ -204,7 +204,7 @@ export const useExtensionUpdates = (
           try {
             callback(nonNullResults);
           } catch (e) {
-            console.error(getErrorMessage(e));
+            debugLogger.warn(getErrorMessage(e));
           }
         });
       });

From 145e099ca54524fa1198a607bc0b54082f1661c9 Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Fri, 24 Oct 2025 18:52:03 -0700
Subject: [PATCH 19/73] Support paste markers split across writes. (#11977)

---
 .../src/ui/components/InputPrompt.test.tsx    |   6 +-
 .../src/ui/components/SettingsDialog.test.tsx |   2 +-
 .../src/ui/contexts/KeypressContext.test.tsx  |  57 +++++-
 .../cli/src/ui/contexts/KeypressContext.tsx   | 184 ++++++++++++------
 packages/cli/src/ui/hooks/useKeypress.test.ts | 100 +++++++++-
 5 files changed, 281 insertions(+), 68 deletions(-)

diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index 688f9a8538..eed0020ffe 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -1331,7 +1331,7 @@ describe('InputPrompt', () => {
       await wait();
 
       stdin.write('\x1B');
-      await wait();
+      await wait(100);
 
       expect(props.buffer.setText).toHaveBeenCalledWith('');
       expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
@@ -1372,7 +1372,7 @@ describe('InputPrompt', () => {
       await wait();
 
       stdin.write('\x1B');
-      await wait();
+      await wait(100);
 
       expect(props.setShellModeActive).toHaveBeenCalledWith(false);
       unmount();
@@ -1392,7 +1392,7 @@ describe('InputPrompt', () => {
       await wait();
 
       stdin.write('\x1B');
-      await wait();
+      await wait(100);
 
       expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
       unmount();
diff --git a/packages/cli/src/ui/components/SettingsDialog.test.tsx b/packages/cli/src/ui/components/SettingsDialog.test.tsx
index 24909fcbfd..4a36fafb75 100644
--- a/packages/cli/src/ui/components/SettingsDialog.test.tsx
+++ b/packages/cli/src/ui/components/SettingsDialog.test.tsx
@@ -1348,7 +1348,7 @@ describe('SettingsDialog', () => {
 
       // Press Escape to exit
       stdin.write('\u001B');
-      await wait();
+      await wait(100);
 
       expect(onSelect).toHaveBeenCalledWith(undefined, 'User');
 
diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx
index 295938ca9f..197974c751 100644
--- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx
+++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx
@@ -46,7 +46,7 @@ class MockStdin extends EventEmitter {
   pause = vi.fn();
 
   write(text: string) {
-    this.emit('data', Buffer.from(text));
+    this.emit('data', text);
   }
 }
 
@@ -381,6 +381,61 @@ describe('KeypressContext - Kitty Protocol', () => {
         }),
       );
     });
+    it('should paste start code split over multiple writes', async () => {
+      const keyHandler = vi.fn();
+      const pastedText = 'pasted content';
+
+      const { result } = renderHook(() => useKeypressContext(), { wrapper });
+
+      act(() => result.current.subscribe(keyHandler));
+
+      act(() => {
+        // Split PASTE_START into two parts
+        stdin.write(PASTE_START.slice(0, 3));
+        stdin.write(PASTE_START.slice(3));
+        stdin.write(pastedText);
+        stdin.write(PASTE_END);
+      });
+
+      await waitFor(() => {
+        expect(keyHandler).toHaveBeenCalledTimes(1);
+      });
+
+      expect(keyHandler).toHaveBeenCalledWith(
+        expect.objectContaining({
+          paste: true,
+          sequence: pastedText,
+        }),
+      );
+    });
+
+    it('should paste end code split over multiple writes', async () => {
+      const keyHandler = vi.fn();
+      const pastedText = 'pasted content';
+
+      const { result } = renderHook(() => useKeypressContext(), { wrapper });
+
+      act(() => result.current.subscribe(keyHandler));
+
+      act(() => {
+        stdin.write(PASTE_START);
+        stdin.write(pastedText);
+        // Split PASTE_END into two parts
+        stdin.write(PASTE_END.slice(0, 3));
+        stdin.write(PASTE_END.slice(3));
+      });
+
+      await waitFor(() => {
+        expect(keyHandler).toHaveBeenCalledTimes(1);
+      });
+
+      expect(keyHandler).toHaveBeenCalledWith(
+        expect.objectContaining({
+          paste: true,
+          sequence: pastedText,
+        }),
+      );
+    });
   });
 
   describe('debug keystroke logging', () => {
diff --git a/packages/cli/src/ui/contexts/KeypressContext.tsx b/packages/cli/src/ui/contexts/KeypressContext.tsx
index 6390fb1ee6..060efe1e72 100644
--- a/packages/cli/src/ui/contexts/KeypressContext.tsx
+++ b/packages/cli/src/ui/contexts/KeypressContext.tsx
@@ -40,10 +40,11 @@ import {
 import { FOCUS_IN, FOCUS_OUT } from '../hooks/useFocus.js';
 
 const ESC = '\u001B';
-export const PASTE_MODE_PREFIX = `${ESC}[200~`;
-export const PASTE_MODE_SUFFIX = `${ESC}[201~`;
+export const PASTE_MODE_START = `${ESC}[200~`;
+export const PASTE_MODE_END = `${ESC}[201~`;
 export const DRAG_COMPLETION_TIMEOUT_MS = 100; // Broadcast full path after 100ms if no more input
 export const KITTY_SEQUENCE_TIMEOUT_MS = 50; // Flush incomplete kitty sequences after 50ms
+export const PASTE_CODE_TIMEOUT_MS = 50; // Flush incomplete paste code after 50ms
 export const SINGLE_QUOTE = "'";
 export const DOUBLE_QUOTE = '"';
 
@@ -353,6 +354,102 @@ function parseKittyPrefix(buffer: string): { key: Key; length: number } | null {
   return null;
 }
 
+/**
+ * Returns the first index before which we are certain there is no paste marker.
+ */
+function earliestPossiblePasteMarker(data: string): number {
+  // Check data for full start-paste or end-paste markers.
+  const startIndex = data.indexOf(PASTE_MODE_START);
+  const endIndex = data.indexOf(PASTE_MODE_END);
+  if (startIndex !== -1 && endIndex !== -1) {
+    return Math.min(startIndex, endIndex);
+  } else if (startIndex !== -1) {
+    return startIndex;
+  } else if (endIndex !== -1) {
+    return endIndex;
+  }
+
+  // data contains no full start-paste or end-paste.
+  // Check if data ends with a prefix of start-paste or end-paste.
+  const codeLength = PASTE_MODE_START.length;
+  for (let i = Math.min(data.length, codeLength - 1); i > 0; i--) {
+    const candidate = data.slice(data.length - i);
+    if (
+      PASTE_MODE_START.indexOf(candidate) === 0 ||
+      PASTE_MODE_END.indexOf(candidate) === 0
+    ) {
+      return data.length - i;
+    }
+  }
+  return data.length;
+}
+
+/**
+ * Generator fed data chunks via next(); an empty chunk signals a timeout. Emits
+ * paste-start/paste-end keypresses; all other data is written to passthrough.
+ */
+function* pasteMarkerParser(
+  passthrough: PassThrough,
+  keypressHandler: (_: unknown, key: Key) => void,
+): Generator<void, void, string> {
+  while (true) {
+    let data = yield;
+    if (data.length === 0) {
+      continue; // we timed out
+    }
+
+    while (true) {
+      const index = earliestPossiblePasteMarker(data);
+      if (index === data.length) {
+        // no possible paste markers were found
+        passthrough.write(data);
+        break;
+      }
+      if (index > 0) {
+        // snip off and send the part that doesn't have a paste marker
+        passthrough.write(data.slice(0, index));
+        data = data.slice(index);
+      }
+      // data starts with a possible paste marker
+      const codeLength = PASTE_MODE_START.length;
+      if (data.length < codeLength) {
+        // we have a prefix. Concat the next data and try again.
+        const newData = yield;
+        if (newData.length === 0) {
+          // we timed out. Just dump what we have and start over.
+          passthrough.write(data);
+          break;
+        }
+        data += newData;
+      } else if (data.startsWith(PASTE_MODE_START)) {
+        keypressHandler(undefined, {
+          name: 'paste-start',
+          ctrl: false,
+          meta: false,
+          shift: false,
+          paste: false,
+          sequence: '',
+        });
+        data = data.slice(PASTE_MODE_START.length);
+      } else if (data.startsWith(PASTE_MODE_END)) {
+        keypressHandler(undefined, {
+          name: 'paste-end',
+          ctrl: false,
+          meta: false,
+          shift: false,
+          paste: false,
+          sequence: '',
+        });
+        data = data.slice(PASTE_MODE_END.length);
+      } else {
+        // This should never happen.
+        passthrough.write(data);
+        break;
+      }
+    }
+  }
+}
+
 export interface Key {
   name: string;
   ctrl: boolean;
@@ -621,8 +718,8 @@ export function KeypressProvider({
         // Check if this could start a kitty sequence
         const startsWithEsc = key.sequence.startsWith(ESC);
         const isExcluded = [
-          PASTE_MODE_PREFIX,
-          PASTE_MODE_SUFFIX,
+          PASTE_MODE_START,
+          PASTE_MODE_END,
           FOCUS_IN,
           FOCUS_OUT,
         ].some((prefix) => key.sequence.startsWith(prefix));
@@ -766,57 +863,7 @@ export function KeypressProvider({
       broadcast({ ...key, paste: pasteBuffer !== null });
     };
 
-    const handleRawKeypress = (data: Buffer) => {
-      const pasteModePrefixBuffer = Buffer.from(PASTE_MODE_PREFIX);
-      const pasteModeSuffixBuffer = Buffer.from(PASTE_MODE_SUFFIX);
-
-      let pos = 0;
-      while (pos < data.length) {
-        const prefixPos = data.indexOf(pasteModePrefixBuffer, pos);
-        const suffixPos = data.indexOf(pasteModeSuffixBuffer, pos);
-        const isPrefixNext =
-          prefixPos !== -1 && (suffixPos === -1 || prefixPos < suffixPos);
-        const isSuffixNext =
-          suffixPos !== -1 && (prefixPos === -1 || suffixPos < prefixPos);
-
-        let nextMarkerPos = -1;
-        let markerLength = 0;
-
-        if (isPrefixNext) {
-          nextMarkerPos = prefixPos;
-        } else if (isSuffixNext) {
-          nextMarkerPos = suffixPos;
-        }
-        markerLength = pasteModeSuffixBuffer.length;
-
-        if (nextMarkerPos === -1) {
-          keypressStream!.write(data.slice(pos));
-          return;
-        }
-
-        const nextData = data.slice(pos, nextMarkerPos);
-        if (nextData.length > 0) {
-          keypressStream!.write(nextData);
-        }
-        const createPasteKeyEvent = (
-          name: 'paste-start' | 'paste-end',
-        ): Key => ({
-          name,
-          ctrl: false,
-          meta: false,
-          shift: false,
-          paste: false,
-          sequence: '',
-        });
-        if (isPrefixNext) {
-          handleKeypress(undefined, createPasteKeyEvent('paste-start'));
-        } else if (isSuffixNext) {
-          handleKeypress(undefined, createPasteKeyEvent('paste-end'));
-        }
-        pos = nextMarkerPos + markerLength;
-      }
-    };
-
+    let cleanup = () => {};
     let rl: readline.Interface;
     if (keypressStream !== null) {
       rl = readline.createInterface({
@@ -824,22 +871,35 @@ export function KeypressProvider({
         escapeCodeTimeout: 0,
       });
       readline.emitKeypressEvents(keypressStream, rl);
+
+      const parser = pasteMarkerParser(keypressStream, handleKeypress);
+      parser.next(); // prime the generator so it starts listening.
+      let timeoutId: NodeJS.Timeout;
+      const handleRawKeypress = (data: string) => {
+        clearTimeout(timeoutId);
+        parser.next(data);
+        timeoutId = setTimeout(() => parser.next(''), PASTE_CODE_TIMEOUT_MS);
+      };
+
       keypressStream.on('keypress', handleKeypress);
+      process.stdin.setEncoding('utf8'); // so handleRawKeypress gets strings
       stdin.on('data', handleRawKeypress);
+
+      cleanup = () => {
+        keypressStream.removeListener('keypress', handleKeypress);
+        stdin.removeListener('data', handleRawKeypress);
+      };
     } else {
       rl = readline.createInterface({ input: stdin, escapeCodeTimeout: 0 });
       readline.emitKeypressEvents(stdin, rl);
+
       stdin.on('keypress', handleKeypress);
+
+      cleanup = () => stdin.removeListener('keypress', handleKeypress);
     }
 
     return () => {
-      if (keypressStream !== null) {
-        keypressStream.removeListener('keypress', handleKeypress);
-        stdin.removeListener('data', handleRawKeypress);
-      } else {
-        stdin.removeListener('keypress', handleKeypress);
-      }
-
+      cleanup();
       rl.close();
 
       // Restore the terminal to its original state.
diff --git a/packages/cli/src/ui/hooks/useKeypress.test.ts b/packages/cli/src/ui/hooks/useKeypress.test.ts
index 243152cc42..770a86fed0 100644
--- a/packages/cli/src/ui/hooks/useKeypress.test.ts
+++ b/packages/cli/src/ui/hooks/useKeypress.test.ts
@@ -34,7 +34,7 @@ class MockStdin extends EventEmitter {
   pause = vi.fn();
 
   write(text: string) {
-    this.emit('data', Buffer.from(text));
+    this.emit('data', text);
   }
 }
 
@@ -187,6 +187,104 @@ describe('useKeypress', () => {
       expect(onKeypress).toHaveBeenCalledTimes(3);
     });
 
+    it('should handle lone pastes', () => {
+      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
+        wrapper,
+      });
+
+      const pasteText = 'pasted';
+      act(() => {
+        stdin.write(PASTE_START);
+        stdin.write(pasteText);
+        stdin.write(PASTE_END);
+      });
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ paste: true, sequence: pasteText }),
+      );
+
+      expect(onKeypress).toHaveBeenCalledTimes(1);
+    });
+
+    it('should handle paste false alarm', () => {
+      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
+        wrapper,
+      });
+
+      act(() => {
+        stdin.write(PASTE_START.slice(0, 5));
+        stdin.write('do');
+      });
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ code: '[200d' }),
+      );
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ sequence: 'o' }),
+      );
+
+      expect(onKeypress).toHaveBeenCalledTimes(2);
+    });
+
+    it('should handle back to back pastes', () => {
+      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
+        wrapper,
+      });
+
+      const pasteText1 = 'herp';
+      const pasteText2 = 'derp';
+      act(() => {
+        stdin.write(
+          PASTE_START +
+            pasteText1 +
+            PASTE_END +
+            PASTE_START +
+            pasteText2 +
+            PASTE_END,
+        );
+      });
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ paste: true, sequence: pasteText1 }),
+      );
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ paste: true, sequence: pasteText2 }),
+      );
+
+      expect(onKeypress).toHaveBeenCalledTimes(2);
+    });
+
+    it('should handle pastes split across writes', async () => {
+      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
+        wrapper,
+      });
+
+      const keyA = { name: 'a', sequence: 'a' };
+      act(() => stdin.write('a'));
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ ...keyA, paste: false }),
+      );
+
+      const pasteText = 'pasted';
+      await act(async () => {
+        stdin.write(PASTE_START.slice(0, 3));
+        await new Promise((r) => setTimeout(r, 50));
+        stdin.write(PASTE_START.slice(3) + pasteText.slice(0, 3));
+        await new Promise((r) => setTimeout(r, 50));
+        stdin.write(pasteText.slice(3) + PASTE_END.slice(0, 3));
+        await new Promise((r) => setTimeout(r, 50));
+        stdin.write(PASTE_END.slice(3));
+      });
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ paste: true, sequence: pasteText }),
+      );
+
+      const keyB = { name: 'b', sequence: 'b' };
+      act(() => stdin.write('b'));
+      expect(onKeypress).toHaveBeenCalledWith(
+        expect.objectContaining({ ...keyB, paste: false }),
+      );
+
+      expect(onKeypress).toHaveBeenCalledTimes(3);
+    });
+
     it('should emit partial paste content if unmounted mid-paste', () => {
       const { unmount } = renderHook(
         () => useKeypress(onKeypress, { isActive: true }),

From b1059f891f18c478c2afa0c44766f36654fd7001 Mon Sep 17 00:00:00 2001
From: Eric Rahm <erahm@google.com>
Date: Fri, 24 Oct 2025 18:55:12 -0700
Subject: [PATCH 20/73] refactor: Switch over to unified shouldIgnoreFile
 (#11815)

---
 .../cli/src/zed-integration/zedIntegration.ts | 17 +++----
 .../src/services/fileDiscoveryService.test.ts | 22 ++++-----
 .../core/src/services/fileDiscoveryService.ts | 46 ++-----------------
 packages/core/src/tools/read-file.test.ts     |  8 +++-
 packages/core/src/tools/read-file.ts          |  7 ++-
 packages/core/src/utils/getFolderStructure.ts | 35 +++++++-------
 6 files changed, 51 insertions(+), 84 deletions(-)

diff --git a/packages/cli/src/zed-integration/zedIntegration.ts b/packages/cli/src/zed-integration/zedIntegration.ts
index 29739850ae..c320bbe3a9 100644
--- a/packages/cli/src/zed-integration/zedIntegration.ts
+++ b/packages/cli/src/zed-integration/zedIntegration.ts
@@ -12,6 +12,7 @@ import type {
   ToolResult,
   ToolCallConfirmationDetails,
   GeminiCLIExtension,
+  FilterFilesOptions,
 } from '@google/gemini-cli-core';
 import {
   AuthType,
@@ -571,7 +572,8 @@ class Session {
 
     // Get centralized file discovery service
     const fileDiscovery = this.config.getFileService();
-    const respectGitIgnore = this.config.getFileFilteringRespectGitIgnore();
+    const fileFilteringOptions: FilterFilesOptions =
+      this.config.getFileFilteringOptions();
 
     const pathSpecsToRead: string[] = [];
     const contentLabelsForDisplay: string[] = [];
@@ -587,13 +589,10 @@ class Session {
 
     for (const atPathPart of atPathCommandParts) {
       const pathName = atPathPart.fileData!.fileUri;
-      // Check if path should be ignored by git
-      if (fileDiscovery.shouldGitIgnoreFile(pathName)) {
+      // Check if path should be ignored
+      if (fileDiscovery.shouldIgnoreFile(pathName, fileFilteringOptions)) {
         ignoredPaths.push(pathName);
-        const reason = respectGitIgnore
-          ? 'git-ignored and will be skipped'
-          : 'ignored by custom patterns';
-        debugLogger.warn(`Path ${pathName} is ${reason}.`);
+        debugLogger.warn(`Path ${pathName} is ignored and will be skipped.`);
         continue;
       }
       let currentPathSpec = pathName;
@@ -730,9 +729,8 @@ class Session {
     initialQueryText = initialQueryText.trim();
     // Inform user about ignored paths
     if (ignoredPaths.length > 0) {
-      const ignoreType = respectGitIgnore ? 'git-ignored' : 'custom-ignored';
       this.debug(
-        `Ignored ${ignoredPaths.length} ${ignoreType} files: ${ignoredPaths.join(', ')}`,
+        `Ignored ${ignoredPaths.length} files: ${ignoredPaths.join(', ')}`,
       );
     }
 
@@ -747,7 +745,6 @@ class Session {
     if (pathSpecsToRead.length > 0) {
       const toolArgs = {
         paths: pathSpecsToRead,
-        respectGitIgnore, // Use configuration setting
       };
 
       const callId = `${readManyFilesTool.name}-${Date.now()}`;
diff --git a/packages/core/src/services/fileDiscoveryService.test.ts b/packages/core/src/services/fileDiscoveryService.test.ts
index de7c561e4d..c09309b13b 100644
--- a/packages/core/src/services/fileDiscoveryService.test.ts
+++ b/packages/core/src/services/fileDiscoveryService.test.ts
@@ -40,8 +40,8 @@ describe('FileDiscoveryService', () => {
 
       const service = new FileDiscoveryService(projectRoot);
       // Let's check the effect of the parser instead of mocking it.
-      expect(service.shouldGitIgnoreFile('node_modules/foo.js')).toBe(true);
-      expect(service.shouldGitIgnoreFile('src/foo.js')).toBe(false);
+      expect(service.shouldIgnoreFile('node_modules/foo.js')).toBe(true);
+      expect(service.shouldIgnoreFile('src/foo.js')).toBe(false);
     });
 
     it('should not load git repo patterns when not in a git repo', async () => {
@@ -50,15 +50,15 @@ describe('FileDiscoveryService', () => {
       const service = new FileDiscoveryService(projectRoot);
 
       // .gitignore is not loaded in non-git repos
-      expect(service.shouldGitIgnoreFile('node_modules/foo.js')).toBe(false);
+      expect(service.shouldIgnoreFile('node_modules/foo.js')).toBe(false);
     });
 
     it('should load .geminiignore patterns even when not in a git repo', async () => {
       await createTestFile('.geminiignore', 'secrets.txt');
       const service = new FileDiscoveryService(projectRoot);
 
-      expect(service.shouldGeminiIgnoreFile('secrets.txt')).toBe(true);
-      expect(service.shouldGeminiIgnoreFile('src/index.js')).toBe(false);
+      expect(service.shouldIgnoreFile('secrets.txt')).toBe(true);
+      expect(service.shouldIgnoreFile('src/index.js')).toBe(false);
     });
   });
 
@@ -184,7 +184,7 @@ describe('FileDiscoveryService', () => {
       const service = new FileDiscoveryService(projectRoot);
 
       expect(
-        service.shouldGitIgnoreFile(
+        service.shouldIgnoreFile(
           path.join(projectRoot, 'node_modules/package/index.js'),
         ),
       ).toBe(true);
@@ -194,7 +194,7 @@ describe('FileDiscoveryService', () => {
       const service = new FileDiscoveryService(projectRoot);
 
       expect(
-        service.shouldGitIgnoreFile(path.join(projectRoot, 'src/index.ts')),
+        service.shouldIgnoreFile(path.join(projectRoot, 'src/index.ts')),
       ).toBe(false);
     });
 
@@ -202,7 +202,7 @@ describe('FileDiscoveryService', () => {
       const service = new FileDiscoveryService(projectRoot);
 
       expect(
-        service.shouldGeminiIgnoreFile(path.join(projectRoot, 'debug.log')),
+        service.shouldIgnoreFile(path.join(projectRoot, 'debug.log')),
       ).toBe(true);
     });
 
@@ -210,7 +210,7 @@ describe('FileDiscoveryService', () => {
       const service = new FileDiscoveryService(projectRoot);
 
       expect(
-        service.shouldGeminiIgnoreFile(path.join(projectRoot, 'src/index.ts')),
+        service.shouldIgnoreFile(path.join(projectRoot, 'src/index.ts')),
       ).toBe(false);
     });
   });
@@ -224,10 +224,10 @@ describe('FileDiscoveryService', () => {
       );
 
       expect(
-        service.shouldGitIgnoreFile(path.join(projectRoot, 'ignored.txt')),
+        service.shouldIgnoreFile(path.join(projectRoot, 'ignored.txt')),
       ).toBe(true);
       expect(
-        service.shouldGitIgnoreFile(path.join(projectRoot, 'not-ignored.txt')),
+        service.shouldIgnoreFile(path.join(projectRoot, 'not-ignored.txt')),
       ).toBe(false);
     });
 
diff --git a/packages/core/src/services/fileDiscoveryService.ts b/packages/core/src/services/fileDiscoveryService.ts
index 981e81127e..7b4d3398bd 100644
--- a/packages/core/src/services/fileDiscoveryService.ts
+++ b/packages/core/src/services/fileDiscoveryService.ts
@@ -37,21 +37,13 @@ export class FileDiscoveryService {
   /**
    * Filters a list of file paths based on git ignore rules
    */
-  filterFiles(
-    filePaths: string[],
-    options: FilterFilesOptions = {
-      respectGitIgnore: true,
-      respectGeminiIgnore: true,
-    },
-  ): string[] {
+  filterFiles(filePaths: string[], options: FilterFilesOptions = {}): string[] {
+    const { respectGitIgnore = true, respectGeminiIgnore = true } = options;
     return filePaths.filter((filePath) => {
-      if (options.respectGitIgnore && this.shouldGitIgnoreFile(filePath)) {
+      if (respectGitIgnore && this.gitIgnoreFilter?.isIgnored(filePath)) {
         return false;
       }
-      if (
-        options.respectGeminiIgnore &&
-        this.shouldGeminiIgnoreFile(filePath)
-      ) {
+      if (respectGeminiIgnore && this.geminiIgnoreFilter?.isIgnored(filePath)) {
         return false;
       }
       return true;
@@ -78,26 +70,6 @@ export class FileDiscoveryService {
     };
   }
 
-  /**
-   * Checks if a single file should be git-ignored
-   */
-  shouldGitIgnoreFile(filePath: string): boolean {
-    if (this.gitIgnoreFilter) {
-      return this.gitIgnoreFilter.isIgnored(filePath);
-    }
-    return false;
-  }
-
-  /**
-   * Checks if a single file should be gemini-ignored
-   */
-  shouldGeminiIgnoreFile(filePath: string): boolean {
-    if (this.geminiIgnoreFilter) {
-      return this.geminiIgnoreFilter.isIgnored(filePath);
-    }
-    return false;
-  }
-
   /**
    * Unified method to check if a file should be ignored based on filtering options
    */
@@ -105,14 +77,6 @@ export class FileDiscoveryService {
     filePath: string,
     options: FilterFilesOptions = {},
   ): boolean {
-    const { respectGitIgnore = true, respectGeminiIgnore = true } = options;
-
-    if (respectGitIgnore && this.shouldGitIgnoreFile(filePath)) {
-      return true;
-    }
-    if (respectGeminiIgnore && this.shouldGeminiIgnoreFile(filePath)) {
-      return true;
-    }
-    return false;
+    return this.filterFiles([filePath], options).length === 0;
   }
 }
diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts
index 74b94e8af9..825d807cc4 100644
--- a/packages/core/src/tools/read-file.test.ts
+++ b/packages/core/src/tools/read-file.test.ts
@@ -38,6 +38,10 @@ describe('ReadFileTool', () => {
       getFileSystemService: () => new StandardFileSystemService(),
       getTargetDir: () => tempRootDir,
       getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir),
+      getFileFilteringOptions: () => ({
+        respectGitIgnore: true,
+        respectGeminiIgnore: true,
+      }),
       storage: {
         getProjectTempDir: () => path.join(tempRootDir, '.temp'),
       },
@@ -462,7 +466,7 @@ describe('ReadFileTool', () => {
         const params: ReadFileToolParams = {
           absolute_path: ignoredFilePath,
         };
-        const expectedError = `File path '${ignoredFilePath}' is ignored by .geminiignore pattern(s).`;
+        const expectedError = `File path '${ignoredFilePath}' is ignored by configured ignore patterns.`;
         expect(() => tool.build(params)).toThrow(expectedError);
       });
 
@@ -474,7 +478,7 @@ describe('ReadFileTool', () => {
         const params: ReadFileToolParams = {
           absolute_path: ignoredFilePath,
         };
-        const expectedError = `File path '${ignoredFilePath}' is ignored by .geminiignore pattern(s).`;
+        const expectedError = `File path '${ignoredFilePath}' is ignored by configured ignore patterns.`;
         expect(() => tool.build(params)).toThrow(expectedError);
       });
 
diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts
index 9584865746..affb428907 100644
--- a/packages/core/src/tools/read-file.ts
+++ b/packages/core/src/tools/read-file.ts
@@ -210,8 +210,11 @@ export class ReadFileTool extends BaseDeclarativeTool<
     }
 
     const fileService = this.config.getFileService();
-    if (fileService.shouldGeminiIgnoreFile(params.absolute_path)) {
-      return `File path '${filePath}' is ignored by .geminiignore pattern(s).`;
+    const fileFilteringOptions = this.config.getFileFilteringOptions();
+    if (
+      fileService.shouldIgnoreFile(params.absolute_path, fileFilteringOptions)
+    ) {
+      return `File path '${filePath}' is ignored by configured ignore patterns.`;
     }
 
     return null;
diff --git a/packages/core/src/utils/getFolderStructure.ts b/packages/core/src/utils/getFolderStructure.ts
index 0b9c54cb90..141d4f542d 100644
--- a/packages/core/src/utils/getFolderStructure.ts
+++ b/packages/core/src/utils/getFolderStructure.ts
@@ -8,7 +8,10 @@ import * as fs from 'node:fs/promises';
 import type { Dirent } from 'node:fs';
 import * as path from 'node:path';
 import { getErrorMessage, isNodeError } from './errors.js';
-import type { FileDiscoveryService } from '../services/fileDiscoveryService.js';
+import type {
+  FileDiscoveryService,
+  FilterFilesOptions,
+} from '../services/fileDiscoveryService.js';
 import type { FileFilteringOptions } from '../config/constants.js';
 import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
 import { debugLogger } from './debugLogger.js';
@@ -119,6 +122,10 @@ async function readFullStructure(
 
     const filesInCurrentDir: string[] = [];
     const subFoldersInCurrentDir: FullFolderInfo[] = [];
+    const filterFileOptions: FilterFilesOptions = {
+      respectGitIgnore: options.fileFilteringOptions?.respectGitIgnore,
+      respectGeminiIgnore: options.fileFilteringOptions?.respectGeminiIgnore,
+    };
 
     // Process files first in the current directory
     for (const entry of entries) {
@@ -129,15 +136,10 @@ async function readFullStructure(
         }
         const fileName = entry.name;
         const filePath = path.join(currentPath, fileName);
-        if (options.fileService) {
-          const shouldIgnore =
-            (options.fileFilteringOptions.respectGitIgnore &&
-              options.fileService.shouldGitIgnoreFile(filePath)) ||
-            (options.fileFilteringOptions.respectGeminiIgnore &&
-              options.fileService.shouldGeminiIgnoreFile(filePath));
-          if (shouldIgnore) {
-            continue;
-          }
+        if (
+          options.fileService?.shouldIgnoreFile(filePath, filterFileOptions)
+        ) {
+          continue;
         }
         if (
           !options.fileIncludePattern ||
@@ -168,14 +170,11 @@ async function readFullStructure(
         const subFolderName = entry.name;
         const subFolderPath = path.join(currentPath, subFolderName);
 
-        let isIgnored = false;
-        if (options.fileService) {
-          isIgnored =
-            (options.fileFilteringOptions.respectGitIgnore &&
-              options.fileService.shouldGitIgnoreFile(subFolderPath)) ||
-            (options.fileFilteringOptions.respectGeminiIgnore &&
-              options.fileService.shouldGeminiIgnoreFile(subFolderPath));
-        }
+        const isIgnored =
+          options.fileService?.shouldIgnoreFile(
+            subFolderPath,
+            filterFileOptions,
+          ) ?? false;
 
         if (options.ignoredFolders.has(subFolderName) || isIgnored) {
           const ignoredSubFolder: FullFolderInfo = {

From bcd9735a739e05d4c7b3eebaf658e3b2f32e8a66 Mon Sep 17 00:00:00 2001
From: Qiyu-Wei <46917749+Qiyu-Wei@users.noreply.github.com>
Date: Sat, 25 Oct 2025 03:00:48 +0100
Subject: [PATCH 21/73] Fix typo in: packages/cli/src/utils/handleAutoUpdate.ts
 (#11809)

---
 packages/cli/src/utils/handleAutoUpdate.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/utils/handleAutoUpdate.ts b/packages/cli/src/utils/handleAutoUpdate.ts
index e546b0c6fc..ac2540af04 100644
--- a/packages/cli/src/utils/handleAutoUpdate.ts
+++ b/packages/cli/src/utils/handleAutoUpdate.ts
@@ -100,7 +100,7 @@ export function setUpdateHandler(
   setUpdateInfo: (info: UpdateObject | null) => void,
 ) {
   let successfullyInstalled = false;
-  const handleUpdateRecieved = (info: UpdateObject) => {
+  const handleUpdateReceived = (info: UpdateObject) => {
     setUpdateInfo(info);
     const savedMessage = info.message;
     setTimeout(() => {
@@ -150,13 +150,13 @@ export function setUpdateHandler(
     );
   };
 
-  updateEventEmitter.on('update-received', handleUpdateRecieved);
+  updateEventEmitter.on('update-received', handleUpdateReceived);
   updateEventEmitter.on('update-failed', handleUpdateFailed);
   updateEventEmitter.on('update-success', handleUpdateSuccess);
   updateEventEmitter.on('update-info', handleUpdateInfo);
 
   return () => {
-    updateEventEmitter.off('update-received', handleUpdateRecieved);
+    updateEventEmitter.off('update-received', handleUpdateReceived);
     updateEventEmitter.off('update-failed', handleUpdateFailed);
     updateEventEmitter.off('update-success', handleUpdateSuccess);
     updateEventEmitter.off('update-info', handleUpdateInfo);

From ce26b58f09c2e30daad408cd2f8bac30a5ae298a Mon Sep 17 00:00:00 2001
From: Lakshan Perera <39025880+0xlakshan@users.noreply.github.com>
Date: Sat, 25 Oct 2025 07:38:42 +0530
Subject: [PATCH 22/73] docs(contributing): update project structure section
 with missing packages (#11599)

---
 CONTRIBUTING.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 437ed94835..03e9ad6564 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -282,8 +282,13 @@ npm run lint
 ### Project Structure
 
 - `packages/`: Contains the individual sub-packages of the project.
+  - `a2a-server/`: A2A server implementation for the Gemini CLI. (Experimental)
   - `cli/`: The command-line interface.
   - `core/`: The core backend logic for the Gemini CLI.
+  - `test-utils/`: Utilities for creating and cleaning temporary file systems
+    for testing.
+  - `vscode-ide-companion/`: The Gemini CLI Companion extension pairs with
+    Gemini CLI.
 - `docs/`: Contains all project documentation.
 - `scripts/`: Utility scripts for building, testing, and development tasks.
 

From ef70e6323016f4391aa1f449408c70a381f1711c Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Fri, 24 Oct 2025 19:55:13 -0700
Subject: [PATCH 23/73] Make PASTE_WORKAROUND the default. (#12008)

---
 .../src/ui/components/InputPrompt.test.tsx    | 10 ++++-----
 .../src/ui/components/SettingsDialog.test.tsx |  2 +-
 .../cli/src/ui/contexts/KeypressContext.tsx   | 13 +----------
 packages/cli/src/ui/hooks/useKeypress.test.ts | 22 ++++---------------
 4 files changed, 11 insertions(+), 36 deletions(-)

diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index eed0020ffe..4fe14fbea0 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -752,7 +752,7 @@ describe('InputPrompt', () => {
     await wait();
 
     stdin.write('\x03'); // Ctrl+C character
-    await wait();
+    await wait(60);
 
     expect(props.buffer.setText).toHaveBeenCalledWith('');
     expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
@@ -766,7 +766,7 @@ describe('InputPrompt', () => {
     await wait();
 
     stdin.write('\x03'); // Ctrl+C character
-    await wait();
+    await wait(60);
 
     expect(props.buffer.setText).not.toHaveBeenCalled();
     unmount();
@@ -940,7 +940,7 @@ describe('InputPrompt', () => {
       await wait();
 
       stdin.write('\x1B[200~pasted text\x1B[201~');
-      await wait();
+      await wait(60);
 
       expect(mockBuffer.handleInput).toHaveBeenCalledWith(
         expect.objectContaining({
@@ -1331,7 +1331,7 @@ describe('InputPrompt', () => {
       await wait();
 
       stdin.write('\x1B');
-      await wait(100);
+      await wait(60);
 
       expect(props.buffer.setText).toHaveBeenCalledWith('');
       expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
@@ -1392,7 +1392,7 @@ describe('InputPrompt', () => {
       await wait();
 
       stdin.write('\x1B');
-      await wait(100);
+      await wait(60);
 
       expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
       unmount();
diff --git a/packages/cli/src/ui/components/SettingsDialog.test.tsx b/packages/cli/src/ui/components/SettingsDialog.test.tsx
index 4a36fafb75..908c1f994f 100644
--- a/packages/cli/src/ui/components/SettingsDialog.test.tsx
+++ b/packages/cli/src/ui/components/SettingsDialog.test.tsx
@@ -1348,7 +1348,7 @@ describe('SettingsDialog', () => {
 
       // Press Escape to exit
       stdin.write('\u001B');
-      await wait(100);
+      await wait(60);
 
       expect(onSelect).toHaveBeenCalledWith(undefined, 'User');
 
diff --git a/packages/cli/src/ui/contexts/KeypressContext.tsx b/packages/cli/src/ui/contexts/KeypressContext.tsx
index 060efe1e72..c7c2f0aef5 100644
--- a/packages/cli/src/ui/contexts/KeypressContext.tsx
+++ b/packages/cli/src/ui/contexts/KeypressContext.tsx
@@ -481,19 +481,8 @@ export function useKeypressContext() {
   return context;
 }
 
-/**
- * Determines if the passthrough stream workaround should be used.
- * This is necessary for Node.js versions older than 20 or when the
- * PASTE_WORKAROUND environment variable is set, to correctly handle
- * paste events.
- */
 function shouldUsePassthrough(): boolean {
-  const nodeMajorVersion = parseInt(process.versions.node.split('.')[0], 10);
-  return (
-    nodeMajorVersion < 20 ||
-    process.env['PASTE_WORKAROUND'] === '1' ||
-    process.env['PASTE_WORKAROUND'] === 'true'
-  );
+  return process.env['PASTE_WORKAROUND'] !== 'false';
 }
 
 export function KeypressProvider({
diff --git a/packages/cli/src/ui/hooks/useKeypress.test.ts b/packages/cli/src/ui/hooks/useKeypress.test.ts
index 770a86fed0..07fcf62ead 100644
--- a/packages/cli/src/ui/hooks/useKeypress.test.ts
+++ b/packages/cli/src/ui/hooks/useKeypress.test.ts
@@ -66,13 +66,6 @@ describe('useKeypress', () => {
     });
   });
 
-  const setNodeVersion = (version: string) => {
-    Object.defineProperty(process.versions, 'node', {
-      value: version,
-      configurable: true,
-    });
-  };
-
   it('should not listen if isActive is false', () => {
     renderHook(() => useKeypress(onKeypress, { isActive: false }), {
       wrapper,
@@ -124,19 +117,12 @@ describe('useKeypress', () => {
 
   describe.each([
     {
-      description: 'Modern Node (>= v20)',
-      setup: () => setNodeVersion('20.0.0'),
+      description: 'PASTE_WORKAROUND true',
+      setup: () => vi.stubEnv('PASTE_WORKAROUND', 'true'),
     },
     {
-      description: 'Legacy Node (< v20)',
-      setup: () => setNodeVersion('18.0.0'),
-    },
-    {
-      description: 'Workaround Env Var',
-      setup: () => {
-        setNodeVersion('20.0.0');
-        vi.stubEnv('PASTE_WORKAROUND', 'true');
-      },
+      description: 'PASTE_WORKAROUND false',
+      setup: () => vi.stubEnv('PASTE_WORKAROUND', 'false'),
     },
   ])('in $description', ({ setup }) => {
     beforeEach(() => {

From 51578397a5f0e48ac0e73b2dec42b97a2ad4febc Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Fri, 24 Oct 2025 20:32:21 -0700
Subject: [PATCH 24/73] refactor(cli): replace custom wait with vi.waitFor in
 InputPrompt tests (#12005)

---
 .../src/ui/components/InputPrompt.test.tsx    | 924 ++++++++++--------
 .../__snapshots__/InputPrompt.test.tsx.snap   |  34 +-
 2 files changed, 549 insertions(+), 409 deletions(-)

diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index 4fe14fbea0..33c53b8e2f 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -5,7 +5,7 @@
  */
 
 import { renderWithProviders } from '../../test-utils/render.js';
-import { waitFor, act } from '@testing-library/react';
+import { act } from '@testing-library/react';
 import type { InputPromptProps } from './InputPrompt.js';
 import { InputPrompt } from './InputPrompt.js';
 import type { TextBuffer } from './shared/text-buffer.js';
@@ -233,29 +233,29 @@ describe('InputPrompt', () => {
     };
   });
 
-  const wait = (ms = 50) => new Promise((resolve) => setTimeout(resolve, ms));
-
   it('should call shellHistory.getPreviousCommand on up arrow in shell mode', async () => {
     props.shellModeActive = true;
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\u001B[A');
-    await wait();
-
-    expect(mockShellHistory.getPreviousCommand).toHaveBeenCalled();
+    await act(async () => {
+      stdin.write('\u001B[A');
+    });
+    await vi.waitFor(() =>
+      expect(mockShellHistory.getPreviousCommand).toHaveBeenCalled(),
+    );
     unmount();
   });
 
   it('should call shellHistory.getNextCommand on down arrow in shell mode', async () => {
     props.shellModeActive = true;
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\u001B[B');
-    await wait();
-
-    expect(mockShellHistory.getNextCommand).toHaveBeenCalled();
+    await act(async () => {
+      stdin.write('\u001B[B');
+    });
+    await vi.waitFor(() =>
+      expect(mockShellHistory.getNextCommand).toHaveBeenCalled(),
+    );
     unmount();
   });
 
@@ -265,13 +265,14 @@ describe('InputPrompt', () => {
       'previous command',
     );
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\u001B[A');
-    await wait();
-
-    expect(mockShellHistory.getPreviousCommand).toHaveBeenCalled();
-    expect(props.buffer.setText).toHaveBeenCalledWith('previous command');
+    await act(async () => {
+      stdin.write('\u001B[A');
+    });
+    await vi.waitFor(() => {
+      expect(mockShellHistory.getPreviousCommand).toHaveBeenCalled();
+      expect(props.buffer.setText).toHaveBeenCalledWith('previous command');
+    });
     unmount();
   });
 
@@ -279,35 +280,47 @@ describe('InputPrompt', () => {
     props.shellModeActive = true;
     props.buffer.setText('ls -l');
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r');
-    await wait();
-
-    expect(mockShellHistory.addCommandToHistory).toHaveBeenCalledWith('ls -l');
-    expect(props.onSubmit).toHaveBeenCalledWith('ls -l');
+    await act(async () => {
+      stdin.write('\r');
+    });
+    await vi.waitFor(() => {
+      expect(mockShellHistory.addCommandToHistory).toHaveBeenCalledWith(
+        'ls -l',
+      );
+      expect(props.onSubmit).toHaveBeenCalledWith('ls -l');
+    });
     unmount();
   });
 
   it('should NOT call shell history methods when not in shell mode', async () => {
     props.buffer.setText('some text');
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\u001B[A'); // Up arrow
-    await wait();
-    stdin.write('\u001B[B'); // Down arrow
-    await wait();
-    stdin.write('\r'); // Enter
-    await wait();
+    await act(async () => {
+      stdin.write('\u001B[A'); // Up arrow
+    });
+    await vi.waitFor(() =>
+      expect(mockInputHistory.navigateUp).toHaveBeenCalled(),
+    );
+
+    await act(async () => {
+      stdin.write('\u001B[B'); // Down arrow
+    });
+    await vi.waitFor(() =>
+      expect(mockInputHistory.navigateDown).toHaveBeenCalled(),
+    );
+
+    await act(async () => {
+      stdin.write('\r'); // Enter
+    });
+    await vi.waitFor(() =>
+      expect(props.onSubmit).toHaveBeenCalledWith('some text'),
+    );
 
     expect(mockShellHistory.getPreviousCommand).not.toHaveBeenCalled();
     expect(mockShellHistory.getNextCommand).not.toHaveBeenCalled();
     expect(mockShellHistory.addCommandToHistory).not.toHaveBeenCalled();
-
-    expect(mockInputHistory.navigateUp).toHaveBeenCalled();
-    expect(mockInputHistory.navigateDown).toHaveBeenCalled();
-    expect(props.onSubmit).toHaveBeenCalledWith('some text');
     unmount();
   });
 
@@ -324,15 +337,21 @@ describe('InputPrompt', () => {
     props.buffer.setText('/mem');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
     // Test up arrow
-    stdin.write('\u001B[A'); // Up arrow
-    await wait();
+    await act(async () => {
+      stdin.write('\u001B[A'); // Up arrow
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.navigateUp).toHaveBeenCalledTimes(1),
+    );
 
-    stdin.write('\u0010'); // Ctrl+P
-    await wait();
-    expect(mockCommandCompletion.navigateUp).toHaveBeenCalledTimes(2);
+    await act(async () => {
+      stdin.write('\u0010'); // Ctrl+P
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.navigateUp).toHaveBeenCalledTimes(2),
+    );
     expect(mockCommandCompletion.navigateDown).not.toHaveBeenCalled();
 
     unmount();
@@ -350,15 +369,21 @@ describe('InputPrompt', () => {
     props.buffer.setText('/mem');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
     // Test down arrow
-    stdin.write('\u001B[B'); // Down arrow
-    await wait();
+    await act(async () => {
+      stdin.write('\u001B[B'); // Down arrow
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.navigateDown).toHaveBeenCalledTimes(1),
+    );
 
-    stdin.write('\u000E'); // Ctrl+N
-    await wait();
-    expect(mockCommandCompletion.navigateDown).toHaveBeenCalledTimes(2);
+    await act(async () => {
+      stdin.write('\u000E'); // Ctrl+N
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.navigateDown).toHaveBeenCalledTimes(2),
+    );
     expect(mockCommandCompletion.navigateUp).not.toHaveBeenCalled();
 
     unmount();
@@ -372,16 +397,27 @@ describe('InputPrompt', () => {
     props.buffer.setText('some text');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\u001B[A'); // Up arrow
-    await wait();
-    stdin.write('\u001B[B'); // Down arrow
-    await wait();
-    stdin.write('\u0010'); // Ctrl+P
-    await wait();
-    stdin.write('\u000E'); // Ctrl+N
-    await wait();
+    await act(async () => {
+      stdin.write('\u001B[A'); // Up arrow
+    });
+    await vi.waitFor(() =>
+      expect(mockInputHistory.navigateUp).toHaveBeenCalled(),
+    );
+    await act(async () => {
+      stdin.write('\u001B[B'); // Down arrow
+    });
+    await vi.waitFor(() =>
+      expect(mockInputHistory.navigateDown).toHaveBeenCalled(),
+    );
+    await act(async () => {
+      stdin.write('\u0010'); // Ctrl+P
+    });
+    await vi.waitFor(() => {});
+    await act(async () => {
+      stdin.write('\u000E'); // Ctrl+N
+    });
+    await vi.waitFor(() => {});
 
     expect(mockCommandCompletion.navigateUp).not.toHaveBeenCalled();
     expect(mockCommandCompletion.navigateDown).not.toHaveBeenCalled();
@@ -406,20 +442,21 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
       // Send Ctrl+V
-      stdin.write('\x16'); // Ctrl+V
-      await wait();
-
-      expect(clipboardUtils.clipboardHasImage).toHaveBeenCalled();
-      expect(clipboardUtils.saveClipboardImage).toHaveBeenCalledWith(
-        props.config.getTargetDir(),
-      );
-      expect(clipboardUtils.cleanupOldClipboardImages).toHaveBeenCalledWith(
-        props.config.getTargetDir(),
-      );
-      expect(mockBuffer.replaceRangeByOffset).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\x16'); // Ctrl+V
+      });
+      await vi.waitFor(() => {
+        expect(clipboardUtils.clipboardHasImage).toHaveBeenCalled();
+        expect(clipboardUtils.saveClipboardImage).toHaveBeenCalledWith(
+          props.config.getTargetDir(),
+        );
+        expect(clipboardUtils.cleanupOldClipboardImages).toHaveBeenCalledWith(
+          props.config.getTargetDir(),
+        );
+        expect(mockBuffer.replaceRangeByOffset).toHaveBeenCalled();
+      });
       unmount();
     });
 
@@ -429,12 +466,13 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x16'); // Ctrl+V
-      await wait();
-
-      expect(clipboardUtils.clipboardHasImage).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\x16'); // Ctrl+V
+      });
+      await vi.waitFor(() => {
+        expect(clipboardUtils.clipboardHasImage).toHaveBeenCalled();
+      });
       expect(clipboardUtils.saveClipboardImage).not.toHaveBeenCalled();
       expect(mockBuffer.setText).not.toHaveBeenCalled();
       unmount();
@@ -447,12 +485,13 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x16'); // Ctrl+V
-      await wait();
-
-      expect(clipboardUtils.saveClipboardImage).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\x16'); // Ctrl+V
+      });
+      await vi.waitFor(() => {
+        expect(clipboardUtils.saveClipboardImage).toHaveBeenCalled();
+      });
       expect(mockBuffer.setText).not.toHaveBeenCalled();
       unmount();
     });
@@ -475,13 +514,14 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x16'); // Ctrl+V
-      await wait();
-
-      // Should insert at cursor position with spaces
-      expect(mockBuffer.replaceRangeByOffset).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\x16'); // Ctrl+V
+      });
+      await vi.waitFor(() => {
+        // Should insert at cursor position with spaces
+        expect(mockBuffer.replaceRangeByOffset).toHaveBeenCalled();
+      });
 
       // Get the actual call to see what path was used
       const actualCall = vi.mocked(mockBuffer.replaceRangeByOffset).mock
@@ -505,15 +545,16 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x16'); // Ctrl+V
-      await wait();
-
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
-        'Error handling clipboard image:',
-        expect.any(Error),
-      );
+      await act(async () => {
+        stdin.write('\x16'); // Ctrl+V
+      });
+      await vi.waitFor(() => {
+        expect(consoleErrorSpy).toHaveBeenCalledWith(
+          'Error handling clipboard image:',
+          expect.any(Error),
+        );
+      });
       expect(mockBuffer.setText).not.toHaveBeenCalled();
 
       consoleErrorSpy.mockRestore();
@@ -532,12 +573,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('/mem');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\t'); // Press Tab
-    await wait();
-
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    await act(async () => {
+      stdin.write('\t'); // Press Tab
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0),
+    );
     unmount();
   });
 
@@ -555,12 +597,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('/memory ');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\t'); // Press Tab
-    await wait();
-
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(1);
+    await act(async () => {
+      stdin.write('\t'); // Press Tab
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(1),
+    );
     unmount();
   });
 
@@ -579,13 +622,14 @@ describe('InputPrompt', () => {
     props.buffer.setText('/memory');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\t'); // Press Tab
-    await wait();
-
-    // It should NOT become '/show'. It should correctly become '/memory show'.
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    await act(async () => {
+      stdin.write('\t'); // Press Tab
+    });
+    await vi.waitFor(() =>
+      // It should NOT become '/show'. It should correctly become '/memory show'.
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0),
+    );
     unmount();
   });
 
@@ -600,12 +644,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('/chat resume fi-');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\t'); // Press Tab
-    await wait();
-
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    await act(async () => {
+      stdin.write('\t'); // Press Tab
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0),
+    );
     unmount();
   });
 
@@ -619,13 +664,14 @@ describe('InputPrompt', () => {
     props.buffer.setText('/mem');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r');
-    await wait();
-
-    // The app should autocomplete the text, NOT submit.
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    await act(async () => {
+      stdin.write('\r');
+    });
+    await vi.waitFor(() => {
+      // The app should autocomplete the text, NOT submit.
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    });
 
     expect(props.onSubmit).not.toHaveBeenCalled();
     unmount();
@@ -650,12 +696,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('/?');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\t'); // Press Tab for autocomplete
-    await wait();
-
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    await act(async () => {
+      stdin.write('\t'); // Press Tab for autocomplete
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0),
+    );
     unmount();
   });
 
@@ -663,10 +710,11 @@ describe('InputPrompt', () => {
     props.buffer.setText('   '); // Set buffer to whitespace
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r'); // Press Enter
-    await wait();
+    await act(async () => {
+      stdin.write('\r'); // Press Enter
+    });
+    await vi.waitFor(() => {});
 
     expect(props.onSubmit).not.toHaveBeenCalled();
     unmount();
@@ -681,12 +729,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('/clear');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r');
-    await wait();
-
-    expect(props.onSubmit).toHaveBeenCalledWith('/clear');
+    await act(async () => {
+      stdin.write('\r');
+    });
+    await vi.waitFor(() =>
+      expect(props.onSubmit).toHaveBeenCalledWith('/clear'),
+    );
     unmount();
   });
 
@@ -699,12 +748,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('/clear');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r');
-    await wait();
-
-    expect(props.onSubmit).toHaveBeenCalledWith('/clear');
+    await act(async () => {
+      stdin.write('\r');
+    });
+    await vi.waitFor(() =>
+      expect(props.onSubmit).toHaveBeenCalledWith('/clear'),
+    );
     unmount();
   });
 
@@ -718,12 +768,13 @@ describe('InputPrompt', () => {
     props.buffer.setText('@src/components/');
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r');
-    await wait();
-
-    expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0);
+    await act(async () => {
+      stdin.write('\r');
+    });
+    await vi.waitFor(() =>
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalledWith(0),
+    );
     expect(props.onSubmit).not.toHaveBeenCalled();
     unmount();
   });
@@ -735,27 +786,30 @@ describe('InputPrompt', () => {
     mockBuffer.lines = ['first line\\'];
 
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\r');
-    await wait();
+    await act(async () => {
+      stdin.write('\r');
+    });
+    await vi.waitFor(() => {
+      expect(props.buffer.backspace).toHaveBeenCalled();
+      expect(props.buffer.newline).toHaveBeenCalled();
+    });
 
     expect(props.onSubmit).not.toHaveBeenCalled();
-    expect(props.buffer.backspace).toHaveBeenCalled();
-    expect(props.buffer.newline).toHaveBeenCalled();
     unmount();
   });
 
   it('should clear the buffer on Ctrl+C if it has text', async () => {
     props.buffer.setText('some text to clear');
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\x03'); // Ctrl+C character
-    await wait(60);
-
-    expect(props.buffer.setText).toHaveBeenCalledWith('');
-    expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
+    await act(async () => {
+      stdin.write('\x03'); // Ctrl+C character
+    });
+    await vi.waitFor(() => {
+      expect(props.buffer.setText).toHaveBeenCalledWith('');
+      expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
+    });
     expect(props.onSubmit).not.toHaveBeenCalled();
     unmount();
   });
@@ -763,10 +817,11 @@ describe('InputPrompt', () => {
   it('should NOT clear the buffer on Ctrl+C if it is empty', async () => {
     props.buffer.text = '';
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
-    await wait();
 
-    stdin.write('\x03'); // Ctrl+C character
-    await wait(60);
+    await act(async () => {
+      stdin.write('\x03'); // Ctrl+C character
+    });
+    await vi.waitFor(() => {});
 
     expect(props.buffer.setText).not.toHaveBeenCalled();
     unmount();
@@ -866,18 +921,19 @@ describe('InputPrompt', () => {
       });
 
       const { unmount } = renderWithProviders(<InputPrompt {...props} />);
-      await wait();
 
-      expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
-        mockBuffer,
-        ['/test/project/src'],
-        path.join('test', 'project', 'src'),
-        mockSlashCommands,
-        mockCommandContext,
-        false,
-        false,
-        expect.any(Object),
-      );
+      await vi.waitFor(() => {
+        expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
+          mockBuffer,
+          ['/test/project/src'],
+          path.join('test', 'project', 'src'),
+          mockSlashCommands,
+          mockCommandContext,
+          false,
+          false,
+          expect.any(Object),
+        );
+      });
 
       unmount();
     });
@@ -889,12 +945,13 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('i');
-      await wait();
-
-      expect(props.vimHandleInput).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('i');
+      });
+      await vi.waitFor(() => {
+        expect(props.vimHandleInput).toHaveBeenCalled();
+      });
       expect(mockBuffer.handleInput).not.toHaveBeenCalled();
       unmount();
     });
@@ -904,13 +961,14 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('i');
-      await wait();
-
-      expect(props.vimHandleInput).toHaveBeenCalled();
-      expect(mockBuffer.handleInput).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('i');
+      });
+      await vi.waitFor(() => {
+        expect(props.vimHandleInput).toHaveBeenCalled();
+        expect(mockBuffer.handleInput).toHaveBeenCalled();
+      });
       unmount();
     });
 
@@ -920,13 +978,14 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('i');
-      await wait();
-
-      expect(props.vimHandleInput).toHaveBeenCalled();
-      expect(mockBuffer.handleInput).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('i');
+      });
+      await vi.waitFor(() => {
+        expect(props.vimHandleInput).toHaveBeenCalled();
+        expect(mockBuffer.handleInput).toHaveBeenCalled();
+      });
       unmount();
     });
   });
@@ -937,17 +996,18 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x1B[200~pasted text\x1B[201~');
-      await wait(60);
-
-      expect(mockBuffer.handleInput).toHaveBeenCalledWith(
-        expect.objectContaining({
-          paste: true,
-          sequence: 'pasted text',
-        }),
-      );
+      await act(async () => {
+        stdin.write('\x1B[200~pasted text\x1B[201~');
+      });
+      await vi.waitFor(() => {
+        expect(mockBuffer.handleInput).toHaveBeenCalledWith(
+          expect.objectContaining({
+            paste: true,
+            sequence: 'pasted text',
+          }),
+        );
+      });
       unmount();
     });
 
@@ -956,10 +1016,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('a');
-      await wait();
+      await act(async () => {
+        stdin.write('a');
+      });
+      await vi.waitFor(() => {});
 
       expect(mockBuffer.handleInput).not.toHaveBeenCalled();
       unmount();
@@ -1028,10 +1089,11 @@ describe('InputPrompt', () => {
           const { stdout, unmount } = renderWithProviders(
             <InputPrompt {...props} />,
           );
-          await wait();
 
-          const frame = stdout.lastFrame();
-          expect(frame).toContain(expected);
+          await vi.waitFor(() => {
+            const frame = stdout.lastFrame();
+            expect(frame).toContain(expected);
+          });
           unmount();
         },
       );
@@ -1084,10 +1146,11 @@ describe('InputPrompt', () => {
           const { stdout, unmount } = renderWithProviders(
             <InputPrompt {...props} />,
           );
-          await wait();
 
-          const frame = stdout.lastFrame();
-          expect(frame).toContain(expected);
+          await vi.waitFor(() => {
+            const frame = stdout.lastFrame();
+            expect(frame).toContain(expected);
+          });
           unmount();
         },
       );
@@ -1107,14 +1170,15 @@ describe('InputPrompt', () => {
         const { stdout, unmount } = renderWithProviders(
           <InputPrompt {...props} />,
         );
-        await wait();
 
-        const frame = stdout.lastFrame();
-        const lines = frame!.split('\n');
-        // The line with the cursor should just be an inverted space inside the box border
-        expect(
-          lines.find((l) => l.includes(chalk.inverse(' '))),
-        ).not.toBeUndefined();
+        await vi.waitFor(() => {
+          const frame = stdout.lastFrame();
+          const lines = frame!.split('\n');
+          // The line with the cursor should just be an inverted space inside the box border
+          expect(
+            lines.find((l) => l.includes(chalk.inverse(' '))),
+          ).not.toBeUndefined();
+        });
         unmount();
       });
     });
@@ -1138,17 +1202,18 @@ describe('InputPrompt', () => {
       const { stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      const frame = stdout.lastFrame();
-      // Check that all lines, including the empty one, are rendered.
-      // This implicitly tests that the Box wrapper provides height for the empty line.
-      expect(frame).toContain('hello');
-      expect(frame).toContain(`world${chalk.inverse(' ')}`);
+      await vi.waitFor(() => {
+        const frame = stdout.lastFrame();
+        // Check that all lines, including the empty one, are rendered.
+        // This implicitly tests that the Box wrapper provides height for the empty line.
+        expect(frame).toContain('hello');
+        expect(frame).toContain(`world${chalk.inverse(' ')}`);
 
-      const outputLines = frame!.split('\n');
-      // The number of lines should be 2 for the border plus 3 for the content.
-      expect(outputLines.length).toBe(5);
+        const outputLines = frame!.split('\n');
+        // The number of lines should be 2 for the border plus 3 for the content.
+        expect(outputLines.length).toBe(5);
+      });
       unmount();
     });
   });
@@ -1171,20 +1236,21 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
       // Simulate a bracketed paste event from the terminal
-      stdin.write(`\x1b[200~${pastedText}\x1b[201~`);
-      await wait();
-
-      // Verify that the buffer's handleInput was called once with the full text
-      expect(props.buffer.handleInput).toHaveBeenCalledTimes(1);
-      expect(props.buffer.handleInput).toHaveBeenCalledWith(
-        expect.objectContaining({
-          paste: true,
-          sequence: pastedText,
-        }),
-      );
+      await act(async () => {
+        stdin.write(`\x1b[200~${pastedText}\x1b[201~`);
+      });
+      await vi.waitFor(() => {
+        // Verify that the buffer's handleInput was called once with the full text
+        expect(props.buffer.handleInput).toHaveBeenCalledTimes(1);
+        expect(props.buffer.handleInput).toHaveBeenCalledWith(
+          expect.objectContaining({
+            paste: true,
+            sequence: pastedText,
+          }),
+        );
+      });
 
       unmount();
     });
@@ -1214,12 +1280,14 @@ describe('InputPrompt', () => {
       await vi.runAllTimersAsync();
 
       // Simulate a paste operation (this should set the paste protection)
-      act(() => {
+      await act(async () => {
         stdin.write(`\x1b[200~pasted content\x1b[201~`);
       });
 
       // Simulate an Enter key press immediately after paste
-      stdin.write('\r');
+      await act(async () => {
+        stdin.write('\r');
+      });
       await vi.runAllTimersAsync();
 
       // Verify that onSubmit was NOT called due to recent paste protection
@@ -1239,7 +1307,7 @@ describe('InputPrompt', () => {
       await vi.runAllTimersAsync();
 
       // Simulate a paste operation (this sets the protection)
-      act(() => {
+      await act(async () => {
         stdin.write('\x1b[200~pasted text\x1b[201~');
       });
       await vi.runAllTimersAsync();
@@ -1250,7 +1318,9 @@ describe('InputPrompt', () => {
       });
 
       // Now Enter should work normally
-      stdin.write('\r');
+      await act(async () => {
+        stdin.write('\r');
+      });
       await vi.runAllTimersAsync();
 
       expect(props.onSubmit).toHaveBeenCalledWith('pasted text');
@@ -1282,11 +1352,15 @@ describe('InputPrompt', () => {
         await vi.runAllTimersAsync();
 
         // Simulate a paste operation
-        stdin.write('\x1b[200~some pasted stuff\x1b[201~');
+        await act(async () => {
+          stdin.write('\x1b[200~some pasted stuff\x1b[201~');
+        });
         await vi.runAllTimersAsync();
 
         // Simulate an Enter key press immediately after paste
-        stdin.write('\r');
+        await act(async () => {
+          stdin.write('\r');
+        });
         await vi.runAllTimersAsync();
 
         // Verify that onSubmit was called
@@ -1305,7 +1379,9 @@ describe('InputPrompt', () => {
       await vi.runAllTimersAsync();
 
       // Press Enter without any recent paste
-      stdin.write('\r');
+      await act(async () => {
+        stdin.write('\r');
+      });
       await vi.runAllTimersAsync();
 
       // Verify that onSubmit was called normally
@@ -1325,16 +1401,21 @@ describe('InputPrompt', () => {
         <InputPrompt {...props} />,
         { kittyProtocolEnabled: false },
       );
-      await wait();
 
-      stdin.write('\x1B');
-      await wait();
+      await act(async () => {
+        stdin.write('\x1B');
+      });
+      await vi.waitFor(() => {
+        expect(onEscapePromptChange).toHaveBeenCalledWith(true);
+      });
 
-      stdin.write('\x1B');
-      await wait(60);
-
-      expect(props.buffer.setText).toHaveBeenCalledWith('');
-      expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\x1B');
+      });
+      await vi.waitFor(() => {
+        expect(props.buffer.setText).toHaveBeenCalledWith('');
+        expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
+      });
       unmount();
     });
 
@@ -1348,15 +1429,19 @@ describe('InputPrompt', () => {
         { kittyProtocolEnabled: false },
       );
 
-      stdin.write('\x1B');
+      await act(async () => {
+        stdin.write('\x1B');
+      });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(onEscapePromptChange).toHaveBeenCalledWith(true);
       });
 
-      stdin.write('a');
+      await act(async () => {
+        stdin.write('a');
+      });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(onEscapePromptChange).toHaveBeenCalledWith(false);
       });
       unmount();
@@ -1369,12 +1454,13 @@ describe('InputPrompt', () => {
         <InputPrompt {...props} />,
         { kittyProtocolEnabled: false },
       );
-      await wait();
 
-      stdin.write('\x1B');
-      await wait(100);
-
-      expect(props.setShellModeActive).toHaveBeenCalledWith(false);
+      await act(async () => {
+        stdin.write('\x1B');
+      });
+      await vi.waitFor(() =>
+        expect(props.setShellModeActive).toHaveBeenCalledWith(false),
+      );
       unmount();
     });
 
@@ -1389,12 +1475,13 @@ describe('InputPrompt', () => {
         <InputPrompt {...props} />,
         { kittyProtocolEnabled: false },
       );
-      await wait();
 
-      stdin.write('\x1B');
-      await wait(60);
-
-      expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\x1B');
+      });
+      await vi.waitFor(() =>
+        expect(mockCommandCompletion.resetCompletionState).toHaveBeenCalled(),
+      );
       unmount();
     });
 
@@ -1409,7 +1496,9 @@ describe('InputPrompt', () => {
       );
       await vi.runAllTimersAsync();
 
-      stdin.write('\x1B');
+      await act(async () => {
+        stdin.write('\x1B');
+      });
       await vi.runAllTimersAsync();
 
       vi.useRealTimers();
@@ -1421,17 +1510,18 @@ describe('InputPrompt', () => {
         <InputPrompt {...props} />,
         { kittyProtocolEnabled: false },
       );
-      await wait();
 
-      stdin.write('\x0C');
-      await wait();
+      await act(async () => {
+        stdin.write('\x0C');
+      });
+      await vi.waitFor(() => expect(props.onClearScreen).toHaveBeenCalled());
 
-      expect(props.onClearScreen).toHaveBeenCalled();
-
-      stdin.write('\x01');
-      await wait();
-
-      expect(props.buffer.move).toHaveBeenCalledWith('home');
+      await act(async () => {
+        stdin.write('\x01');
+      });
+      await vi.waitFor(() =>
+        expect(props.buffer.move).toHaveBeenCalledWith('home'),
+      );
       unmount();
     });
   });
@@ -1465,14 +1555,13 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
       // Trigger reverse search with Ctrl+R
-      act(() => {
+      await act(async () => {
         stdin.write('\x12');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const frame = stdout.lastFrame();
         expect(frame).toContain('(r:)');
         expect(frame).toContain('echo hello');
@@ -1487,14 +1576,19 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x12');
-      await wait();
-      stdin.write('\x1B');
-      stdin.write('\u001b[27u'); // Press kitty escape key
+      await act(async () => {
+        stdin.write('\x12');
+      });
+      await vi.waitFor(() => expect(stdout.lastFrame()).toContain('(r:)'));
+      await act(async () => {
+        stdin.write('\x1B');
+      });
+      await act(async () => {
+        stdin.write('\u001b[27u'); // Press kitty escape key
+      });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(stdout.lastFrame()).not.toContain('(r:)');
       });
 
@@ -1530,23 +1624,23 @@ describe('InputPrompt', () => {
       );
 
       // Enter reverse search mode with Ctrl+R
-      act(() => {
+      await act(async () => {
         stdin.write('\x12');
       });
 
       // Verify reverse search is active
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(stdout.lastFrame()).toContain('(r:)');
       });
 
       // Press Tab to complete the highlighted entry
-      act(() => {
+      await act(async () => {
         stdin.write('\t');
       });
-      await wait();
-
-      expect(mockHandleAutocomplete).toHaveBeenCalledWith(0);
-      expect(props.buffer.setText).toHaveBeenCalledWith('echo hello');
+      await vi.waitFor(() => {
+        expect(mockHandleAutocomplete).toHaveBeenCalledWith(0);
+        expect(props.buffer.setText).toHaveBeenCalledWith('echo hello');
+      });
       unmount();
     }, 15000);
 
@@ -1567,19 +1661,19 @@ describe('InputPrompt', () => {
         <InputPrompt {...props} />,
       );
 
-      act(() => {
+      await act(async () => {
         stdin.write('\x12');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(stdout.lastFrame()).toContain('(r:)');
       });
 
-      act(() => {
+      await act(async () => {
         stdin.write('\r');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(stdout.lastFrame()).not.toContain('(r:)');
       });
 
@@ -1608,23 +1702,22 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
       // reverse search with Ctrl+R
-      act(() => {
+      await act(async () => {
         stdin.write('\x12');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(stdout.lastFrame()).toContain('(r:)');
       });
 
       // Press kitty escape key
-      act(() => {
+      await act(async () => {
         stdin.write('\u001b[27u');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(stdout.lastFrame()).not.toContain('(r:)');
         expect(props.buffer.text).toBe(initialText);
         expect(props.buffer.cursor).toEqual(initialCursor);
@@ -1643,12 +1736,13 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x05'); // Ctrl+E
-      await wait();
-
-      expect(props.buffer.move).toHaveBeenCalledWith('end');
+      await act(async () => {
+        stdin.write('\x05'); // Ctrl+E
+      });
+      await vi.waitFor(() => {
+        expect(props.buffer.move).toHaveBeenCalledWith('end');
+      });
       expect(props.buffer.moveToOffset).not.toHaveBeenCalled();
       unmount();
     });
@@ -1661,12 +1755,13 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x05'); // Ctrl+E
-      await wait();
-
-      expect(props.buffer.move).toHaveBeenCalledWith('end');
+      await act(async () => {
+        stdin.write('\x05'); // Ctrl+E
+      });
+      await vi.waitFor(() => {
+        expect(props.buffer.move).toHaveBeenCalledWith('end');
+      });
       expect(props.buffer.moveToOffset).not.toHaveBeenCalled();
       unmount();
     });
@@ -1693,17 +1788,17 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      act(() => {
+      await act(async () => {
         stdin.write('\x12'); // Ctrl+R
       });
-      await wait();
 
-      const frame = stdout.lastFrame() ?? '';
-      expect(frame).toContain('(r:)');
-      expect(frame).toContain('git commit');
-      expect(frame).toContain('git push');
+      await vi.waitFor(() => {
+        const frame = stdout.lastFrame() ?? '';
+        expect(frame).toContain('(r:)');
+        expect(frame).toContain('git commit');
+        expect(frame).toContain('git push');
+      });
       unmount();
     });
 
@@ -1723,25 +1818,32 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x12');
-      await wait();
+      await act(async () => {
+        stdin.write('\x12');
+      });
+      await vi.waitFor(() => {
+        expect(clean(stdout.lastFrame())).toContain('→');
+      });
 
-      expect(clean(stdout.lastFrame())).toContain('→');
-
-      stdin.write('\u001B[C');
-      await wait(200);
-      expect(clean(stdout.lastFrame())).toContain('←');
+      await act(async () => {
+        stdin.write('\u001B[C');
+      });
+      await vi.waitFor(() => {
+        expect(clean(stdout.lastFrame())).toContain('←');
+      });
       expect(stdout.lastFrame()).toMatchSnapshot(
-        'command-search-expanded-match',
+        'command-search-render-expanded-match',
       );
 
-      stdin.write('\u001B[D');
-      await wait();
-      expect(clean(stdout.lastFrame())).toContain('→');
+      await act(async () => {
+        stdin.write('\u001B[D');
+      });
+      await vi.waitFor(() => {
+        expect(clean(stdout.lastFrame())).toContain('→');
+      });
       expect(stdout.lastFrame()).toMatchSnapshot(
-        'command-search-collapsed-match',
+        'command-search-render-collapsed-match',
       );
       unmount();
     });
@@ -1765,19 +1867,24 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x12');
-      await wait();
-      expect(stdout.lastFrame()).toMatchSnapshot(
-        'command-search-collapsed-match',
-      );
+      await act(async () => {
+        stdin.write('\x12');
+      });
+      await vi.waitFor(() => {
+        expect(stdout.lastFrame()).toMatchSnapshot(
+          'command-search-render-collapsed-match',
+        );
+      });
 
-      stdin.write('\u001B[C');
-      await wait();
-      expect(stdout.lastFrame()).toMatchSnapshot(
-        'command-search-expanded-match',
-      );
+      await act(async () => {
+        stdin.write('\u001B[C');
+      });
+      await vi.waitFor(() => {
+        expect(stdout.lastFrame()).toMatchSnapshot(
+          'command-search-render-expanded-match',
+        );
+      });
 
       unmount();
     });
@@ -1798,14 +1905,17 @@ describe('InputPrompt', () => {
       const { stdin, stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\x12');
-      await wait();
-
-      const frame = clean(stdout.lastFrame());
-      expect(frame).not.toContain('→');
-      expect(frame).not.toContain('←');
+      await act(async () => {
+        stdin.write('\x12');
+      });
+      await vi.waitFor(() => {
+        const frame = clean(stdout.lastFrame());
+        // Ensure it rendered the search mode
+        expect(frame).toContain('(r:)');
+        expect(frame).not.toContain('→');
+        expect(frame).not.toContain('←');
+      });
       unmount();
     });
   });
@@ -1819,12 +1929,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
-
-      expect(mockPopAllMessages).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
       const callback = mockPopAllMessages.mock.calls[0][0];
 
       act(() => {
@@ -1844,12 +1953,14 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() =>
+        expect(mockInputHistory.navigateUp).toHaveBeenCalled(),
+      );
       expect(mockPopAllMessages).not.toHaveBeenCalled();
-      expect(mockInputHistory.navigateUp).toHaveBeenCalled();
       unmount();
     });
 
@@ -1861,12 +1972,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
-
-      expect(mockPopAllMessages).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
       const callback = mockPopAllMessages.mock.calls[0][0];
       act(() => {
         callback(undefined);
@@ -1888,11 +1998,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
-      expect(mockPopAllMessages).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
       unmount();
     });
 
@@ -1904,10 +2014,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
 
       const callback = mockPopAllMessages.mock.calls[0][0];
       act(() => {
@@ -1926,12 +2037,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
-
-      expect(mockPopAllMessages).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
       unmount();
     });
 
@@ -1942,12 +2052,13 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
-
-      expect(mockInputHistory.navigateUp).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() =>
+        expect(mockInputHistory.navigateUp).toHaveBeenCalled(),
+      );
       unmount();
     });
 
@@ -1959,12 +2070,11 @@ describe('InputPrompt', () => {
       const { stdin, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
 
-      stdin.write('\u001B[A');
-      await wait();
-
-      expect(mockPopAllMessages).toHaveBeenCalled();
+      await act(async () => {
+        stdin.write('\u001B[A');
+      });
+      await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
 
       const callback = mockPopAllMessages.mock.calls[0][0];
       act(() => {
@@ -1984,8 +2094,7 @@ describe('InputPrompt', () => {
       const { stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
-      expect(stdout.lastFrame()).toMatchSnapshot();
+      await vi.waitFor(() => expect(stdout.lastFrame()).toMatchSnapshot());
       unmount();
     });
 
@@ -1994,8 +2103,7 @@ describe('InputPrompt', () => {
       const { stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
-      expect(stdout.lastFrame()).toMatchSnapshot();
+      await vi.waitFor(() => expect(stdout.lastFrame()).toMatchSnapshot());
       unmount();
     });
 
@@ -2004,8 +2112,7 @@ describe('InputPrompt', () => {
       const { stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
-      expect(stdout.lastFrame()).toMatchSnapshot();
+      await vi.waitFor(() => expect(stdout.lastFrame()).toMatchSnapshot());
       unmount();
     });
 
@@ -2015,11 +2122,12 @@ describe('InputPrompt', () => {
       const { stdout, unmount } = renderWithProviders(
         <InputPrompt {...props} />,
       );
-      await wait();
-      expect(stdout.lastFrame()).not.toContain(`{chalk.inverse(' ')}`);
-      // This snapshot is good to make sure there was an input prompt but does
-      // not show the inverted cursor because snapshots do not show colors.
-      expect(stdout.lastFrame()).toMatchSnapshot();
+      await vi.waitFor(() => {
+        expect(stdout.lastFrame()).not.toContain(chalk.inverse(' '));
+        // The snapshot confirms an input prompt rendered; it cannot show the
+        // inverted cursor (snapshots drop colors), hence the check above.
+        expect(stdout.lastFrame()).toMatchSnapshot();
+      });
       unmount();
     });
   });
@@ -2028,12 +2136,11 @@ describe('InputPrompt', () => {
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />, {
       shellFocus: false,
     });
-    await wait();
 
-    stdin.write('a');
-    await wait();
-
-    expect(mockBuffer.handleInput).toHaveBeenCalled();
+    await act(async () => {
+      stdin.write('a');
+    });
+    await vi.waitFor(() => expect(mockBuffer.handleInput).toHaveBeenCalled());
     unmount();
   });
   describe('command queuing while streaming', () => {
@@ -2074,17 +2181,20 @@ describe('InputPrompt', () => {
         const { stdin, unmount } = renderWithProviders(
           <InputPrompt {...props} />,
         );
-        await wait();
-        stdin.write('\r');
-        await wait();
-
-        if (shouldSubmit) {
-          expect(props.onSubmit).toHaveBeenCalledWith(bufferText);
-          expect(props.setQueueErrorMessage).not.toHaveBeenCalled();
-        } else {
-          expect(props.onSubmit).not.toHaveBeenCalled();
-          expect(props.setQueueErrorMessage).toHaveBeenCalledWith(errorMessage);
-        }
+        await act(async () => {
+          stdin.write('\r');
+        });
+        await vi.waitFor(() => {
+          if (shouldSubmit) {
+            expect(props.onSubmit).toHaveBeenCalledWith(bufferText);
+            expect(props.setQueueErrorMessage).not.toHaveBeenCalled();
+          } else {
+            expect(props.onSubmit).not.toHaveBeenCalled();
+            expect(props.setQueueErrorMessage).toHaveBeenCalledWith(
+              errorMessage,
+            );
+          }
+        });
         unmount();
       },
     );
diff --git a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
index 5ce22ac941..4991f1ac4f 100644
--- a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
+++ b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
@@ -18,14 +18,44 @@ exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and c
  llllllllllllllllllllllllllllllllllllllllllllllllll"
 `;
 
-exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-collapsed-match 1`] = `
+exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and collapses long suggestion via Right/Left arrows > command-search-render-collapsed-match 1`] = `
+"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ (r:)    Type your message or @path/to/file                                                        │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll →
+ lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll
+ ..."
+`;
+
+exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and collapses long suggestion via Right/Left arrows > command-search-render-expanded-match 1`] = `
+"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ (r:)    Type your message or @path/to/file                                                        │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll ←
+ lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll
+ llllllllllllllllllllllllllllllllllllllllllllllllll"
+`;
+
+exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-collapsed-match 1`] = `
+"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ > commit                                                                                          │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯"
+`;
+
+exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-collapsed-match 2`] = `
 "╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ (r:)  commit                                                                                      │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  git commit -m "feat: add search" in src/app"
 `;
 
-exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-expanded-match 1`] = `
+exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-expanded-match 1`] = `
+"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ > commit                                                                                          │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯"
+`;
+
+exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-expanded-match 2`] = `
 "╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ (r:)  commit                                                                                      │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯

From 73570f1c86e7f5e4b027a5879fa2a705be4be6a3 Mon Sep 17 00:00:00 2001
From: ph-sp <239453914+ph-sp@users.noreply.github.com>
Date: Fri, 24 Oct 2025 21:10:00 -0700
Subject: [PATCH 25/73] Fix the shortenPath function to correctly insert
 ellipsis. (#12004)

Co-authored-by: Greg Shikhman <shikhman@google.com>
---
 packages/core/src/utils/paths.test.ts | 217 +++++++++++++++++++++++++-
 packages/core/src/utils/paths.ts      | 182 ++++++++++++++++++---
 2 files changed, 372 insertions(+), 27 deletions(-)

diff --git a/packages/core/src/utils/paths.test.ts b/packages/core/src/utils/paths.test.ts
index 602f977a0c..210dc8b448 100644
--- a/packages/core/src/utils/paths.test.ts
+++ b/packages/core/src/utils/paths.test.ts
@@ -5,7 +5,7 @@
  */
 
 import { describe, it, expect, beforeAll, afterAll } from 'vitest';
-import { escapePath, unescapePath, isSubpath } from './paths.js';
+import { escapePath, unescapePath, isSubpath, shortenPath } from './paths.js';
 
 describe('escapePath', () => {
   it.each([
@@ -257,3 +257,218 @@ describe('isSubpath on Windows', () => {
     expect(isSubpath('Users\\Test\\file.txt', 'Users\\Test')).toBe(false);
   });
 });
+
+describe('shortenPath', () => {
+  describe.skipIf(process.platform === 'win32')('on POSIX', () => {
+    it('should not shorten a path that is shorter than maxLen', () => {
+      const p = '/path/to/file.txt';
+      expect(shortenPath(p, 40)).toBe(p);
+    });
+
+    it('should not shorten a path that is equal to maxLen', () => {
+      const p = '/path/to/file.txt';
+      expect(shortenPath(p, p.length)).toBe(p);
+    });
+
+    it('should shorten a long path, keeping start and end from a short limit', () => {
+      const p = '/path/to/a/very/long/directory/name/file.txt';
+      expect(shortenPath(p, 25)).toBe('/path/.../name/file.txt');
+    });
+
+    it('should shorten a long path, keeping more from the end from a longer limit', () => {
+      const p = '/path/to/a/very/long/directory/name/file.txt';
+      expect(shortenPath(p, 35)).toBe('/path/.../directory/name/file.txt');
+    });
+
+    it('should handle deep paths where few segments from the end fit', () => {
+      const p = '/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/file.txt';
+      expect(shortenPath(p, 20)).toBe('/a/.../y/z/file.txt');
+    });
+
+    it('should handle deep paths where many segments from the end fit', () => {
+      const p = '/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/file.txt';
+      expect(shortenPath(p, 45)).toBe(
+        '/a/.../l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/file.txt',
+      );
+    });
+
+    it('should handle a long filename in the root when it needs shortening', () => {
+      const p = '/a-very-long-filename-that-needs-to-be-shortened.txt';
+      expect(shortenPath(p, 40)).toBe(
+        '/a-very-long-filen...o-be-shortened.txt',
+      );
+    });
+
+    it('should handle root path', () => {
+      const p = '/';
+      expect(shortenPath(p, 10)).toBe('/');
+    });
+
+    it('should handle a path with one long segment after root', () => {
+      const p = '/a-very-long-directory-name';
+      expect(shortenPath(p, 20)).toBe('/a-very-...ory-name');
+    });
+
+    it('should handle a path with just a long filename (no root)', () => {
+      const p = 'a-very-long-filename-that-needs-to-be-shortened.txt';
+      expect(shortenPath(p, 40)).toBe(
+        'a-very-long-filena...o-be-shortened.txt',
+      );
+    });
+
+    it('should fallback to truncating earlier segments while keeping the last intact', () => {
+      const p = '/abcdef/fghij.txt';
+      const result = shortenPath(p, 10);
+      expect(result).toBe('/fghij.txt');
+      expect(result.length).toBeLessThanOrEqual(10);
+    });
+
+    it('should fallback by truncating start and middle segments when needed', () => {
+      const p = '/averylongcomponentname/another/short.txt';
+      const result = shortenPath(p, 25);
+      expect(result).toBe('/averylo.../.../short.txt');
+      expect(result.length).toBeLessThanOrEqual(25);
+    });
+
+    it('should show only the last segment when maxLen is tiny', () => {
+      const p = '/foo/bar/baz.txt';
+      const result = shortenPath(p, 8);
+      expect(result).toBe('/baz.txt');
+      expect(result.length).toBeLessThanOrEqual(8);
+    });
+
+    it('should fall back to simple truncation when the last segment exceeds maxLen', () => {
+      const longFile = 'x'.repeat(60) + '.txt';
+      const p = `/really/long/${longFile}`;
+      const result = shortenPath(p, 50);
+      expect(result).toBe('/really/long/xxxxxxxxxx...xxxxxxxxxxxxxxxxxxx.txt');
+      expect(result.length).toBeLessThanOrEqual(50);
+    });
+
+    it('should handle relative paths without a root', () => {
+      const p = 'foo/bar/baz/qux.txt';
+      const result = shortenPath(p, 18);
+      expect(result).toBe('foo/.../qux.txt');
+      expect(result.length).toBeLessThanOrEqual(18);
+    });
+
+    it('should ignore empty segments created by repeated separators', () => {
+      const p = '/foo//bar///baz/verylongname.txt';
+      const result = shortenPath(p, 20);
+      expect(result).toBe('.../verylongname.txt');
+      expect(result.length).toBeLessThanOrEqual(20);
+    });
+  });
+
+  describe.skipIf(process.platform !== 'win32')('on Windows', () => {
+    it('should not shorten a path that is shorter than maxLen', () => {
+      const p = 'C\\Users\\Test\\file.txt';
+      expect(shortenPath(p, 40)).toBe(p);
+    });
+
+    it('should not shorten a path that is equal to maxLen', () => {
+      const p = 'C\\path\\to\\file.txt';
+      expect(shortenPath(p, p.length)).toBe(p);
+    });
+
+    it('should shorten a long path, keeping start and end from a short limit', () => {
+      const p = 'C\\path\\to\\a\\very\\long\\directory\\name\\file.txt';
+      expect(shortenPath(p, 30)).toBe('C\\...\\directory\\name\\file.txt');
+    });
+
+    it('should shorten a long path, keeping more from the end from a longer limit', () => {
+      const p = 'C\\path\\to\\a\\very\\long\\directory\\name\\file.txt';
+      expect(shortenPath(p, 42)).toBe(
+        'C\\...\\a\\very\\long\\directory\\name\\file.txt',
+      );
+    });
+
+    it('should handle deep paths where few segments from the end fit', () => {
+      const p =
+        'C\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q\\r\\s\\t\\u\\v\\w\\x\\y\\z\\file.txt';
+      expect(shortenPath(p, 22)).toBe('C\\...\\w\\x\\y\\z\\file.txt');
+    });
+
+    it('should handle deep paths where many segments from the end fit', () => {
+      const p =
+        'C\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q\\r\\s\\t\\u\\v\\w\\x\\y\\z\\file.txt';
+      expect(shortenPath(p, 47)).toBe(
+        'C\\...\\k\\l\\m\\n\\o\\p\\q\\r\\s\\t\\u\\v\\w\\x\\y\\z\\file.txt',
+      );
+    });
+
+    it('should handle a long filename in the root when it needs shortening', () => {
+      const p = 'C\\a-very-long-filename-that-needs-to-be-shortened.txt';
+      expect(shortenPath(p, 40)).toBe(
+        'C\\a-very-long-file...o-be-shortened.txt',
+      );
+    });
+
+    it('should handle root path', () => {
+      const p = 'C\\';
+      expect(shortenPath(p, 10)).toBe('C\\');
+    });
+
+    it('should handle a path with one long segment after root', () => {
+      const p = 'C\\a-very-long-directory-name';
+      expect(shortenPath(p, 22)).toBe('C\\a-very-...tory-name');
+    });
+
+    it('should handle a path with just a long filename (no root)', () => {
+      const p = 'a-very-long-filename-that-needs-to-be-shortened.txt';
+      expect(shortenPath(p, 40)).toBe(
+        'a-very-long-filena...o-be-shortened.txt',
+      );
+    });
+
+    it('should fallback to truncating earlier segments while keeping the last intact', () => {
+      const p = 'C\\abcdef\\fghij.txt';
+      const result = shortenPath(p, 15);
+      expect(result).toBe('C\\...\\fghij.txt');
+      expect(result.length).toBeLessThanOrEqual(15);
+    });
+
+    it('should fallback by truncating start and middle segments when needed', () => {
+      const p = 'C\\averylongcomponentname\\another\\short.txt';
+      const result = shortenPath(p, 30);
+      expect(result).toBe('C\\...\\another\\short.txt');
+      expect(result.length).toBeLessThanOrEqual(30);
+    });
+
+    it('should show only the last segment for tiny maxLen values', () => {
+      const p = 'C\\foo\\bar\\baz.txt';
+      const result = shortenPath(p, 12);
+      expect(result).toBe('...\\baz.txt');
+      expect(result.length).toBeLessThanOrEqual(12);
+    });
+
+    it('should keep the drive prefix when space allows', () => {
+      const p = 'C\\foo\\bar\\baz.txt';
+      const result = shortenPath(p, 14);
+      expect(result).toBe('C\\...\\baz.txt');
+      expect(result.length).toBeLessThanOrEqual(14);
+    });
+
+    it('should fall back when the last segment exceeds maxLen on Windows', () => {
+      const longFile = 'x'.repeat(60) + '.txt';
+      const p = `C\\really\\long\\${longFile}`;
+      const result = shortenPath(p, 40);
+      expect(result).toBe('C\\really\\long\\xxxx...xxxxxxxxxxxxxx.txt');
+      expect(result.length).toBeLessThanOrEqual(40);
+    });
+
+    it('should handle UNC paths with limited space', () => {
+      const p = '\\server\\share\\deep\\path\\file.txt';
+      const result = shortenPath(p, 25);
+      expect(result).toBe('\\server\\...\\path\\file.txt');
+      expect(result.length).toBeLessThanOrEqual(25);
+    });
+
+    it('should collapse UNC paths further when maxLen shrinks', () => {
+      const p = '\\server\\share\\deep\\path\\file.txt';
+      const result = shortenPath(p, 18);
+      expect(result).toBe('\\s...\\...\\file.txt');
+      expect(result.length).toBeLessThanOrEqual(18);
+    });
+  });
+});
diff --git a/packages/core/src/utils/paths.ts b/packages/core/src/utils/paths.ts
index 5723527996..0546e11ffe 100644
--- a/packages/core/src/utils/paths.ts
+++ b/packages/core/src/utils/paths.ts
@@ -40,6 +40,53 @@ export function shortenPath(filePath: string, maxLen: number = 35): string {
     return filePath;
   }
 
+  const simpleTruncate = () => {
+    const keepLen = Math.floor((maxLen - 3) / 2);
+    if (keepLen <= 0) {
+      return filePath.substring(0, maxLen - 3) + '...';
+    }
+    const start = filePath.substring(0, keepLen);
+    const end = filePath.substring(filePath.length - keepLen);
+    return `${start}...${end}`;
+  };
+
+  type TruncateMode = 'start' | 'end' | 'center';
+
+  const truncateComponent = (
+    component: string,
+    targetLength: number,
+    mode: TruncateMode,
+  ): string => {
+    if (component.length <= targetLength) {
+      return component;
+    }
+
+    if (targetLength <= 0) {
+      return '';
+    }
+
+    if (targetLength <= 3) {
+      if (mode === 'end') {
+        return component.slice(-targetLength);
+      }
+      return component.slice(0, targetLength);
+    }
+
+    if (mode === 'start') {
+      return `${component.slice(0, targetLength - 3)}...`;
+    }
+
+    if (mode === 'end') {
+      return `...${component.slice(component.length - (targetLength - 3))}`;
+    }
+
+    const front = Math.ceil((targetLength - 3) / 2);
+    const back = targetLength - 3 - front;
+    return `${component.slice(0, front)}...${component.slice(
+      component.length - back,
+    )}`;
+  };
+
   const parsedPath = path.parse(filePath);
   const root = parsedPath.root;
   const separator = path.sep;
@@ -51,51 +98,134 @@ export function shortenPath(filePath: string, maxLen: number = 35): string {
   // Handle cases with no segments after root (e.g., "/", "C:\") or only one segment
   if (segments.length <= 1) {
     // Fall back to simple start/end truncation for very short paths or single segments
-    const keepLen = Math.floor((maxLen - 3) / 2);
-    // Ensure keepLen is not negative if maxLen is very small
-    if (keepLen <= 0) {
-      return filePath.substring(0, maxLen - 3) + '...';
-    }
-    const start = filePath.substring(0, keepLen);
-    const end = filePath.substring(filePath.length - keepLen);
-    return `${start}...${end}`;
+    return simpleTruncate();
   }
 
   const firstDir = segments[0];
   const lastSegment = segments[segments.length - 1];
   const startComponent = root + firstDir;
 
-  const endPartSegments: string[] = [];
-  // Base length: separator + "..." + lastDir
-  let currentLength = separator.length + lastSegment.length;
+  const endPartSegments = [lastSegment];
+  let endPartLength = lastSegment.length;
 
-  // Iterate backwards through segments (excluding the first one)
-  for (let i = segments.length - 2; i >= 0; i--) {
+  // Iterate backwards through the middle segments
+  for (let i = segments.length - 2; i > 0; i--) {
     const segment = segments[i];
-    // Length needed if we add this segment: current + separator + segment
-    const lengthWithSegment = currentLength + separator.length + segment.length;
+    const newLength =
+      startComponent.length +
+      separator.length +
+      3 + // for "..."
+      separator.length +
+      endPartLength +
+      separator.length +
+      segment.length;
 
-    if (lengthWithSegment <= maxLen) {
-      endPartSegments.unshift(segment); // Add to the beginning of the end part
-      currentLength = lengthWithSegment;
+    if (newLength <= maxLen) {
+      endPartSegments.unshift(segment);
+      endPartLength += separator.length + segment.length;
     } else {
       break;
     }
   }
 
-  let result = endPartSegments.join(separator) + separator + lastSegment;
+  const components = [firstDir, ...endPartSegments];
+  const componentModes: TruncateMode[] = components.map((_, index) => {
+    if (index === 0) {
+      return 'start';
+    }
+    if (index === components.length - 1) {
+      return 'end';
+    }
+    return 'center';
+  });
 
-  if (currentLength > maxLen) {
-    return result;
+  const separatorsCount = endPartSegments.length + 1;
+  const fixedLen = root.length + separatorsCount * separator.length + 3; // ellipsis length
+  const availableForComponents = maxLen - fixedLen;
+
+  const trailingFallback = () => {
+    const ellipsisTail = `...${separator}${lastSegment}`;
+    if (ellipsisTail.length <= maxLen) {
+      return ellipsisTail;
+    }
+
+    if (root) {
+      const rootEllipsisTail = `${root}...${separator}${lastSegment}`;
+      if (rootEllipsisTail.length <= maxLen) {
+        return rootEllipsisTail;
+      }
+    }
+
+    if (root && `${root}${lastSegment}`.length <= maxLen) {
+      return `${root}${lastSegment}`;
+    }
+
+    if (lastSegment.length <= maxLen) {
+      return lastSegment;
+    }
+
+    // As a final resort (e.g., last segment itself exceeds maxLen), fall back to simple truncation.
+    return simpleTruncate();
+  };
+
+  if (availableForComponents <= 0) {
+    return trailingFallback();
   }
 
-  // Construct the final path
-  result = startComponent + separator + result;
+  const minLengths = components.map((component, index) => {
+    if (index === 0) {
+      return Math.min(component.length, 1);
+    }
+    if (index === components.length - 1) {
+      return component.length; // Never truncate the last segment when possible.
+    }
+    return Math.min(component.length, 1);
+  });
+
+  const minTotal = minLengths.reduce((sum, len) => sum + len, 0);
+  if (availableForComponents < minTotal) {
+    return trailingFallback();
+  }
+
+  const budgets = components.map((component) => component.length);
+  let currentTotal = budgets.reduce((sum, len) => sum + len, 0);
+
+  const pickIndexToReduce = () => {
+    let bestIndex = -1;
+    let bestScore = -Infinity;
+    for (let i = 0; i < budgets.length; i++) {
+      if (budgets[i] <= minLengths[i]) {
+        continue;
+      }
+      const isLast = i === budgets.length - 1;
+      const score = (isLast ? 0 : 1_000_000) + budgets[i];
+      if (score > bestScore) {
+        bestScore = score;
+        bestIndex = i;
+      }
+    }
+    return bestIndex;
+  };
+
+  while (currentTotal > availableForComponents) {
+    const index = pickIndexToReduce();
+    if (index === -1) {
+      return trailingFallback();
+    }
+    budgets[index]--;
+    currentTotal--;
+  }
+
+  const truncatedComponents = components.map((component, index) =>
+    truncateComponent(component, budgets[index], componentModes[index]),
+  );
+
+  const truncatedFirst = truncatedComponents[0];
+  const truncatedEnd = truncatedComponents.slice(1).join(separator);
+  const result = `${root}${truncatedFirst}${separator}...${separator}${truncatedEnd}`;
 
-  // As a final check, if the result is somehow still too long
-  // truncate the result string from the beginning, prefixing with "...".
   if (result.length > maxLen) {
-    return '...' + result.substring(result.length - maxLen - 3);
+    return trailingFallback();
   }
 
   return result;

From a2d7f82b499f8d9ed44b732056267ec8e181ebeb Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Fri, 24 Oct 2025 21:22:26 -0700
Subject: [PATCH 26/73] fix(core): Prepend user message to loop detection
 history if it starts with a function call (#11860)

---
 .../src/services/loopDetectionService.test.ts | 31 +++++++++++++++++++
 .../core/src/services/loopDetectionService.ts |  6 ++++
 2 files changed, 37 insertions(+)

diff --git a/packages/core/src/services/loopDetectionService.test.ts b/packages/core/src/services/loopDetectionService.test.ts
index aaf7f90829..e464bfb6c9 100644
--- a/packages/core/src/services/loopDetectionService.test.ts
+++ b/packages/core/src/services/loopDetectionService.test.ts
@@ -5,6 +5,7 @@
  */
 
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import type { Content } from '@google/genai';
 import type { Config } from '../config/config.js';
 import type { GeminiClient } from '../core/client.js';
 import type { BaseLlmClient } from '../core/baseLlmClient.js';
@@ -754,4 +755,34 @@ describe('LoopDetectionService LLM Checks', () => {
     expect(result).toBe(false);
     expect(mockBaseLlmClient.generateJson).not.toHaveBeenCalled();
   });
+
+  it('should prepend user message if history starts with a function call', async () => {
+    const functionCallHistory: Content[] = [
+      {
+        role: 'model',
+        parts: [{ functionCall: { name: 'someTool', args: {} } }],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Some follow up text' }],
+      },
+    ];
+    vi.mocked(mockGeminiClient.getHistory).mockReturnValue(functionCallHistory);
+
+    mockBaseLlmClient.generateJson = vi
+      .fn()
+      .mockResolvedValue({ confidence: 0.1 });
+
+    await advanceTurns(30);
+
+    expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
+    const calledArg = vi.mocked(mockBaseLlmClient.generateJson).mock
+      .calls[0][0];
+    expect(calledArg.contents[0]).toEqual({
+      role: 'user',
+      parts: [{ text: 'Recent conversation history:' }],
+    });
+    // Verify the original history follows
+    expect(calledArg.contents[1]).toEqual(functionCallHistory[0]);
+  });
 });
diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts
index d2fbb3746d..ac291b679d 100644
--- a/packages/core/src/services/loopDetectionService.ts
+++ b/packages/core/src/services/loopDetectionService.ts
@@ -404,6 +404,12 @@ export class LoopDetectionService {
       ...trimmedHistory,
       { role: 'user', parts: [{ text: taskPrompt }] },
     ];
+    if (contents.length > 0 && isFunctionCall(contents[0])) {
+      contents.unshift({
+        role: 'user',
+        parts: [{ text: 'Recent conversation history:' }],
+      });
+    }
     const schema: Record<string, unknown> = {
       type: 'object',
       properties: {

From 8352980f014743625f5058cd73d5c3abdd69a518 Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Sat, 25 Oct 2025 09:07:35 -0700
Subject: [PATCH 27/73] Remove non-existent parallel flag. (#12018)

---
 package.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/package.json b/package.json
index ae3bdfa852..b283480a3f 100644
--- a/package.json
+++ b/package.json
@@ -34,8 +34,8 @@
     "build:packages": "npm run build --workspaces",
     "build:sandbox": "node scripts/build_sandbox.js",
     "bundle": "npm run generate && node esbuild.config.js && node scripts/copy_bundle_assets.js",
-    "test": "npm run test --workspaces --if-present --parallel",
-    "test:ci": "npm run test:ci --workspaces --if-present --parallel && npm run test:scripts",
+    "test": "npm run test --workspaces --if-present",
+    "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts",
     "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts",
     "test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none",
     "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",

From ee66732ad258f097455ca0664b7084a88a4586d1 Mon Sep 17 00:00:00 2001
From: Jacob Richman <jacob314@gmail.com>
Date: Sat, 25 Oct 2025 14:41:53 -0700
Subject: [PATCH 28/73] First batch of fixing tests to use best practices.
 (#11964)

---
 packages/cli/src/config/extension.test.ts     |   8 +-
 .../cli/src/config/extensions/update.test.ts  |   2 +
 packages/cli/src/gemini.test.tsx              |   3 +-
 .../ui/components/FolderTrustDialog.test.tsx  |   2 +
 .../src/ui/components/InputPrompt.test.tsx    |  10 +-
 .../src/ui/components/ModelDialog.test.tsx    |   2 +
 .../PermissionsModifyTrustDialog.test.tsx     |   2 +
 .../src/ui/components/SettingsDialog.test.tsx |   2 +
 .../src/ui/components/ThemeDialog.test.tsx    |   3 +-
 .../__snapshots__/InputPrompt.test.tsx.snap   |  30 -
 .../shared/BaseSelectionList.test.tsx         |   2 +
 .../ui/components/shared/text-buffer.test.ts  |   2 +
 .../src/ui/contexts/KeypressContext.test.tsx  |   2 +
 .../src/ui/contexts/SessionContext.test.tsx   |   2 +
 ...test.ts => shellCommandProcessor.test.tsx} |  24 +-
 ...test.ts => slashCommandProcessor.test.tsx} | 175 +++--
 .../ui/hooks/useAutoAcceptIndicator.test.ts   |   2 +
 ....test.ts => useCommandCompletion.test.tsx} | 369 +++-------
 ...es.test.ts => useConsoleMessages.test.tsx} |  35 +-
 ...ngs.test.ts => useEditorSettings.test.tsx} |  98 ++-
 ...s.test.ts => useExtensionUpdates.test.tsx} |  48 +-
 .../src/ui/hooks/useFlickerDetector.test.ts   |   2 +
 .../{useFocus.test.ts => useFocus.test.tsx}   |  40 +-
 .../cli/src/ui/hooks/useFolderTrust.test.ts   |   2 +
 .../cli/src/ui/hooks/useGeminiStream.test.tsx |   2 +
 ...Name.test.ts => useGitBranchName.test.tsx} |  40 +-
 .../src/ui/hooks/useHistoryManager.test.ts    |   2 +
 ...r.test.ts => useIdeTrustListener.test.tsx} |  32 +-
 .../cli/src/ui/hooks/useInputHistory.test.ts  |   2 +
 .../src/ui/hooks/useInputHistoryStore.test.ts |   2 +
 ...eKeypress.test.ts => useKeypress.test.tsx} |  64 +-
 ...r.test.ts => useLoadingIndicator.test.tsx} |  52 +-
 ...itor.test.ts => useMemoryMonitor.test.tsx} |  15 +-
 ...Queue.test.ts => useMessageQueue.test.tsx} | 231 +++----
 .../cli/src/ui/hooks/useModelCommand.test.ts  |  42 --
 .../cli/src/ui/hooks/useModelCommand.test.tsx |  50 ++
 .../hooks/usePermissionsModifyTrust.test.ts   |   2 +
 .../cli/src/ui/hooks/usePhraseCycler.test.ts  |   2 +
 ...gs.test.ts => usePrivacySettings.test.tsx} |  36 +-
 .../src/ui/hooks/useQuotaAndFallback.test.ts  |   2 +
 .../ui/hooks/useReactToolScheduler.test.ts    |   2 +
 ...List.test.ts => useSelectionList.test.tsx} | 651 ++++++++----------
 .../cli/src/ui/hooks/useShellHistory.test.ts  |   2 +
 .../{useTimer.test.ts => useTimer.test.tsx}   |  81 ++-
 .../cli/src/ui/hooks/useToolScheduler.test.ts |   2 +
 .../ui/hooks/{vim.test.ts => vim.test.tsx}    |  45 +-
 packages/cli/vitest.config.ts                 |   9 +-
 .../src/agents/subagent-tool-wrapper.test.ts  |   6 +-
 48 files changed, 1128 insertions(+), 1113 deletions(-)
 rename packages/cli/src/ui/hooks/{shellCommandProcessor.test.ts => shellCommandProcessor.test.tsx} (98%)
 rename packages/cli/src/ui/hooks/{slashCommandProcessor.test.ts => slashCommandProcessor.test.tsx} (90%)
 rename packages/cli/src/ui/hooks/{useCommandCompletion.test.ts => useCommandCompletion.test.tsx} (65%)
 rename packages/cli/src/ui/hooks/{useConsoleMessages.test.ts => useConsoleMessages.test.tsx} (79%)
 rename packages/cli/src/ui/hooks/{useEditorSettings.test.ts => useEditorSettings.test.tsx} (68%)
 rename packages/cli/src/ui/hooks/{useExtensionUpdates.test.ts => useExtensionUpdates.test.tsx} (93%)
 rename packages/cli/src/ui/hooks/{useFocus.test.ts => useFocus.test.tsx} (82%)
 rename packages/cli/src/ui/hooks/{useGitBranchName.test.ts => useGitBranchName.test.tsx} (85%)
 rename packages/cli/src/ui/hooks/{useIdeTrustListener.test.ts => useIdeTrustListener.test.tsx} (90%)
 rename packages/cli/src/ui/hooks/{useKeypress.test.ts => useKeypress.test.tsx} (83%)
 rename packages/cli/src/ui/hooks/{useLoadingIndicator.test.ts => useLoadingIndicator.test.tsx} (77%)
 rename packages/cli/src/ui/hooks/{useMemoryMonitor.test.ts => useMemoryMonitor.test.tsx} (87%)
 rename packages/cli/src/ui/hooks/{useMessageQueue.test.ts => useMessageQueue.test.tsx} (69%)
 delete mode 100644 packages/cli/src/ui/hooks/useModelCommand.test.ts
 create mode 100644 packages/cli/src/ui/hooks/useModelCommand.test.tsx
 rename packages/cli/src/ui/hooks/{usePrivacySettings.test.ts => usePrivacySettings.test.tsx} (81%)
 rename packages/cli/src/ui/hooks/{useSelectionList.test.ts => useSelectionList.test.tsx} (64%)
 rename packages/cli/src/ui/hooks/{useTimer.test.ts => useTimer.test.tsx} (59%)
 rename packages/cli/src/ui/hooks/{vim.test.ts => vim.test.tsx} (98%)

diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index 7f0e4e2f02..f701e3cb3e 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { vi, type MockedFunction } from 'vitest';
 import * as fs from 'node:fs';
 import * as os from 'node:os';
@@ -460,8 +462,7 @@ describe('extension tests', () => {
 
       expect(extensions).toHaveLength(1);
       expect(extensions[0].name).toBe('good-ext');
-      expect(consoleSpy).toHaveBeenCalledOnce();
-      expect(consoleSpy).toHaveBeenCalledWith(
+      expect(consoleSpy).toHaveBeenCalledExactlyOnceWith(
         expect.stringContaining(
           `Warning: Skipping extension in ${badExtDir}: Failed to load extension config from ${badConfigPath}`,
         ),
@@ -492,8 +493,7 @@ describe('extension tests', () => {
 
       expect(extensions).toHaveLength(1);
       expect(extensions[0].name).toBe('good-ext');
-      expect(consoleSpy).toHaveBeenCalledOnce();
-      expect(consoleSpy).toHaveBeenCalledWith(
+      expect(consoleSpy).toHaveBeenCalledExactlyOnceWith(
         expect.stringContaining(
           `Warning: Skipping extension in ${badExtDir}: Failed to load extension config from ${badConfigPath}: Invalid configuration in ${badConfigPath}: missing "name"`,
         ),
diff --git a/packages/cli/src/config/extensions/update.test.ts b/packages/cli/src/config/extensions/update.test.ts
index 176e7ad3fa..66bf99fabc 100644
--- a/packages/cli/src/config/extensions/update.test.ts
+++ b/packages/cli/src/config/extensions/update.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { vi, type MockedFunction } from 'vitest';
 import * as fs from 'node:fs';
 import * as os from 'node:os';
diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx
index e1c04e2cfd..8be78561b9 100644
--- a/packages/cli/src/gemini.test.tsx
+++ b/packages/cli/src/gemini.test.tsx
@@ -377,8 +377,7 @@ describe('validateDnsResolutionOrder', () => {
 
   it('should return the default "ipv4first" and log a warning for an invalid string', () => {
     expect(validateDnsResolutionOrder('invalid-value')).toBe('ipv4first');
-    expect(consoleWarnSpy).toHaveBeenCalledOnce();
-    expect(consoleWarnSpy).toHaveBeenCalledWith(
+    expect(consoleWarnSpy).toHaveBeenCalledExactlyOnceWith(
       'Invalid value for dnsResolutionOrder in settings: "invalid-value". Using default "ipv4first".',
     );
   });
diff --git a/packages/cli/src/ui/components/FolderTrustDialog.test.tsx b/packages/cli/src/ui/components/FolderTrustDialog.test.tsx
index 11676cf2f6..77280be320 100644
--- a/packages/cli/src/ui/components/FolderTrustDialog.test.tsx
+++ b/packages/cli/src/ui/components/FolderTrustDialog.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { renderWithProviders } from '../../test-utils/render.js';
 import { waitFor, act } from '@testing-library/react';
 import { vi } from 'vitest';
diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index 33c53b8e2f..3da977c409 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -5,7 +5,7 @@
  */
 
 import { renderWithProviders } from '../../test-utils/render.js';
-import { act } from '@testing-library/react';
+import { act } from 'react';
 import type { InputPromptProps } from './InputPrompt.js';
 import { InputPrompt } from './InputPrompt.js';
 import type { TextBuffer } from './shared/text-buffer.js';
@@ -1936,7 +1936,7 @@ describe('InputPrompt', () => {
       await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
       const callback = mockPopAllMessages.mock.calls[0][0];
 
-      act(() => {
+      await act(async () => {
         callback('Message 1\n\nMessage 2\n\nMessage 3');
       });
       expect(props.buffer.setText).toHaveBeenCalledWith(
@@ -1978,7 +1978,7 @@ describe('InputPrompt', () => {
       });
       await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
       const callback = mockPopAllMessages.mock.calls[0][0];
-      act(() => {
+      await act(async () => {
         callback(undefined);
       });
 
@@ -2021,7 +2021,7 @@ describe('InputPrompt', () => {
       await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
 
       const callback = mockPopAllMessages.mock.calls[0][0];
-      act(() => {
+      await act(async () => {
         callback('Single message');
       });
 
@@ -2077,7 +2077,7 @@ describe('InputPrompt', () => {
       await vi.waitFor(() => expect(mockPopAllMessages).toHaveBeenCalled());
 
       const callback = mockPopAllMessages.mock.calls[0][0];
-      act(() => {
+      await act(async () => {
         callback(undefined);
       });
 
diff --git a/packages/cli/src/ui/components/ModelDialog.test.tsx b/packages/cli/src/ui/components/ModelDialog.test.tsx
index 33236801ba..0080a03b3d 100644
--- a/packages/cli/src/ui/components/ModelDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { render, cleanup } from '@testing-library/react';
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import {
diff --git a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx b/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx
index a88f533820..ed2740c580 100644
--- a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx
+++ b/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 /// <reference types="vitest/globals" />
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
diff --git a/packages/cli/src/ui/components/SettingsDialog.test.tsx b/packages/cli/src/ui/components/SettingsDialog.test.tsx
index 908c1f994f..50d32c1871 100644
--- a/packages/cli/src/ui/components/SettingsDialog.test.tsx
+++ b/packages/cli/src/ui/components/SettingsDialog.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 /**
  *
  *
diff --git a/packages/cli/src/ui/components/ThemeDialog.test.tsx b/packages/cli/src/ui/components/ThemeDialog.test.tsx
index 4d5d50032a..0a2f81e858 100644
--- a/packages/cli/src/ui/components/ThemeDialog.test.tsx
+++ b/packages/cli/src/ui/components/ThemeDialog.test.tsx
@@ -12,7 +12,6 @@ import { KeypressProvider } from '../contexts/KeypressContext.js';
 import { SettingsContext } from '../contexts/SettingsContext.js';
 import { DEFAULT_THEME, themeManager } from '../themes/theme-manager.js';
 import { act } from 'react';
-import { waitFor } from '@testing-library/react';
 
 const createMockSettings = (
   userSettings = {},
@@ -127,7 +126,7 @@ describe('ThemeDialog Snapshots', () => {
       stdin.write('\x1b');
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockOnCancel).toHaveBeenCalled();
     });
   });
diff --git a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
index 4991f1ac4f..cd2cbb17d2 100644
--- a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
+++ b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
@@ -1,23 +1,5 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and collapses long suggestion via Right/Left arrows > command-search-collapsed-match 1`] = `
-"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ (r:)    Type your message or @path/to/file                                                        │
-╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll →
- lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll
- ..."
-`;
-
-exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and collapses long suggestion via Right/Left arrows > command-search-expanded-match 1`] = `
-"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ (r:)    Type your message or @path/to/file                                                        │
-╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
- lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll ←
- lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll
- llllllllllllllllllllllllllllllllllllllllllllllllll"
-`;
-
 exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and collapses long suggestion via Right/Left arrows > command-search-render-collapsed-match 1`] = `
 "╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ (r:)    Type your message or @path/to/file                                                        │
@@ -38,12 +20,6 @@ exports[`InputPrompt > command search (Ctrl+R when not in shell) > expands and c
 
 exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-collapsed-match 1`] = `
 "╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ > commit                                                                                          │
-╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯"
-`;
-
-exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-collapsed-match 2`] = `
-"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ (r:)  commit                                                                                      │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  git commit -m "feat: add search" in src/app"
@@ -51,12 +27,6 @@ exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match
 
 exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-expanded-match 1`] = `
 "╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ > commit                                                                                          │
-╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯"
-`;
-
-exports[`InputPrompt > command search (Ctrl+R when not in shell) > renders match window and expanded view (snapshots) > command-search-render-expanded-match 2`] = `
-"╭────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ (r:)  commit                                                                                      │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
  git commit -m "feat: add search" in src/app"
diff --git a/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx b/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx
index 0d383a8641..bc2fd37db3 100644
--- a/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx
+++ b/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { waitFor } from '@testing-library/react';
 import { renderWithProviders } from '../../../test-utils/render.js';
diff --git a/packages/cli/src/ui/components/shared/text-buffer.test.ts b/packages/cli/src/ui/components/shared/text-buffer.test.ts
index 9e56856aca..77013f27b5 100644
--- a/packages/cli/src/ui/components/shared/text-buffer.test.ts
+++ b/packages/cli/src/ui/components/shared/text-buffer.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { describe, it, expect, beforeEach } from 'vitest';
 import stripAnsi from 'strip-ansi';
 import { renderHook, act } from '@testing-library/react';
diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx
index 197974c751..4f1aa42e69 100644
--- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx
+++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import type React from 'react';
 import { renderHook, act, waitFor } from '@testing-library/react';
 import type { Mock } from 'vitest';
diff --git a/packages/cli/src/ui/contexts/SessionContext.test.tsx b/packages/cli/src/ui/contexts/SessionContext.test.tsx
index c80262e503..45833ae5ee 100644
--- a/packages/cli/src/ui/contexts/SessionContext.test.tsx
+++ b/packages/cli/src/ui/contexts/SessionContext.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { type MutableRefObject } from 'react';
 import { render } from 'ink-testing-library';
 import { renderHook } from '@testing-library/react';
diff --git a/packages/cli/src/ui/hooks/shellCommandProcessor.test.ts b/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx
similarity index 98%
rename from packages/cli/src/ui/hooks/shellCommandProcessor.test.ts
rename to packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx
index 154dcee6b9..51bf95dbac 100644
--- a/packages/cli/src/ui/hooks/shellCommandProcessor.test.ts
+++ b/packages/cli/src/ui/hooks/shellCommandProcessor.test.tsx
@@ -4,7 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { act, renderHook } from '@testing-library/react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import {
   vi,
   describe,
@@ -92,9 +93,10 @@ describe('useShellCommandProcessor', () => {
     });
   });
 
-  const renderProcessorHook = () =>
-    renderHook(() =>
-      useShellCommandProcessor(
+  const renderProcessorHook = () => {
+    let hookResult: ReturnType<typeof useShellCommandProcessor>;
+    function TestComponent() {
+      hookResult = useShellCommandProcessor(
         addItemToHistoryMock,
         setPendingHistoryItemMock,
         onExecMock,
@@ -102,8 +104,18 @@ describe('useShellCommandProcessor', () => {
         mockConfig,
         mockGeminiClient,
         setShellInputFocusedMock,
-      ),
-    );
+      );
+      return null;
+    }
+    render(<TestComponent />);
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+    };
+  };
 
   const createMockServiceResult = (
     overrides: Partial<ShellExecutionResult> = {},
diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx
similarity index 90%
rename from packages/cli/src/ui/hooks/slashCommandProcessor.test.ts
rename to packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx
index 6016381f26..6707bf3058 100644
--- a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts
+++ b/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx
@@ -4,8 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { act, renderHook, waitFor } from '@testing-library/react';
 import { vi, describe, it, expect, beforeEach } from 'vitest';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import { useSlashCommandProcessor } from './slashCommandProcessor.js';
 import type {
   CommandContext,
@@ -131,8 +132,10 @@ describe('useSlashCommandProcessor', () => {
     mockFileLoadCommands.mockResolvedValue(Object.freeze(fileCommands));
     mockMcpLoadCommands.mockResolvedValue(Object.freeze(mcpCommands));
 
-    const { result } = renderHook(() =>
-      useSlashCommandProcessor(
+    let hookResult: ReturnType<typeof useSlashCommandProcessor>;
+
+    function TestComponent() {
+      hookResult = useSlashCommandProcessor(
         mockConfig,
         mockSettings,
         mockAddItem,
@@ -159,10 +162,19 @@ describe('useSlashCommandProcessor', () => {
         },
         new Map(), // extensionsUpdateState
         true, // isConfigInitialized
-      ),
-    );
+      );
+      return null;
+    }
 
-    return result;
+    const { unmount, rerender } = render(<TestComponent />);
+
+    return {
+      get current() {
+        return hookResult;
+      },
+      unmount,
+      rerender: () => rerender(<TestComponent />),
+    };
   };
 
   describe('Initialization and Command Loading', () => {
@@ -177,7 +189,7 @@ describe('useSlashCommandProcessor', () => {
       const testCommand = createTestCommand({ name: 'test' });
       const result = setupProcessorHook([testCommand]);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.slashCommands).toHaveLength(1);
       });
 
@@ -191,7 +203,7 @@ describe('useSlashCommandProcessor', () => {
       const testCommand = createTestCommand({ name: 'test' });
       const result = setupProcessorHook([testCommand]);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.slashCommands).toHaveLength(1);
       });
 
@@ -219,7 +231,7 @@ describe('useSlashCommandProcessor', () => {
 
       const result = setupProcessorHook([builtinCommand], [fileCommand]);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         // The service should only return one command with the name 'override'
         expect(result.current.slashCommands).toHaveLength(1);
       });
@@ -237,7 +249,9 @@ describe('useSlashCommandProcessor', () => {
   describe('Command Execution Logic', () => {
     it('should display an error for an unknown command', async () => {
       const result = setupProcessorHook();
-      await waitFor(() => expect(result.current.slashCommands).toBeDefined());
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toBeDefined(),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/nonexistent');
@@ -268,7 +282,9 @@ describe('useSlashCommandProcessor', () => {
         ],
       };
       const result = setupProcessorHook([parentCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/parent');
@@ -302,7 +318,9 @@ describe('useSlashCommandProcessor', () => {
         ],
       };
       const result = setupProcessorHook([parentCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/parent child with args');
@@ -348,7 +366,9 @@ describe('useSlashCommandProcessor', () => {
         setMockIsProcessing,
       );
 
-      await waitFor(() => expect(result.current.slashCommands).toBeDefined());
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toBeDefined(),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/fail');
@@ -366,7 +386,9 @@ describe('useSlashCommandProcessor', () => {
       });
 
       const result = setupProcessorHook([command], [], [], mockSetIsProcessing);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       const executionPromise = act(async () => {
         await result.current.handleSlashCommand('/long-running');
@@ -392,7 +414,9 @@ describe('useSlashCommandProcessor', () => {
         action: vi.fn().mockResolvedValue({ type: 'dialog', dialog: 'theme' }),
       });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/themecmd');
@@ -407,7 +431,9 @@ describe('useSlashCommandProcessor', () => {
         action: vi.fn().mockResolvedValue({ type: 'dialog', dialog: 'model' }),
       });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/modelcmd');
@@ -432,7 +458,9 @@ describe('useSlashCommandProcessor', () => {
         }),
       });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/load');
@@ -468,7 +496,9 @@ describe('useSlashCommandProcessor', () => {
       });
 
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/loadwiththoughts');
@@ -488,7 +518,9 @@ describe('useSlashCommandProcessor', () => {
       });
       const result = setupProcessorHook([command]);
 
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/exit');
@@ -510,7 +542,9 @@ describe('useSlashCommandProcessor', () => {
       );
 
       const result = setupProcessorHook([], [fileCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       let actionResult;
       await act(async () => {
@@ -542,7 +576,9 @@ describe('useSlashCommandProcessor', () => {
       );
 
       const result = setupProcessorHook([], [], [mcpCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       let actionResult;
       await act(async () => {
@@ -584,7 +620,9 @@ describe('useSlashCommandProcessor', () => {
 
     it('should set confirmation request when action returns confirm_shell_commands', async () => {
       const result = setupProcessorHook([shellCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       // This is intentionally not awaited, because the promise it returns
       // will not resolve until the user responds to the confirmation.
@@ -593,7 +631,7 @@ describe('useSlashCommandProcessor', () => {
       });
 
       // We now wait for the state to be updated with the request.
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.shellConfirmationRequest).not.toBeNull();
       });
 
@@ -604,14 +642,16 @@ describe('useSlashCommandProcessor', () => {
 
     it('should do nothing if user cancels confirmation', async () => {
       const result = setupProcessorHook([shellCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       act(() => {
         result.current.handleSlashCommand('/shellcmd');
       });
 
       // Wait for the confirmation dialog to be set
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.shellConfirmationRequest).not.toBeNull();
       });
 
@@ -637,12 +677,14 @@ describe('useSlashCommandProcessor', () => {
 
     it('should re-run command with one-time allowlist on "Proceed Once"', async () => {
       const result = setupProcessorHook([shellCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       act(() => {
         result.current.handleSlashCommand('/shellcmd');
       });
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.shellConfirmationRequest).not.toBeNull();
       });
 
@@ -663,7 +705,7 @@ describe('useSlashCommandProcessor', () => {
       expect(result.current.shellConfirmationRequest).toBeNull();
 
       // The action should have been called twice (initial + re-run).
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockCommandAction).toHaveBeenCalledTimes(2);
       });
 
@@ -691,12 +733,14 @@ describe('useSlashCommandProcessor', () => {
 
     it('should re-run command and update session allowlist on "Proceed Always"', async () => {
       const result = setupProcessorHook([shellCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       act(() => {
         result.current.handleSlashCommand('/shellcmd');
       });
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.shellConfirmationRequest).not.toBeNull();
       });
 
@@ -712,7 +756,7 @@ describe('useSlashCommandProcessor', () => {
       });
 
       expect(result.current.shellConfirmationRequest).toBeNull();
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockCommandAction).toHaveBeenCalledTimes(2);
       });
 
@@ -722,7 +766,7 @@ describe('useSlashCommandProcessor', () => {
       );
 
       // Check that the session-wide allowlist WAS updated.
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const finalContext = result.current.commandContext;
         expect(finalContext.session.sessionShellAllowlist.has('rm -rf /')).toBe(
           true,
@@ -735,7 +779,9 @@ describe('useSlashCommandProcessor', () => {
     it('should be case-sensitive', async () => {
       const command = createTestCommand({ name: 'test' });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         // Use uppercase when command is lowercase
@@ -761,7 +807,9 @@ describe('useSlashCommandProcessor', () => {
         action,
       });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/alias');
@@ -777,7 +825,9 @@ describe('useSlashCommandProcessor', () => {
       const action = vi.fn();
       const command = createTestCommand({ name: 'test', action });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('  /test  with-args  ');
@@ -790,7 +840,9 @@ describe('useSlashCommandProcessor', () => {
       const action = vi.fn();
       const command = createTestCommand({ name: 'help', action });
       const result = setupProcessorHook([command]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(1),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('?help');
@@ -820,7 +872,7 @@ describe('useSlashCommandProcessor', () => {
 
       const result = setupProcessorHook([], [fileCommand], [mcpCommand]);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         // The service should only return one command with the name 'override'
         expect(result.current.slashCommands).toHaveLength(1);
       });
@@ -856,7 +908,7 @@ describe('useSlashCommandProcessor', () => {
       // so the test must work regardless of which comes first.
       const result = setupProcessorHook([quitCommand], [exitCommand]);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.slashCommands).toHaveLength(2);
       });
 
@@ -882,7 +934,9 @@ describe('useSlashCommandProcessor', () => {
       );
 
       const result = setupProcessorHook([quitCommand], [exitCommand]);
-      await waitFor(() => expect(result.current.slashCommands).toHaveLength(2));
+      await vi.waitFor(() =>
+        expect(result.current.slashCommands).toHaveLength(2),
+      );
 
       await act(async () => {
         await result.current.handleSlashCommand('/exit');
@@ -899,36 +953,7 @@ describe('useSlashCommandProcessor', () => {
   describe('Lifecycle', () => {
     it('should abort command loading when the hook unmounts', () => {
       const abortSpy = vi.spyOn(AbortController.prototype, 'abort');
-      const { unmount } = renderHook(() =>
-        useSlashCommandProcessor(
-          mockConfig,
-          mockSettings,
-          mockAddItem,
-          mockClearItems,
-          mockLoadHistory,
-          vi.fn(), // refreshStatic
-          vi.fn().mockResolvedValue(false), // toggleVimEnabled
-          vi.fn(), // setIsProcessing
-          vi.fn(), // setGeminiMdFileCount
-          {
-            openAuthDialog: vi.fn(),
-            openThemeDialog: vi.fn(),
-            openEditorDialog: vi.fn(),
-            openPrivacyNotice: vi.fn(),
-            openSettingsDialog: vi.fn(),
-            openModelDialog: vi.fn(),
-            openPermissionsDialog: vi.fn(),
-            quit: vi.fn(),
-            setDebugMessage: vi.fn(),
-            toggleCorgiMode: vi.fn(),
-            toggleDebugProfiler: vi.fn(),
-            dispatchExtensionStateUpdate: vi.fn(),
-            addConfirmUpdateExtensionRequest: vi.fn(),
-          },
-          new Map(), // extensionsUpdateState
-          true, // isConfigInitialized
-        ),
-      );
+      const { unmount } = setupProcessorHook();
 
       unmount();
 
@@ -972,7 +997,7 @@ describe('useSlashCommandProcessor', () => {
 
     it('should log a simple slash command', async () => {
       const result = setupProcessorHook(loggingTestCommands);
-      await waitFor(() =>
+      await vi.waitFor(() =>
         expect(result.current.slashCommands?.length).toBeGreaterThan(0),
       );
       await act(async () => {
@@ -991,7 +1016,7 @@ describe('useSlashCommandProcessor', () => {
 
     it('logs nothing for a bogus command', async () => {
       const result = setupProcessorHook(loggingTestCommands);
-      await waitFor(() =>
+      await vi.waitFor(() =>
         expect(result.current.slashCommands?.length).toBeGreaterThan(0),
       );
       await act(async () => {
@@ -1003,7 +1028,7 @@ describe('useSlashCommandProcessor', () => {
 
     it('logs a failure event for a failed command', async () => {
       const result = setupProcessorHook(loggingTestCommands);
-      await waitFor(() =>
+      await vi.waitFor(() =>
         expect(result.current.slashCommands?.length).toBeGreaterThan(0),
       );
       await act(async () => {
@@ -1022,7 +1047,7 @@ describe('useSlashCommandProcessor', () => {
 
     it('should log a slash command with a subcommand', async () => {
       const result = setupProcessorHook(loggingTestCommands);
-      await waitFor(() =>
+      await vi.waitFor(() =>
         expect(result.current.slashCommands?.length).toBeGreaterThan(0),
       );
       await act(async () => {
@@ -1040,7 +1065,7 @@ describe('useSlashCommandProcessor', () => {
 
     it('should log the command path when an alias is used', async () => {
       const result = setupProcessorHook(loggingTestCommands);
-      await waitFor(() =>
+      await vi.waitFor(() =>
         expect(result.current.slashCommands?.length).toBeGreaterThan(0),
       );
       await act(async () => {
@@ -1056,7 +1081,7 @@ describe('useSlashCommandProcessor', () => {
 
     it('should not log for unknown commands', async () => {
       const result = setupProcessorHook(loggingTestCommands);
-      await waitFor(() =>
+      await vi.waitFor(() =>
         expect(result.current.slashCommands?.length).toBeGreaterThan(0),
       );
       await act(async () => {
diff --git a/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts b/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts
index 2e103ca234..25b515de6b 100644
--- a/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts
+++ b/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import {
   describe,
   it,
diff --git a/packages/cli/src/ui/hooks/useCommandCompletion.test.ts b/packages/cli/src/ui/hooks/useCommandCompletion.test.tsx
similarity index 65%
rename from packages/cli/src/ui/hooks/useCommandCompletion.test.ts
rename to packages/cli/src/ui/hooks/useCommandCompletion.test.tsx
index 4cc53f9885..01cf9e8c5d 100644
--- a/packages/cli/src/ui/hooks/useCommandCompletion.test.ts
+++ b/packages/cli/src/ui/hooks/useCommandCompletion.test.tsx
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import {
   describe,
   it,
@@ -15,12 +13,12 @@ import {
   afterEach,
   type Mock,
 } from 'vitest';
-import { renderHook, act, waitFor } from '@testing-library/react';
+import { act, useEffect } from 'react';
+import { render } from 'ink-testing-library';
 import { useCommandCompletion } from './useCommandCompletion.js';
 import type { CommandContext } from '../commands/types.js';
 import type { Config } from '@google/gemini-cli-core';
 import { useTextBuffer } from '../components/shared/text-buffer.js';
-import { useEffect } from 'react';
 import type { Suggestion } from '../components/SuggestionsDisplay.js';
 import type { UseAtCompletionProps } from './useAtCompletion.js';
 import { useAtCompletion } from './useAtCompletion.js';
@@ -93,7 +91,8 @@ describe('useCommandCompletion', () => {
   const mockCommandContext = {} as CommandContext;
   const mockConfig = {
     getEnablePromptCompletion: () => false,
-  } as Config;
+    getGeminiClient: vi.fn(),
+  } as unknown as Config;
   const testDirs: string[] = [];
   const testRootDir = '/';
 
@@ -108,6 +107,40 @@ describe('useCommandCompletion', () => {
     });
   }
 
+  const renderCommandCompletionHook = (
+    initialText: string,
+    cursorOffset?: number,
+    shellModeActive = false,
+  ) => {
+    let hookResult: ReturnType<typeof useCommandCompletion> & {
+      textBuffer: ReturnType<typeof useTextBuffer>;
+    };
+
+    function TestComponent() {
+      const textBuffer = useTextBufferForTest(initialText, cursorOffset);
+      const completion = useCommandCompletion(
+        textBuffer,
+        testDirs,
+        testRootDir,
+        [],
+        mockCommandContext,
+        false,
+        shellModeActive,
+        mockConfig,
+      );
+      hookResult = { ...completion, textBuffer };
+      return null;
+    }
+    render(<TestComponent />);
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+    };
+  };
+
   beforeEach(() => {
     vi.clearAllMocks();
     // Reset to default mocks before each test
@@ -121,18 +154,7 @@ describe('useCommandCompletion', () => {
   describe('Core Hook Behavior', () => {
     describe('State Management', () => {
       it('should initialize with default state', () => {
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest(''),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('');
 
         expect(result.current.suggestions).toEqual([]);
         expect(result.current.activeSuggestionIndex).toBe(-1);
@@ -146,26 +168,13 @@ describe('useCommandCompletion', () => {
           atSuggestions: [{ label: 'src/file.txt', value: 'src/file.txt' }],
         });
 
-        const { result } = renderHook(() => {
-          const textBuffer = useTextBufferForTest('@file');
-          const completion = useCommandCompletion(
-            textBuffer,
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          );
-          return { completion, textBuffer };
+        const { result } = renderCommandCompletionHook('@file');
+
+        await vi.waitFor(() => {
+          expect(result.current.suggestions).toHaveLength(1);
         });
 
-        await waitFor(() => {
-          expect(result.current.completion.suggestions).toHaveLength(1);
-        });
-
-        expect(result.current.completion.showSuggestions).toBe(true);
+        expect(result.current.showSuggestions).toBe(true);
 
         act(() => {
           result.current.textBuffer.replaceRangeByOffset(
@@ -175,24 +184,13 @@ describe('useCommandCompletion', () => {
           );
         });
 
-        await waitFor(() => {
-          expect(result.current.completion.showSuggestions).toBe(false);
+        await vi.waitFor(() => {
+          expect(result.current.showSuggestions).toBe(false);
         });
       });
 
       it('should reset all state to default values', () => {
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('@files'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('@files');
 
         act(() => {
           result.current.setActiveSuggestionIndex(5);
@@ -210,20 +208,9 @@ describe('useCommandCompletion', () => {
 
       it('should call useAtCompletion with the correct query for an escaped space', async () => {
         const text = '@src/a\\ file.txt';
-        renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest(text),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        renderCommandCompletionHook(text);
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(useAtCompletion).toHaveBeenLastCalledWith(
             expect.objectContaining({
               enabled: true,
@@ -237,20 +224,9 @@ describe('useCommandCompletion', () => {
         const text = '@file1 @file2';
         const cursorOffset = 3; // @fi|le1 @file2
 
-        renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest(text, cursorOffset),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        renderCommandCompletionHook(text, cursorOffset);
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(useAtCompletion).toHaveBeenLastCalledWith(
             expect.objectContaining({
               enabled: true,
@@ -286,22 +262,13 @@ describe('useCommandCompletion', () => {
             slashSuggestions: [{ label: 'clear', value: 'clear' }],
           });
 
-          const { result } = renderHook(() => {
-            const textBuffer = useTextBufferForTest('/');
-            const completion = useCommandCompletion(
-              textBuffer,
-              testDirs,
-              testRootDir,
-              [],
-              mockCommandContext,
-              false,
-              shellModeActive, // Parameterized shellModeActive
-              mockConfig,
-            );
-            return { ...completion, textBuffer };
-          });
+          const { result } = renderCommandCompletionHook(
+            '/',
+            undefined,
+            shellModeActive,
+          );
 
-          await waitFor(() => {
+          await vi.waitFor(() => {
             expect(result.current.suggestions.length).toBe(expectedSuggestions);
             expect(result.current.showSuggestions).toBe(
               expectedShowSuggestions,
@@ -327,18 +294,7 @@ describe('useCommandCompletion', () => {
       it('should handle navigateUp with no suggestions', () => {
         setupMocks({ slashSuggestions: [] });
 
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('/'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('/');
 
         act(() => {
           result.current.navigateUp();
@@ -349,18 +305,7 @@ describe('useCommandCompletion', () => {
 
       it('should handle navigateDown with no suggestions', () => {
         setupMocks({ slashSuggestions: [] });
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('/'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('/');
 
         act(() => {
           result.current.navigateDown();
@@ -370,20 +315,9 @@ describe('useCommandCompletion', () => {
       });
 
       it('should navigate up through suggestions with wrap-around', async () => {
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('/'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('/');
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(result.current.suggestions.length).toBe(5);
         });
 
@@ -397,20 +331,9 @@ describe('useCommandCompletion', () => {
       });
 
       it('should navigate down through suggestions with wrap-around', async () => {
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('/'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('/');
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(result.current.suggestions.length).toBe(5);
         });
 
@@ -427,20 +350,9 @@ describe('useCommandCompletion', () => {
       });
 
       it('should handle navigation with multiple suggestions', async () => {
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('/'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('/');
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(result.current.suggestions.length).toBe(5);
         });
 
@@ -465,20 +377,9 @@ describe('useCommandCompletion', () => {
       it('should automatically select the first item when suggestions are available', async () => {
         setupMocks({ slashSuggestions: mockSuggestions });
 
-        const { result } = renderHook(() =>
-          useCommandCompletion(
-            useTextBufferForTest('/'),
-            testDirs,
-            testRootDir,
-            [],
-            mockCommandContext,
-            false,
-            false,
-            mockConfig,
-          ),
-        );
+        const { result } = renderCommandCompletionHook('/');
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(result.current.suggestions.length).toBe(
             mockSuggestions.length,
           );
@@ -495,22 +396,9 @@ describe('useCommandCompletion', () => {
         slashCompletionRange: { completionStart: 1, completionEnd: 4 },
       });
 
-      const { result } = renderHook(() => {
-        const textBuffer = useTextBufferForTest('/mem');
-        const completion = useCommandCompletion(
-          textBuffer,
-          testDirs,
-          testRootDir,
-          [],
-          mockCommandContext,
-          false,
-          false,
-          mockConfig,
-        );
-        return { ...completion, textBuffer };
-      });
+      const { result } = renderCommandCompletionHook('/mem');
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBe(1);
       });
 
@@ -526,22 +414,9 @@ describe('useCommandCompletion', () => {
         atSuggestions: [{ label: 'src/file1.txt', value: 'src/file1.txt' }],
       });
 
-      const { result } = renderHook(() => {
-        const textBuffer = useTextBufferForTest('@src/fi');
-        const completion = useCommandCompletion(
-          textBuffer,
-          testDirs,
-          testRootDir,
-          [],
-          mockCommandContext,
-          false,
-          false,
-          mockConfig,
-        );
-        return { ...completion, textBuffer };
-      });
+      const { result } = renderCommandCompletionHook('@src/fi');
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBe(1);
       });
 
@@ -560,22 +435,9 @@ describe('useCommandCompletion', () => {
         atSuggestions: [{ label: 'src/file1.txt', value: 'src/file1.txt' }],
       });
 
-      const { result } = renderHook(() => {
-        const textBuffer = useTextBufferForTest(text, cursorOffset);
-        const completion = useCommandCompletion(
-          textBuffer,
-          testDirs,
-          testRootDir,
-          [],
-          mockCommandContext,
-          false,
-          false,
-          mockConfig,
-        );
-        return { ...completion, textBuffer };
-      });
+      const { result } = renderCommandCompletionHook(text, cursorOffset);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBe(1);
       });
 
@@ -593,22 +455,9 @@ describe('useCommandCompletion', () => {
         atSuggestions: [{ label: 'src/components/', value: 'src/components/' }],
       });
 
-      const { result } = renderHook(() => {
-        const textBuffer = useTextBufferForTest('@src/comp');
-        const completion = useCommandCompletion(
-          textBuffer,
-          testDirs,
-          testRootDir,
-          [],
-          mockCommandContext,
-          false,
-          false,
-          mockConfig,
-        );
-        return { ...completion, textBuffer };
-      });
+      const { result } = renderCommandCompletionHook('@src/comp');
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBe(1);
       });
 
@@ -626,22 +475,9 @@ describe('useCommandCompletion', () => {
         ],
       });
 
-      const { result } = renderHook(() => {
-        const textBuffer = useTextBufferForTest('@src\\comp');
-        const completion = useCommandCompletion(
-          textBuffer,
-          testDirs,
-          testRootDir,
-          [],
-          mockCommandContext,
-          false,
-          false,
-          mockConfig,
-        );
-        return { ...completion, textBuffer };
-      });
+      const { result } = renderCommandCompletionHook('@src\\comp');
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBe(1);
       });
 
@@ -657,9 +493,14 @@ describe('useCommandCompletion', () => {
     it('should not trigger prompt completion for line comments', async () => {
       const mockConfig = {
         getEnablePromptCompletion: () => true,
-      } as Config;
+        getGeminiClient: vi.fn(),
+      } as unknown as Config;
 
-      const { result } = renderHook(() => {
+      let hookResult: ReturnType<typeof useCommandCompletion> & {
+        textBuffer: ReturnType<typeof useTextBuffer>;
+      };
+
+      function TestComponent() {
         const textBuffer = useTextBufferForTest('// This is a line comment');
         const completion = useCommandCompletion(
           textBuffer,
@@ -671,19 +512,26 @@ describe('useCommandCompletion', () => {
           false,
           mockConfig,
         );
-        return { ...completion, textBuffer };
-      });
+        hookResult = { ...completion, textBuffer };
+        return null;
+      }
+      render(<TestComponent />);
 
       // Should not trigger prompt completion for comments
-      expect(result.current.suggestions.length).toBe(0);
+      expect(hookResult!.suggestions.length).toBe(0);
     });
 
     it('should not trigger prompt completion for block comments', async () => {
       const mockConfig = {
         getEnablePromptCompletion: () => true,
-      } as Config;
+        getGeminiClient: vi.fn(),
+      } as unknown as Config;
 
-      const { result } = renderHook(() => {
+      let hookResult: ReturnType<typeof useCommandCompletion> & {
+        textBuffer: ReturnType<typeof useTextBuffer>;
+      };
+
+      function TestComponent() {
         const textBuffer = useTextBufferForTest(
           '/* This is a block comment */',
         );
@@ -697,19 +545,26 @@ describe('useCommandCompletion', () => {
           false,
           mockConfig,
         );
-        return { ...completion, textBuffer };
-      });
+        hookResult = { ...completion, textBuffer };
+        return null;
+      }
+      render(<TestComponent />);
 
       // Should not trigger prompt completion for comments
-      expect(result.current.suggestions.length).toBe(0);
+      expect(hookResult!.suggestions.length).toBe(0);
     });
 
     it('should trigger prompt completion for regular text when enabled', async () => {
       const mockConfig = {
         getEnablePromptCompletion: () => true,
-      } as Config;
+        getGeminiClient: vi.fn(),
+      } as unknown as Config;
 
-      const { result } = renderHook(() => {
+      let hookResult: ReturnType<typeof useCommandCompletion> & {
+        textBuffer: ReturnType<typeof useTextBuffer>;
+      };
+
+      function TestComponent() {
         const textBuffer = useTextBufferForTest(
           'This is regular text that should trigger completion',
         );
@@ -723,11 +578,13 @@ describe('useCommandCompletion', () => {
           false,
           mockConfig,
         );
-        return { ...completion, textBuffer };
-      });
+        hookResult = { ...completion, textBuffer };
+        return null;
+      }
+      render(<TestComponent />);
 
       // This test verifies that comments are filtered out while regular text is not
-      expect(result.current.textBuffer.text).toBe(
+      expect(hookResult!.textBuffer.text).toBe(
         'This is regular text that should trigger completion',
       );
     });
diff --git a/packages/cli/src/ui/hooks/useConsoleMessages.test.ts b/packages/cli/src/ui/hooks/useConsoleMessages.test.tsx
similarity index 79%
rename from packages/cli/src/ui/hooks/useConsoleMessages.test.ts
rename to packages/cli/src/ui/hooks/useConsoleMessages.test.tsx
index a6c6409af3..5eada66818 100644
--- a/packages/cli/src/ui/hooks/useConsoleMessages.test.ts
+++ b/packages/cli/src/ui/hooks/useConsoleMessages.test.tsx
@@ -4,10 +4,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { act, renderHook } from '@testing-library/react';
+import { render } from 'ink-testing-library';
+import { act, useCallback } from 'react';
 import { vi } from 'vitest';
 import { useConsoleMessages } from './useConsoleMessages.js';
-import { useCallback } from 'react';
 
 describe('useConsoleMessages', () => {
   beforeEach(() => {
@@ -38,13 +38,30 @@ describe('useConsoleMessages', () => {
     };
   };
 
+  const renderConsoleMessagesHook = () => {
+    let hookResult: ReturnType<typeof useTestableConsoleMessages>;
+    function TestComponent() {
+      hookResult = useTestableConsoleMessages();
+      return null;
+    }
+    const { unmount } = render(<TestComponent />);
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+      unmount,
+    };
+  };
+
   it('should initialize with an empty array of console messages', () => {
-    const { result } = renderHook(() => useTestableConsoleMessages());
+    const { result } = renderConsoleMessagesHook();
     expect(result.current.consoleMessages).toEqual([]);
   });
 
   it('should add a new message when log is called', async () => {
-    const { result } = renderHook(() => useTestableConsoleMessages());
+    const { result } = renderConsoleMessagesHook();
 
     act(() => {
       result.current.log('Test message');
@@ -60,7 +77,7 @@ describe('useConsoleMessages', () => {
   });
 
   it('should batch and count identical consecutive messages', async () => {
-    const { result } = renderHook(() => useTestableConsoleMessages());
+    const { result } = renderConsoleMessagesHook();
 
     act(() => {
       result.current.log('Test message');
@@ -78,7 +95,7 @@ describe('useConsoleMessages', () => {
   });
 
   it('should not batch different messages', async () => {
-    const { result } = renderHook(() => useTestableConsoleMessages());
+    const { result } = renderConsoleMessagesHook();
 
     act(() => {
       result.current.log('First message');
@@ -96,7 +113,7 @@ describe('useConsoleMessages', () => {
   });
 
   it('should clear all messages when clearConsoleMessages is called', async () => {
-    const { result } = renderHook(() => useTestableConsoleMessages());
+    const { result } = renderConsoleMessagesHook();
 
     act(() => {
       result.current.log('A message');
@@ -116,7 +133,7 @@ describe('useConsoleMessages', () => {
   });
 
   it('should clear the pending timeout when clearConsoleMessages is called', () => {
-    const { result } = renderHook(() => useTestableConsoleMessages());
+    const { result } = renderConsoleMessagesHook();
     const clearTimeoutSpy = vi.spyOn(global, 'clearTimeout');
 
     act(() => {
@@ -132,7 +149,7 @@ describe('useConsoleMessages', () => {
   });
 
   it('should clean up the timeout on unmount', () => {
-    const { result, unmount } = renderHook(() => useTestableConsoleMessages());
+    const { result, unmount } = renderConsoleMessagesHook();
     const clearTimeoutSpy = vi.spyOn(global, 'clearTimeout');
 
     act(() => {
diff --git a/packages/cli/src/ui/hooks/useEditorSettings.test.ts b/packages/cli/src/ui/hooks/useEditorSettings.test.tsx
similarity index 68%
rename from packages/cli/src/ui/hooks/useEditorSettings.test.ts
rename to packages/cli/src/ui/hooks/useEditorSettings.test.tsx
index 3cc4136f96..22b092e036 100644
--- a/packages/cli/src/ui/hooks/useEditorSettings.test.ts
+++ b/packages/cli/src/ui/hooks/useEditorSettings.test.tsx
@@ -14,7 +14,7 @@ import {
   type MockedFunction,
 } from 'vitest';
 import { act } from 'react';
-import { renderHook } from '@testing-library/react';
+import { render } from 'ink-testing-library';
 import { useEditorSettings } from './useEditorSettings.js';
 import type { LoadedSettings } from '../../config/settings.js';
 import { SettingScope } from '../../config/settings.js';
@@ -43,6 +43,16 @@ describe('useEditorSettings', () => {
   let mockAddItem: MockedFunction<
     (item: Omit<HistoryItem, 'id'>, timestamp: number) => void
   >;
+  let result: ReturnType<typeof useEditorSettings>;
+
+  function TestComponent() {
+    result = useEditorSettings(
+      mockLoadedSettings,
+      mockSetEditorError,
+      mockAddItem,
+    );
+    return null;
+  }
 
   beforeEach(() => {
     vi.resetAllMocks();
@@ -64,47 +74,39 @@ describe('useEditorSettings', () => {
   });
 
   it('should initialize with dialog closed', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
-    expect(result.current.isEditorDialogOpen).toBe(false);
+    expect(result.isEditorDialogOpen).toBe(false);
   });
 
   it('should open editor dialog when openEditorDialog is called', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     act(() => {
-      result.current.openEditorDialog();
+      result.openEditorDialog();
     });
 
-    expect(result.current.isEditorDialogOpen).toBe(true);
+    expect(result.isEditorDialogOpen).toBe(true);
   });
 
   it('should close editor dialog when exitEditorDialog is called', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
     act(() => {
-      result.current.openEditorDialog();
-      result.current.exitEditorDialog();
+      result.openEditorDialog();
+      result.exitEditorDialog();
     });
-    expect(result.current.isEditorDialogOpen).toBe(false);
+    expect(result.isEditorDialogOpen).toBe(false);
   });
 
   it('should handle editor selection successfully', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     const editorType: EditorType = 'vscode';
     const scope = SettingScope.User;
 
     act(() => {
-      result.current.openEditorDialog();
-      result.current.handleEditorSelect(editorType, scope);
+      result.openEditorDialog();
+      result.handleEditorSelect(editorType, scope);
     });
 
     expect(mockLoadedSettings.setValue).toHaveBeenCalledWith(
@@ -122,19 +124,17 @@ describe('useEditorSettings', () => {
     );
 
     expect(mockSetEditorError).toHaveBeenCalledWith(null);
-    expect(result.current.isEditorDialogOpen).toBe(false);
+    expect(result.isEditorDialogOpen).toBe(false);
   });
 
   it('should handle clearing editor preference (undefined editor)', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     const scope = SettingScope.Workspace;
 
     act(() => {
-      result.current.openEditorDialog();
-      result.current.handleEditorSelect(undefined, scope);
+      result.openEditorDialog();
+      result.handleEditorSelect(undefined, scope);
     });
 
     expect(mockLoadedSettings.setValue).toHaveBeenCalledWith(
@@ -152,20 +152,18 @@ describe('useEditorSettings', () => {
     );
 
     expect(mockSetEditorError).toHaveBeenCalledWith(null);
-    expect(result.current.isEditorDialogOpen).toBe(false);
+    expect(result.isEditorDialogOpen).toBe(false);
   });
 
   it('should handle different editor types', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     const editorTypes: EditorType[] = ['cursor', 'windsurf', 'vim'];
     const scope = SettingScope.User;
 
     editorTypes.forEach((editorType) => {
       act(() => {
-        result.current.handleEditorSelect(editorType, scope);
+        result.handleEditorSelect(editorType, scope);
       });
 
       expect(mockLoadedSettings.setValue).toHaveBeenCalledWith(
@@ -185,16 +183,14 @@ describe('useEditorSettings', () => {
   });
 
   it('should handle different setting scopes', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     const editorType: EditorType = 'vscode';
     const scopes = [SettingScope.User, SettingScope.Workspace];
 
     scopes.forEach((scope) => {
       act(() => {
-        result.current.handleEditorSelect(editorType, scope);
+        result.handleEditorSelect(editorType, scope);
       });
 
       expect(mockLoadedSettings.setValue).toHaveBeenCalledWith(
@@ -214,9 +210,7 @@ describe('useEditorSettings', () => {
   });
 
   it('should not set preference for unavailable editors', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     mockCheckHasEditorType.mockReturnValue(false);
 
@@ -224,19 +218,17 @@ describe('useEditorSettings', () => {
     const scope = SettingScope.User;
 
     act(() => {
-      result.current.openEditorDialog();
-      result.current.handleEditorSelect(editorType, scope);
+      result.openEditorDialog();
+      result.handleEditorSelect(editorType, scope);
     });
 
     expect(mockLoadedSettings.setValue).not.toHaveBeenCalled();
     expect(mockAddItem).not.toHaveBeenCalled();
-    expect(result.current.isEditorDialogOpen).toBe(true);
+    expect(result.isEditorDialogOpen).toBe(true);
   });
 
   it('should not set preference for editors not allowed in sandbox', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     mockAllowEditorTypeInSandbox.mockReturnValue(false);
 
@@ -244,19 +236,17 @@ describe('useEditorSettings', () => {
     const scope = SettingScope.User;
 
     act(() => {
-      result.current.openEditorDialog();
-      result.current.handleEditorSelect(editorType, scope);
+      result.openEditorDialog();
+      result.handleEditorSelect(editorType, scope);
     });
 
     expect(mockLoadedSettings.setValue).not.toHaveBeenCalled();
     expect(mockAddItem).not.toHaveBeenCalled();
-    expect(result.current.isEditorDialogOpen).toBe(true);
+    expect(result.isEditorDialogOpen).toBe(true);
   });
 
   it('should handle errors during editor selection', () => {
-    const { result } = renderHook(() =>
-      useEditorSettings(mockLoadedSettings, mockSetEditorError, mockAddItem),
-    );
+    render(<TestComponent />);
 
     const errorMessage = 'Failed to save settings';
     (
@@ -271,14 +261,14 @@ describe('useEditorSettings', () => {
     const scope = SettingScope.User;
 
     act(() => {
-      result.current.openEditorDialog();
-      result.current.handleEditorSelect(editorType, scope);
+      result.openEditorDialog();
+      result.handleEditorSelect(editorType, scope);
     });
 
     expect(mockSetEditorError).toHaveBeenCalledWith(
       `Failed to set editor preference: Error: ${errorMessage}`,
     );
     expect(mockAddItem).not.toHaveBeenCalled();
-    expect(result.current.isEditorDialogOpen).toBe(true);
+    expect(result.isEditorDialogOpen).toBe(true);
   });
 });
diff --git a/packages/cli/src/ui/hooks/useExtensionUpdates.test.ts b/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
similarity index 93%
rename from packages/cli/src/ui/hooks/useExtensionUpdates.test.ts
rename to packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
index b0949035d0..7d17a57611 100644
--- a/packages/cli/src/ui/hooks/useExtensionUpdates.test.ts
+++ b/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
@@ -11,7 +11,7 @@ import * as path from 'node:path';
 import { createExtension } from '../../test-utils/createExtension.js';
 import { useExtensionUpdates } from './useExtensionUpdates.js';
 import { GEMINI_DIR, type GeminiCLIExtension } from '@google/gemini-cli-core';
-import { renderHook, waitFor } from '@testing-library/react';
+import { render } from 'ink-testing-library';
 import { MessageType } from '../types.js';
 import {
   checkForAllExtensionUpdates,
@@ -25,7 +25,7 @@ vi.mock('os', async (importOriginal) => {
   const mockedOs = await importOriginal<typeof os>();
   return {
     ...mockedOs,
-    homedir: vi.fn(),
+    homedir: vi.fn().mockReturnValue('/tmp/mock-home'),
   };
 });
 
@@ -96,15 +96,18 @@ describe('useExtensionUpdates', () => {
       },
     );
 
-    renderHook(() =>
+    function TestComponent() {
       useExtensionUpdates(
         extensions as GeminiCLIExtension[],
         extensionManager,
         addItem,
-      ),
-    );
+      );
+      return null;
+    }
 
-    await waitFor(() => {
+    render(<TestComponent />);
+
+    await vi.waitFor(() => {
       expect(addItem).toHaveBeenCalledWith(
         {
           type: MessageType.INFO,
@@ -148,11 +151,14 @@ describe('useExtensionUpdates', () => {
       name: '',
     });
 
-    renderHook(() =>
-      useExtensionUpdates([extension], extensionManager, addItem),
-    );
+    function TestComponent() {
+      useExtensionUpdates([extension], extensionManager, addItem);
+      return null;
+    }
 
-    await waitFor(
+    render(<TestComponent />);
+
+    await vi.waitFor(
       () => {
         expect(addItem).toHaveBeenCalledWith(
           {
@@ -226,11 +232,14 @@ describe('useExtensionUpdates', () => {
         name: '',
       });
 
-    renderHook(() =>
-      useExtensionUpdates(extensions, extensionManager, addItem),
-    );
+    function TestComponent() {
+      useExtensionUpdates(extensions, extensionManager, addItem);
+      return null;
+    }
 
-    await waitFor(
+    render(<TestComponent />);
+
+    await vi.waitFor(
       () => {
         expect(addItem).toHaveBeenCalledTimes(2);
         expect(addItem).toHaveBeenCalledWith(
@@ -308,15 +317,18 @@ describe('useExtensionUpdates', () => {
       },
     );
 
-    renderHook(() =>
+    function TestComponent() {
       useExtensionUpdates(
         extensions as GeminiCLIExtension[],
         extensionManager,
         addItem,
-      ),
-    );
+      );
+      return null;
+    }
 
-    await waitFor(() => {
+    render(<TestComponent />);
+
+    await vi.waitFor(() => {
       expect(addItem).toHaveBeenCalledTimes(1);
       expect(addItem).toHaveBeenCalledWith(
         {
diff --git a/packages/cli/src/ui/hooks/useFlickerDetector.test.ts b/packages/cli/src/ui/hooks/useFlickerDetector.test.ts
index ffa1923a0d..aa60378648 100644
--- a/packages/cli/src/ui/hooks/useFlickerDetector.test.ts
+++ b/packages/cli/src/ui/hooks/useFlickerDetector.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { renderHook } from '@testing-library/react';
 import { vi, type Mock } from 'vitest';
 import { useFlickerDetector } from './useFlickerDetector.js';
diff --git a/packages/cli/src/ui/hooks/useFocus.test.ts b/packages/cli/src/ui/hooks/useFocus.test.tsx
similarity index 82%
rename from packages/cli/src/ui/hooks/useFocus.test.ts
rename to packages/cli/src/ui/hooks/useFocus.test.tsx
index a4f784a18a..65c5c83b1a 100644
--- a/packages/cli/src/ui/hooks/useFocus.test.ts
+++ b/packages/cli/src/ui/hooks/useFocus.test.tsx
@@ -4,13 +4,13 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { renderHook, act } from '@testing-library/react';
+import { render } from 'ink-testing-library';
 import { EventEmitter } from 'node:events';
 import { useFocus } from './useFocus.js';
 import { vi, type Mock } from 'vitest';
 import { useStdin, useStdout } from 'ink';
 import { KeypressProvider } from '../contexts/KeypressContext.js';
-import React from 'react';
+import { act } from 'react';
 
 // Mock the ink hooks
 vi.mock('ink', async (importOriginal) => {
@@ -25,9 +25,6 @@ vi.mock('ink', async (importOriginal) => {
 const mockedUseStdin = vi.mocked(useStdin);
 const mockedUseStdout = vi.mocked(useStdout);
 
-const wrapper = ({ children }: { children: React.ReactNode }) =>
-  React.createElement(KeypressProvider, null, children);
-
 describe('useFocus', () => {
   let stdin: EventEmitter & { resume: Mock; pause: Mock };
   let stdout: { write: Mock };
@@ -51,15 +48,36 @@ describe('useFocus', () => {
     stdin.removeAllListeners();
   });
 
+  const renderFocusHook = () => {
+    let hookResult: ReturnType<typeof useFocus>;
+    function TestComponent() {
+      hookResult = useFocus();
+      return null;
+    }
+    const { unmount } = render(
+      <KeypressProvider kittyProtocolEnabled={false}>
+        <TestComponent />
+      </KeypressProvider>,
+    );
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+      unmount,
+    };
+  };
+
   it('should initialize with focus and enable focus reporting', () => {
-    const { result } = renderHook(() => useFocus(), { wrapper });
+    const { result } = renderFocusHook();
 
     expect(result.current).toBe(true);
     expect(stdout.write).toHaveBeenCalledWith('\x1b[?1004h');
   });
 
   it('should set isFocused to false when a focus-out event is received', () => {
-    const { result } = renderHook(() => useFocus(), { wrapper });
+    const { result } = renderFocusHook();
 
     // Initial state is focused
     expect(result.current).toBe(true);
@@ -74,7 +92,7 @@ describe('useFocus', () => {
   });
 
   it('should set isFocused to true when a focus-in event is received', () => {
-    const { result } = renderHook(() => useFocus(), { wrapper });
+    const { result } = renderFocusHook();
 
     // Simulate focus-out to set initial state to false
     act(() => {
@@ -92,7 +110,7 @@ describe('useFocus', () => {
   });
 
   it('should clean up and disable focus reporting on unmount', () => {
-    const { unmount } = renderHook(() => useFocus(), { wrapper });
+    const { unmount } = renderFocusHook();
 
     // At this point we should have listeners from both KeypressProvider and useFocus
     const listenerCountAfterMount = stdin.listenerCount('data');
@@ -107,7 +125,7 @@ describe('useFocus', () => {
   });
 
   it('should handle multiple focus events correctly', () => {
-    const { result } = renderHook(() => useFocus(), { wrapper });
+    const { result } = renderFocusHook();
 
     act(() => {
       stdin.emit('data', Buffer.from('\x1b[O'));
@@ -131,7 +149,7 @@ describe('useFocus', () => {
   });
 
   it('restores focus on keypress after focus is lost', () => {
-    const { result } = renderHook(() => useFocus(), { wrapper });
+    const { result } = renderFocusHook();
 
     // Simulate focus-out event
     act(() => {
diff --git a/packages/cli/src/ui/hooks/useFolderTrust.test.ts b/packages/cli/src/ui/hooks/useFolderTrust.test.ts
index 6be20a3e63..cc663a11d9 100644
--- a/packages/cli/src/ui/hooks/useFolderTrust.test.ts
+++ b/packages/cli/src/ui/hooks/useFolderTrust.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { vi, type Mock, type MockInstance } from 'vitest';
 import { renderHook, act } from '@testing-library/react';
 import { useFolderTrust } from './useFolderTrust.js';
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 02db0f466e..14a596c9e1 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import type { Mock, MockInstance } from 'vitest';
 import { describe, it, expect, vi, beforeEach } from 'vitest';
diff --git a/packages/cli/src/ui/hooks/useGitBranchName.test.ts b/packages/cli/src/ui/hooks/useGitBranchName.test.tsx
similarity index 85%
rename from packages/cli/src/ui/hooks/useGitBranchName.test.ts
rename to packages/cli/src/ui/hooks/useGitBranchName.test.tsx
index 7688a48916..9695c60b67 100644
--- a/packages/cli/src/ui/hooks/useGitBranchName.test.ts
+++ b/packages/cli/src/ui/hooks/useGitBranchName.test.tsx
@@ -7,7 +7,7 @@
 import type { MockedFunction } from 'vitest';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { act } from 'react';
-import { renderHook, waitFor } from '@testing-library/react';
+import { render } from 'ink-testing-library';
 import { useGitBranchName } from './useGitBranchName.js';
 import { fs, vol } from 'memfs';
 import * as fsPromises from 'node:fs/promises';
@@ -54,13 +54,31 @@ describe('useGitBranchName', () => {
     vi.restoreAllMocks();
   });
 
+  const renderGitBranchNameHook = (cwd: string) => { // Test helper: runs useGitBranchName(cwd) in a component rendered with ink-testing-library.
+    let hookResult: ReturnType<typeof useGitBranchName>; // reassigned with the hook's return value on every render
+    function TestComponent() {
+      hookResult = useGitBranchName(cwd); // cwd is captured from the helper's argument and is the same on every render
+      return null; // renders nothing; the component exists only to call the hook
+    }
+    const { rerender, unmount } = render(<TestComponent />);
+    return {
+      result: {
+        get current() { // getter, so each read returns the latest hookResult rather than a snapshot taken here
+          return hookResult;
+        },
+      },
+      rerender: () => rerender(<TestComponent />), // zero-argument wrapper: re-renders the same component with no props
+      unmount,
+    };
+  };
+
   it('should return branch name', async () => {
     (mockSpawnAsync as MockedFunction<typeof mockSpawnAsync>).mockResolvedValue(
       {
         stdout: 'main\n',
       } as { stdout: string; stderr: string },
     );
-    const { result, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { result, rerender } = renderGitBranchNameHook(CWD);
 
     await act(async () => {
       rerender(); // Rerender to get the updated state
@@ -74,7 +92,7 @@ describe('useGitBranchName', () => {
       new Error('Git error'),
     );
 
-    const { result, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { result, rerender } = renderGitBranchNameHook(CWD);
     expect(result.current).toBeUndefined();
 
     await act(async () => {
@@ -95,7 +113,7 @@ describe('useGitBranchName', () => {
       return { stdout: '' } as { stdout: string; stderr: string };
     });
 
-    const { result, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { result, rerender } = renderGitBranchNameHook(CWD);
     await act(async () => {
       rerender();
     });
@@ -114,7 +132,7 @@ describe('useGitBranchName', () => {
       return { stdout: '' } as { stdout: string; stderr: string };
     });
 
-    const { result, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { result, rerender } = renderGitBranchNameHook(CWD);
     await act(async () => {
       rerender();
     });
@@ -135,7 +153,7 @@ describe('useGitBranchName', () => {
         stderr: string;
       });
 
-    const { result, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { result, rerender } = renderGitBranchNameHook(CWD);
 
     await act(async () => {
       rerender();
@@ -143,7 +161,7 @@ describe('useGitBranchName', () => {
     expect(result.current).toBe('main');
 
     // Wait for watcher to be set up
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(watchSpy).toHaveBeenCalled();
     });
 
@@ -153,7 +171,7 @@ describe('useGitBranchName', () => {
       rerender();
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(result.current).toBe('develop');
     });
   });
@@ -168,7 +186,7 @@ describe('useGitBranchName', () => {
       } as { stdout: string; stderr: string },
     );
 
-    const { result, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { result, rerender } = renderGitBranchNameHook(CWD);
 
     await act(async () => {
       rerender();
@@ -211,14 +229,14 @@ describe('useGitBranchName', () => {
       } as { stdout: string; stderr: string },
     );
 
-    const { unmount, rerender } = renderHook(() => useGitBranchName(CWD));
+    const { unmount, rerender } = renderGitBranchNameHook(CWD);
 
     await act(async () => {
       rerender();
     });
 
     // Wait for watcher to be set up BEFORE unmounting
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(watchMock).toHaveBeenCalledWith(
         GIT_LOGS_HEAD_PATH,
         expect.any(Function),
diff --git a/packages/cli/src/ui/hooks/useHistoryManager.test.ts b/packages/cli/src/ui/hooks/useHistoryManager.test.ts
index c6f600323e..d813379ac2 100644
--- a/packages/cli/src/ui/hooks/useHistoryManager.test.ts
+++ b/packages/cli/src/ui/hooks/useHistoryManager.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { describe, it, expect } from 'vitest';
 import { renderHook, act } from '@testing-library/react';
 import { useHistory } from './useHistoryManager.js';
diff --git a/packages/cli/src/ui/hooks/useIdeTrustListener.test.ts b/packages/cli/src/ui/hooks/useIdeTrustListener.test.tsx
similarity index 90%
rename from packages/cli/src/ui/hooks/useIdeTrustListener.test.ts
rename to packages/cli/src/ui/hooks/useIdeTrustListener.test.tsx
index e3d62a218c..3bc84f8553 100644
--- a/packages/cli/src/ui/hooks/useIdeTrustListener.test.ts
+++ b/packages/cli/src/ui/hooks/useIdeTrustListener.test.tsx
@@ -4,9 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { renderHook, act } from '@testing-library/react';
+import { render } from 'ink-testing-library';
+import { act } from 'react';
 import { vi, describe, it, expect, beforeEach } from 'vitest';
 import {
   IdeClient,
@@ -79,13 +78,30 @@ describe('useIdeTrustListener', () => {
     );
   });
 
+  const renderTrustListenerHook = () => { // Test helper: runs useIdeTrustListener in a component rendered with ink-testing-library.
+    let hookResult: ReturnType<typeof useIdeTrustListener>; // reassigned with the hook's return value on every render
+    function TestComponent() {
+      hookResult = useIdeTrustListener();
+      return null; // renders nothing; the component exists only to call the hook
+    }
+    const { rerender } = render(<TestComponent />);
+    return {
+      result: {
+        get current() { // getter, so each read returns the latest hookResult rather than a snapshot taken here
+          return hookResult;
+        },
+      },
+      rerender: () => rerender(<TestComponent />), // zero-argument wrapper: re-renders the same component with no props
+    };
+  };
+
   it('should initialize correctly with no trust information', () => {
     vi.mocked(trustedFolders.isWorkspaceTrusted).mockReturnValue({
       isTrusted: undefined,
       source: undefined,
     });
 
-    const { result } = renderHook(() => useIdeTrustListener());
+    const { result } = renderTrustListenerHook();
 
     expect(result.current.isIdeTrusted).toBe(undefined);
     expect(result.current.needsRestart).toBe(false);
@@ -100,7 +116,7 @@ describe('useIdeTrustListener', () => {
       isTrusted: true,
       source: 'ide',
     });
-    const { result } = renderHook(() => useIdeTrustListener());
+    const { result } = renderTrustListenerHook();
 
     // Manually trigger the initial connection state for the test setup
     await act(async () => {
@@ -134,7 +150,7 @@ describe('useIdeTrustListener', () => {
       source: 'ide',
     });
 
-    const { result } = renderHook(() => useIdeTrustListener());
+    const { result } = renderTrustListenerHook();
 
     // Manually trigger the initial connection state for the test setup
     await act(async () => {
@@ -172,7 +188,7 @@ describe('useIdeTrustListener', () => {
       source: 'ide',
     });
 
-    const { result } = renderHook(() => useIdeTrustListener());
+    const { result } = renderTrustListenerHook();
 
     // Manually trigger the initial connection state for the test setup
     await act(async () => {
@@ -208,7 +224,7 @@ describe('useIdeTrustListener', () => {
       source: 'ide',
     });
 
-    const { result, rerender } = renderHook(() => useIdeTrustListener());
+    const { result, rerender } = renderTrustListenerHook();
 
     // Manually trigger the initial connection state for the test setup
     await act(async () => {
diff --git a/packages/cli/src/ui/hooks/useInputHistory.test.ts b/packages/cli/src/ui/hooks/useInputHistory.test.ts
index 8d10c376b6..55e0b63182 100644
--- a/packages/cli/src/ui/hooks/useInputHistory.test.ts
+++ b/packages/cli/src/ui/hooks/useInputHistory.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { act, renderHook } from '@testing-library/react';
 import { useInputHistory } from './useInputHistory.js';
 
diff --git a/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts b/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts
index 5404cefc02..6953ce1b37 100644
--- a/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts
+++ b/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { act, renderHook } from '@testing-library/react';
 import { vi, describe, it, expect, beforeEach } from 'vitest';
 import { useInputHistoryStore } from './useInputHistoryStore.js';
diff --git a/packages/cli/src/ui/hooks/useKeypress.test.ts b/packages/cli/src/ui/hooks/useKeypress.test.tsx
similarity index 83%
rename from packages/cli/src/ui/hooks/useKeypress.test.ts
rename to packages/cli/src/ui/hooks/useKeypress.test.tsx
index 07fcf62ead..aecc4fd876 100644
--- a/packages/cli/src/ui/hooks/useKeypress.test.ts
+++ b/packages/cli/src/ui/hooks/useKeypress.test.tsx
@@ -4,8 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import React from 'react';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import { useKeypress } from './useKeypress.js';
 import { KeypressProvider } from '../contexts/KeypressContext.js';
 import { useStdin } from 'ink';
@@ -44,8 +44,17 @@ describe('useKeypress', () => {
   const onKeypress = vi.fn();
   let originalNodeVersion: string;
 
-  const wrapper = ({ children }: { children: React.ReactNode }) =>
-    React.createElement(KeypressProvider, null, children);
+  const renderKeypressHook = (isActive = true) => { // Test helper: registers the shared onKeypress mock through useKeypress inside a KeypressProvider.
+    function TestComponent() {
+      useKeypress(onKeypress, { isActive }); // isActive comes from the helper's argument (defaults to true)
+      return null; // renders nothing; the component exists only to call the hook
+    }
+    return render( // the whole ink-testing-library render result is returned; tests in this file destructure unmount from it
+      <KeypressProvider kittyProtocolEnabled={false}>
+        <TestComponent />
+      </KeypressProvider>,
+    );
+  };
 
   beforeEach(() => {
     vi.clearAllMocks();
@@ -67,9 +76,7 @@ describe('useKeypress', () => {
   });
 
   it('should not listen if isActive is false', () => {
-    renderHook(() => useKeypress(onKeypress, { isActive: false }), {
-      wrapper,
-    });
+    renderKeypressHook(false);
     act(() => stdin.write('a'));
     expect(onKeypress).not.toHaveBeenCalled();
   });
@@ -81,33 +88,27 @@ describe('useKeypress', () => {
     { key: { name: 'up', sequence: '\x1b[A' } },
     { key: { name: 'down', sequence: '\x1b[B' } },
   ])('should listen for keypress when active for key $key.name', ({ key }) => {
-    renderHook(() => useKeypress(onKeypress, { isActive: true }), { wrapper });
+    renderKeypressHook(true);
     act(() => stdin.write(key.sequence));
     expect(onKeypress).toHaveBeenCalledWith(expect.objectContaining(key));
   });
 
   it('should set and release raw mode', () => {
-    const { unmount } = renderHook(
-      () => useKeypress(onKeypress, { isActive: true }),
-      { wrapper },
-    );
+    const { unmount } = renderKeypressHook(true);
     expect(mockSetRawMode).toHaveBeenCalledWith(true);
     unmount();
     expect(mockSetRawMode).toHaveBeenCalledWith(false);
   });
 
   it('should stop listening after being unmounted', () => {
-    const { unmount } = renderHook(
-      () => useKeypress(onKeypress, { isActive: true }),
-      { wrapper },
-    );
+    const { unmount } = renderKeypressHook(true);
     unmount();
     act(() => stdin.write('a'));
     expect(onKeypress).not.toHaveBeenCalled();
   });
 
   it('should correctly identify alt+enter (meta key)', () => {
-    renderHook(() => useKeypress(onKeypress, { isActive: true }), { wrapper });
+    renderKeypressHook(true);
     const key = { name: 'return', sequence: '\x1B\r' };
     act(() => stdin.write(key.sequence));
     expect(onKeypress).toHaveBeenCalledWith(
@@ -130,9 +131,7 @@ describe('useKeypress', () => {
     });
 
     it('should process a paste as a single event', () => {
-      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
-        wrapper,
-      });
+      renderKeypressHook(true);
       const pasteText = 'hello world';
       act(() => stdin.write(PASTE_START + pasteText + PASTE_END));
 
@@ -148,9 +147,7 @@ describe('useKeypress', () => {
     });
 
     it('should handle keypress interspersed with pastes', () => {
-      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
-        wrapper,
-      });
+      renderKeypressHook(true);
 
       const keyA = { name: 'a', sequence: 'a' };
       act(() => stdin.write('a'));
@@ -174,9 +171,7 @@ describe('useKeypress', () => {
     });
 
     it('should handle lone pastes', () => {
-      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
-        wrapper,
-      });
+      renderKeypressHook(true);
 
       const pasteText = 'pasted';
       act(() => {
@@ -192,9 +187,7 @@ describe('useKeypress', () => {
     });
 
     it('should handle paste false alarm', () => {
-      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
-        wrapper,
-      });
+      renderKeypressHook(true);
 
       act(() => {
         stdin.write(PASTE_START.slice(0, 5));
@@ -211,9 +204,7 @@ describe('useKeypress', () => {
     });
 
     it('should handle back to back pastes', () => {
-      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
-        wrapper,
-      });
+      renderKeypressHook(true);
 
       const pasteText1 = 'herp';
       const pasteText2 = 'derp';
@@ -238,9 +229,7 @@ describe('useKeypress', () => {
     });
 
     it('should handle pastes split across writes', async () => {
-      renderHook(() => useKeypress(onKeypress, { isActive: true }), {
-        wrapper,
-      });
+      renderKeypressHook(true);
 
       const keyA = { name: 'a', sequence: 'a' };
       act(() => stdin.write('a'));
@@ -272,10 +261,7 @@ describe('useKeypress', () => {
     });
 
     it('should emit partial paste content if unmounted mid-paste', () => {
-      const { unmount } = renderHook(
-        () => useKeypress(onKeypress, { isActive: true }),
-        { wrapper },
-      );
+      const { unmount } = renderKeypressHook(true);
       const pasteText = 'incomplete paste';
 
       act(() => stdin.write(PASTE_START + pasteText));
diff --git a/packages/cli/src/ui/hooks/useLoadingIndicator.test.ts b/packages/cli/src/ui/hooks/useLoadingIndicator.test.tsx
similarity index 77%
rename from packages/cli/src/ui/hooks/useLoadingIndicator.test.ts
rename to packages/cli/src/ui/hooks/useLoadingIndicator.test.tsx
index 77e381b873..904010bcca 100644
--- a/packages/cli/src/ui/hooks/useLoadingIndicator.test.ts
+++ b/packages/cli/src/ui/hooks/useLoadingIndicator.test.tsx
@@ -5,7 +5,8 @@
  */
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import { useLoadingIndicator } from './useLoadingIndicator.js';
 import { StreamingState } from '../types.js';
 import {
@@ -24,11 +25,35 @@ describe('useLoadingIndicator', () => {
     vi.restoreAllMocks();
   });
 
+  const renderLoadingIndicatorHook = ( // Test helper: runs useLoadingIndicator in a component rendered with ink-testing-library.
+    initialStreamingState: StreamingState, // streaming state used for the first render only
+  ) => {
+    let hookResult: ReturnType<typeof useLoadingIndicator>; // reassigned with the hook's return value on every render
+    function TestComponent({
+      streamingState,
+    }: {
+      streamingState: StreamingState;
+    }) {
+      hookResult = useLoadingIndicator(streamingState);
+      return null; // renders nothing; the component exists only to call the hook
+    }
+    const { rerender } = render(
+      <TestComponent streamingState={initialStreamingState} />,
+    );
+    return {
+      result: {
+        get current() { // getter, so each read returns the latest hookResult rather than a snapshot taken here
+          return hookResult;
+        },
+      },
+      rerender: (newProps: { streamingState: StreamingState }) =>
+        rerender(<TestComponent {...newProps} />), // re-renders the same component with the caller-supplied streamingState
+    };
+  };
+
   it('should initialize with default values when Idle', () => {
     vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result } = renderHook(() =>
-      useLoadingIndicator(StreamingState.Idle),
-    );
+    const { result } = renderLoadingIndicatorHook(StreamingState.Idle);
     expect(result.current.elapsedTime).toBe(0);
     expect(WITTY_LOADING_PHRASES).toContain(
       result.current.currentLoadingPhrase,
@@ -37,9 +62,7 @@ describe('useLoadingIndicator', () => {
 
   it('should reflect values when Responding', async () => {
     vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result } = renderHook(() =>
-      useLoadingIndicator(StreamingState.Responding),
-    );
+    const { result } = renderLoadingIndicatorHook(StreamingState.Responding);
 
     // Initial state before timers advance
     expect(result.current.elapsedTime).toBe(0);
@@ -58,9 +81,8 @@ describe('useLoadingIndicator', () => {
   });
 
   it('should show waiting phrase and retain elapsedTime when WaitingForConfirmation', async () => {
-    const { result, rerender } = renderHook(
-      ({ streamingState }) => useLoadingIndicator(streamingState),
-      { initialProps: { streamingState: StreamingState.Responding } },
+    const { result, rerender } = renderLoadingIndicatorHook(
+      StreamingState.Responding,
     );
 
     await act(async () => {
@@ -86,9 +108,8 @@ describe('useLoadingIndicator', () => {
 
   it('should reset elapsedTime and use a witty phrase when transitioning from WaitingForConfirmation to Responding', async () => {
     vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result, rerender } = renderHook(
-      ({ streamingState }) => useLoadingIndicator(streamingState),
-      { initialProps: { streamingState: StreamingState.Responding } },
+    const { result, rerender } = renderLoadingIndicatorHook(
+      StreamingState.Responding,
     );
 
     await act(async () => {
@@ -120,9 +141,8 @@ describe('useLoadingIndicator', () => {
 
   it('should reset timer and phrase when streamingState changes from Responding to Idle', async () => {
     vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result, rerender } = renderHook(
-      ({ streamingState }) => useLoadingIndicator(streamingState),
-      { initialProps: { streamingState: StreamingState.Responding } },
+    const { result, rerender } = renderLoadingIndicatorHook(
+      StreamingState.Responding,
     );
 
     await act(async () => {
diff --git a/packages/cli/src/ui/hooks/useMemoryMonitor.test.ts b/packages/cli/src/ui/hooks/useMemoryMonitor.test.tsx
similarity index 87%
rename from packages/cli/src/ui/hooks/useMemoryMonitor.test.ts
rename to packages/cli/src/ui/hooks/useMemoryMonitor.test.tsx
index 3250a33833..4fb3db97e1 100644
--- a/packages/cli/src/ui/hooks/useMemoryMonitor.test.ts
+++ b/packages/cli/src/ui/hooks/useMemoryMonitor.test.tsx
@@ -4,7 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { renderHook } from '@testing-library/react';
+import { render } from 'ink-testing-library';
 import { vi } from 'vitest';
 import {
   useMemoryMonitor,
@@ -27,11 +27,16 @@ describe('useMemoryMonitor', () => {
     vi.useRealTimers();
   });
 
+  function TestComponent() { // Minimal component whose only job is to run useMemoryMonitor so the hook can be rendered in tests.
+    useMemoryMonitor({ addItem }); // addItem is taken from the enclosing scope (declared outside this hunk)
+    return null; // renders nothing
+  }
+
   it('should not warn when memory usage is below threshold', () => {
     memoryUsageSpy.mockReturnValue({
       rss: MEMORY_WARNING_THRESHOLD / 2,
     } as NodeJS.MemoryUsage);
-    renderHook(() => useMemoryMonitor({ addItem }));
+    render(<TestComponent />);
     vi.advanceTimersByTime(10000);
     expect(addItem).not.toHaveBeenCalled();
   });
@@ -40,7 +45,7 @@ describe('useMemoryMonitor', () => {
     memoryUsageSpy.mockReturnValue({
       rss: MEMORY_WARNING_THRESHOLD * 1.5,
     } as NodeJS.MemoryUsage);
-    renderHook(() => useMemoryMonitor({ addItem }));
+    render(<TestComponent />);
     vi.advanceTimersByTime(MEMORY_CHECK_INTERVAL);
     expect(addItem).toHaveBeenCalledTimes(1);
     expect(addItem).toHaveBeenCalledWith(
@@ -56,7 +61,7 @@ describe('useMemoryMonitor', () => {
     memoryUsageSpy.mockReturnValue({
       rss: MEMORY_WARNING_THRESHOLD * 1.5,
     } as NodeJS.MemoryUsage);
-    const { rerender } = renderHook(() => useMemoryMonitor({ addItem }));
+    const { rerender } = render(<TestComponent />);
     vi.advanceTimersByTime(MEMORY_CHECK_INTERVAL);
     expect(addItem).toHaveBeenCalledTimes(1);
 
@@ -64,7 +69,7 @@ describe('useMemoryMonitor', () => {
     memoryUsageSpy.mockReturnValue({
       rss: MEMORY_WARNING_THRESHOLD * 1.5,
     } as NodeJS.MemoryUsage);
-    rerender();
+    rerender(<TestComponent />);
     vi.advanceTimersByTime(MEMORY_CHECK_INTERVAL);
     expect(addItem).toHaveBeenCalledTimes(1);
   });
diff --git a/packages/cli/src/ui/hooks/useMessageQueue.test.ts b/packages/cli/src/ui/hooks/useMessageQueue.test.tsx
similarity index 69%
rename from packages/cli/src/ui/hooks/useMessageQueue.test.ts
rename to packages/cli/src/ui/hooks/useMessageQueue.test.tsx
index d28f5fb250..001897bb5d 100644
--- a/packages/cli/src/ui/hooks/useMessageQueue.test.ts
+++ b/packages/cli/src/ui/hooks/useMessageQueue.test.tsx
@@ -5,7 +5,8 @@
  */
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import { useMessageQueue } from './useMessageQueue.js';
 import { StreamingState } from '../types.js';
 
@@ -22,27 +23,45 @@ describe('useMessageQueue', () => {
     vi.clearAllMocks();
   });
 
+  const renderMessageQueueHook = (initialProps: { // Test helper: runs useMessageQueue in a component rendered with ink-testing-library.
+    isConfigInitialized: boolean;
+    streamingState: StreamingState;
+    submitQuery: (query: string) => void;
+  }) => {
+    let hookResult: ReturnType<typeof useMessageQueue>; // reassigned with the hook's return value on every render
+    function TestComponent(props: typeof initialProps) {
+      hookResult = useMessageQueue(props); // the component's props object is passed straight through as the hook's options
+      return null; // renders nothing; the component exists only to call the hook
+    }
+    const { rerender } = render(<TestComponent {...initialProps} />);
+    return {
+      result: {
+        get current() { // getter, so each read returns the latest hookResult rather than a snapshot taken here
+          return hookResult;
+        },
+      },
+      rerender: (newProps: Partial<typeof initialProps>) =>
+        rerender(<TestComponent {...initialProps} {...newProps} />), // newProps override initialProps; omitted keys revert to the initial values, not to the previous rerender's
+    };
+  };
+
   it('should initialize with empty queue', () => {
-    const { result } = renderHook(() =>
-      useMessageQueue({
-        isConfigInitialized: true,
-        streamingState: StreamingState.Idle,
-        submitQuery: mockSubmitQuery,
-      }),
-    );
+    const { result } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Idle,
+      submitQuery: mockSubmitQuery,
+    });
 
     expect(result.current.messageQueue).toEqual([]);
     expect(result.current.getQueuedMessagesText()).toBe('');
   });
 
   it('should add messages to queue', () => {
-    const { result } = renderHook(() =>
-      useMessageQueue({
-        isConfigInitialized: true,
-        streamingState: StreamingState.Responding,
-        submitQuery: mockSubmitQuery,
-      }),
-    );
+    const { result } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     act(() => {
       result.current.addMessage('Test message 1');
@@ -56,13 +75,11 @@ describe('useMessageQueue', () => {
   });
 
   it('should filter out empty messages', () => {
-    const { result } = renderHook(() =>
-      useMessageQueue({
-        isConfigInitialized: true,
-        streamingState: StreamingState.Responding,
-        submitQuery: mockSubmitQuery,
-      }),
-    );
+    const { result } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     act(() => {
       result.current.addMessage('Valid message');
@@ -78,13 +95,11 @@ describe('useMessageQueue', () => {
   });
 
   it('should clear queue', () => {
-    const { result } = renderHook(() =>
-      useMessageQueue({
-        isConfigInitialized: true,
-        streamingState: StreamingState.Responding,
-        submitQuery: mockSubmitQuery,
-      }),
-    );
+    const { result } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     act(() => {
       result.current.addMessage('Test message');
@@ -100,13 +115,11 @@ describe('useMessageQueue', () => {
   });
 
   it('should return queued messages as text with double newlines', () => {
-    const { result } = renderHook(() =>
-      useMessageQueue({
-        isConfigInitialized: true,
-        streamingState: StreamingState.Responding,
-        submitQuery: mockSubmitQuery,
-      }),
-    );
+    const { result } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     act(() => {
       result.current.addMessage('Message 1');
@@ -119,18 +132,12 @@ describe('useMessageQueue', () => {
     );
   });
 
-  it('should auto-submit queued messages when transitioning to Idle', () => {
-    const { result, rerender } = renderHook(
-      ({ streamingState }) =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState,
-          submitQuery: mockSubmitQuery,
-        }),
-      {
-        initialProps: { streamingState: StreamingState.Responding },
-      },
-    );
+  it('should auto-submit queued messages when transitioning to Idle', async () => {
+    const { result, rerender } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     // Add some messages
     act(() => {
@@ -143,22 +150,18 @@ describe('useMessageQueue', () => {
     // Transition to Idle
     rerender({ streamingState: StreamingState.Idle });
 
-    expect(mockSubmitQuery).toHaveBeenCalledWith('Message 1\n\nMessage 2');
-    expect(result.current.messageQueue).toEqual([]);
+    await vi.waitFor(() => {
+      expect(mockSubmitQuery).toHaveBeenCalledWith('Message 1\n\nMessage 2');
+      expect(result.current.messageQueue).toEqual([]);
+    });
   });
 
   it('should not auto-submit when queue is empty', () => {
-    const { rerender } = renderHook(
-      ({ streamingState }) =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState,
-          submitQuery: mockSubmitQuery,
-        }),
-      {
-        initialProps: { streamingState: StreamingState.Responding },
-      },
-    );
+    const { rerender } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     // Transition to Idle with empty queue
     rerender({ streamingState: StreamingState.Idle });
@@ -167,17 +170,11 @@ describe('useMessageQueue', () => {
   });
 
   it('should not auto-submit when not transitioning to Idle', () => {
-    const { result, rerender } = renderHook(
-      ({ streamingState }) =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState,
-          submitQuery: mockSubmitQuery,
-        }),
-      {
-        initialProps: { streamingState: StreamingState.Responding },
-      },
-    );
+    const { result, rerender } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Responding,
+      submitQuery: mockSubmitQuery,
+    });
 
     // Add messages
     act(() => {
@@ -191,18 +188,12 @@ describe('useMessageQueue', () => {
     expect(result.current.messageQueue).toEqual(['Message 1']);
   });
 
-  it('should handle multiple state transitions correctly', () => {
-    const { result, rerender } = renderHook(
-      ({ streamingState }) =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState,
-          submitQuery: mockSubmitQuery,
-        }),
-      {
-        initialProps: { streamingState: StreamingState.Idle },
-      },
-    );
+  it('should handle multiple state transitions correctly', async () => {
+    const { result, rerender } = renderMessageQueueHook({
+      isConfigInitialized: true,
+      streamingState: StreamingState.Idle,
+      submitQuery: mockSubmitQuery,
+    });
 
     // Start responding
     rerender({ streamingState: StreamingState.Responding });
@@ -215,8 +206,10 @@ describe('useMessageQueue', () => {
     // Go back to idle - should submit
     rerender({ streamingState: StreamingState.Idle });
 
-    expect(mockSubmitQuery).toHaveBeenCalledWith('First batch');
-    expect(result.current.messageQueue).toEqual([]);
+    await vi.waitFor(() => {
+      expect(mockSubmitQuery).toHaveBeenCalledWith('First batch');
+      expect(result.current.messageQueue).toEqual([]);
+    });
 
     // Start responding again
     rerender({ streamingState: StreamingState.Responding });
@@ -229,19 +222,19 @@ describe('useMessageQueue', () => {
     // Go back to idle - should submit again
     rerender({ streamingState: StreamingState.Idle });
 
-    expect(mockSubmitQuery).toHaveBeenCalledWith('Second batch');
-    expect(mockSubmitQuery).toHaveBeenCalledTimes(2);
+    await vi.waitFor(() => {
+      expect(mockSubmitQuery).toHaveBeenCalledWith('Second batch');
+      expect(mockSubmitQuery).toHaveBeenCalledTimes(2);
+    });
   });
 
   describe('popAllMessages', () => {
     it('should pop all messages and return them joined with double newlines', () => {
-      const { result } = renderHook(() =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState: StreamingState.Responding,
-          submitQuery: mockSubmitQuery,
-        }),
-      );
+      const { result } = renderMessageQueueHook({
+        isConfigInitialized: true,
+        streamingState: StreamingState.Responding,
+        submitQuery: mockSubmitQuery,
+      });
 
       // Add multiple messages
       act(() => {
@@ -269,13 +262,11 @@ describe('useMessageQueue', () => {
     });
 
     it('should return undefined when queue is empty', () => {
-      const { result } = renderHook(() =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState: StreamingState.Responding,
-          submitQuery: mockSubmitQuery,
-        }),
-      );
+      const { result } = renderMessageQueueHook({
+        isConfigInitialized: true,
+        streamingState: StreamingState.Responding,
+        submitQuery: mockSubmitQuery,
+      });
 
       let poppedMessages: string | undefined = 'not-undefined';
       act(() => {
@@ -289,13 +280,11 @@ describe('useMessageQueue', () => {
     });
 
     it('should handle single message correctly', () => {
-      const { result } = renderHook(() =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState: StreamingState.Responding,
-          submitQuery: mockSubmitQuery,
-        }),
-      );
+      const { result } = renderMessageQueueHook({
+        isConfigInitialized: true,
+        streamingState: StreamingState.Responding,
+        submitQuery: mockSubmitQuery,
+      });
 
       act(() => {
         result.current.addMessage('Single message');
@@ -313,13 +302,11 @@ describe('useMessageQueue', () => {
     });
 
     it('should clear the entire queue after popping', () => {
-      const { result } = renderHook(() =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState: StreamingState.Responding,
-          submitQuery: mockSubmitQuery,
-        }),
-      );
+      const { result } = renderMessageQueueHook({
+        isConfigInitialized: true,
+        streamingState: StreamingState.Responding,
+        submitQuery: mockSubmitQuery,
+      });
 
       act(() => {
         result.current.addMessage('Message 1');
@@ -346,13 +333,11 @@ describe('useMessageQueue', () => {
     });
 
     it('should work correctly with state updates', () => {
-      const { result } = renderHook(() =>
-        useMessageQueue({
-          isConfigInitialized: true,
-          streamingState: StreamingState.Responding,
-          submitQuery: mockSubmitQuery,
-        }),
-      );
+      const { result } = renderMessageQueueHook({
+        isConfigInitialized: true,
+        streamingState: StreamingState.Responding,
+        submitQuery: mockSubmitQuery,
+      });
 
       // Add messages
       act(() => {
diff --git a/packages/cli/src/ui/hooks/useModelCommand.test.ts b/packages/cli/src/ui/hooks/useModelCommand.test.ts
deleted file mode 100644
index 30cbe7e56a..0000000000
--- a/packages/cli/src/ui/hooks/useModelCommand.test.ts
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { describe, it, expect } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
-import { useModelCommand } from './useModelCommand.js';
-
-describe('useModelCommand', () => {
-  it('should initialize with the model dialog closed', () => {
-    const { result } = renderHook(() => useModelCommand());
-    expect(result.current.isModelDialogOpen).toBe(false);
-  });
-
-  it('should open the model dialog when openModelDialog is called', () => {
-    const { result } = renderHook(() => useModelCommand());
-
-    act(() => {
-      result.current.openModelDialog();
-    });
-
-    expect(result.current.isModelDialogOpen).toBe(true);
-  });
-
-  it('should close the model dialog when closeModelDialog is called', () => {
-    const { result } = renderHook(() => useModelCommand());
-
-    // Open it first
-    act(() => {
-      result.current.openModelDialog();
-    });
-    expect(result.current.isModelDialogOpen).toBe(true);
-
-    // Then close it
-    act(() => {
-      result.current.closeModelDialog();
-    });
-    expect(result.current.isModelDialogOpen).toBe(false);
-  });
-});
diff --git a/packages/cli/src/ui/hooks/useModelCommand.test.tsx b/packages/cli/src/ui/hooks/useModelCommand.test.tsx
new file mode 100644
index 0000000000..0717ab6414
--- /dev/null
+++ b/packages/cli/src/ui/hooks/useModelCommand.test.tsx
@@ -0,0 +1,50 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
+import { useModelCommand } from './useModelCommand.js';
+
+describe('useModelCommand', () => {
+  let result: ReturnType<typeof useModelCommand>;
+
+  function TestComponent() {
+    result = useModelCommand();
+    return null;
+  }
+
+  it('should initialize with the model dialog closed', () => {
+    render(<TestComponent />);
+    expect(result.isModelDialogOpen).toBe(false);
+  });
+
+  it('should open the model dialog when openModelDialog is called', () => {
+    render(<TestComponent />);
+
+    act(() => {
+      result.openModelDialog();
+    });
+
+    expect(result.isModelDialogOpen).toBe(true);
+  });
+
+  it('should close the model dialog when closeModelDialog is called', () => {
+    render(<TestComponent />);
+
+    // Open it first
+    act(() => {
+      result.openModelDialog();
+    });
+    expect(result.isModelDialogOpen).toBe(true);
+
+    // Then close it
+    act(() => {
+      result.closeModelDialog();
+    });
+    expect(result.isModelDialogOpen).toBe(false);
+  });
+});
diff --git a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts b/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts
index 519752e82b..9549274160 100644
--- a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts
+++ b/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 /// <reference types="vitest/globals" />
 
 import {
diff --git a/packages/cli/src/ui/hooks/usePhraseCycler.test.ts b/packages/cli/src/ui/hooks/usePhraseCycler.test.ts
index 538f6d204b..bfa53ff8c8 100644
--- a/packages/cli/src/ui/hooks/usePhraseCycler.test.ts
+++ b/packages/cli/src/ui/hooks/usePhraseCycler.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { renderHook, act } from '@testing-library/react';
 import {
diff --git a/packages/cli/src/ui/hooks/usePrivacySettings.test.ts b/packages/cli/src/ui/hooks/usePrivacySettings.test.tsx
similarity index 81%
rename from packages/cli/src/ui/hooks/usePrivacySettings.test.ts
rename to packages/cli/src/ui/hooks/usePrivacySettings.test.tsx
index 30dd0c4483..5c2a15d579 100644
--- a/packages/cli/src/ui/hooks/usePrivacySettings.test.ts
+++ b/packages/cli/src/ui/hooks/usePrivacySettings.test.tsx
@@ -5,7 +5,7 @@
  */
 
 import { describe, it, expect, beforeEach, vi } from 'vitest';
-import { renderHook, waitFor } from '@testing-library/react';
+import { render } from 'ink-testing-library';
 import type {
   Config,
   CodeAssistServer,
@@ -31,12 +31,28 @@ describe('usePrivacySettings', () => {
     vi.clearAllMocks();
   });
 
+  const renderPrivacySettingsHook = () => {
+    let hookResult: ReturnType<typeof usePrivacySettings>;
+    function TestComponent() {
+      hookResult = usePrivacySettings(mockConfig);
+      return null;
+    }
+    render(<TestComponent />);
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+    };
+  };
+
   it('should throw error when content generator is not a CodeAssistServer', async () => {
     vi.mocked(getCodeAssistServer).mockReturnValue(undefined);
 
-    const { result } = renderHook(() => usePrivacySettings(mockConfig));
+    const { result } = renderPrivacySettingsHook();
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(result.current.privacyState.isLoading).toBe(false);
     });
 
@@ -53,9 +69,9 @@ describe('usePrivacySettings', () => {
         }) as unknown as LoadCodeAssistResponse,
     } as unknown as CodeAssistServer);
 
-    const { result } = renderHook(() => usePrivacySettings(mockConfig));
+    const { result } = renderPrivacySettingsHook();
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(result.current.privacyState.isLoading).toBe(false);
     });
 
@@ -72,9 +88,9 @@ describe('usePrivacySettings', () => {
         }) as unknown as LoadCodeAssistResponse,
     } as unknown as CodeAssistServer);
 
-    const { result } = renderHook(() => usePrivacySettings(mockConfig));
+    const { result } = renderPrivacySettingsHook();
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(result.current.privacyState.isLoading).toBe(false);
     });
 
@@ -99,10 +115,10 @@ describe('usePrivacySettings', () => {
     } as unknown as CodeAssistServer;
     vi.mocked(getCodeAssistServer).mockReturnValue(mockCodeAssistServer);
 
-    const { result } = renderHook(() => usePrivacySettings(mockConfig));
+    const { result } = renderPrivacySettingsHook();
 
     // Wait for initial load
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(result.current.privacyState.isLoading).toBe(false);
     });
 
@@ -110,7 +126,7 @@ describe('usePrivacySettings', () => {
     await result.current.updateDataCollectionOptIn(false);
 
     // Wait for update to complete
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(result.current.privacyState.dataCollectionOptIn).toBe(false);
     });
 
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
index 0e94a1874d..e3a86009dd 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import {
   vi,
   describe,
diff --git a/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts b/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts
index b3fcfad8b7..ac38b5d1e4 100644
--- a/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts
+++ b/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { CoreToolScheduler } from '@google/gemini-cli-core';
 import type { Config } from '@google/gemini-cli-core';
 import { renderHook } from '@testing-library/react';
diff --git a/packages/cli/src/ui/hooks/useSelectionList.test.ts b/packages/cli/src/ui/hooks/useSelectionList.test.tsx
similarity index 64%
rename from packages/cli/src/ui/hooks/useSelectionList.test.ts
rename to packages/cli/src/ui/hooks/useSelectionList.test.tsx
index a8878d195c..9ee99746ca 100644
--- a/packages/cli/src/ui/hooks/useSelectionList.test.ts
+++ b/packages/cli/src/ui/hooks/useSelectionList.test.tsx
@@ -5,7 +5,8 @@
  */
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import {
   useSelectionList,
   type SelectionListItem,
@@ -66,40 +67,64 @@ describe('useSelectionList', () => {
     });
   };
 
+  const renderSelectionListHook = (initialProps: {
+    items: Array<SelectionListItem<string>>;
+    onSelect: (item: string) => void;
+    onHighlight?: (item: string) => void;
+    initialIndex?: number;
+    isFocused?: boolean;
+    showNumbers?: boolean;
+  }) => {
+    let hookResult: ReturnType<typeof useSelectionList>;
+    function TestComponent(props: typeof initialProps) {
+      hookResult = useSelectionList(props);
+      return null;
+    }
+    const { rerender, unmount } = render(<TestComponent {...initialProps} />);
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+      rerender: (newProps: Partial<typeof initialProps>) =>
+        rerender(<TestComponent {...initialProps} {...newProps} />),
+      unmount,
+    };
+  };
+
   describe('Initialization', () => {
     it('should initialize with the default index (0) if enabled', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items, onSelect: mockOnSelect }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(0);
     });
 
     it('should initialize with the provided initialIndex if enabled', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items,
-          initialIndex: 2,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        initialIndex: 2,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(2);
     });
 
     it('should handle an empty list gracefully', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items: [], onSelect: mockOnSelect }),
-      );
+      const { result } = renderSelectionListHook({
+        items: [],
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(0);
     });
 
     it('should find the next enabled item (downwards) if initialIndex is disabled', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items,
-          initialIndex: 1,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        initialIndex: 1,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(2);
     });
 
@@ -109,33 +134,27 @@ describe('useSelectionList', () => {
         { value: 'B', disabled: true, key: 'B' },
         { value: 'C', disabled: true, key: 'C' },
       ];
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: wrappingItems,
-          initialIndex: 2,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: wrappingItems,
+        initialIndex: 2,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(0);
     });
 
     it('should default to 0 if initialIndex is out of bounds', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items,
-          initialIndex: 10,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        initialIndex: 10,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(0);
 
-      const { result: resultNeg } = renderHook(() =>
-        useSelectionList({
-          items,
-          initialIndex: -1,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result: resultNeg } = renderSelectionListHook({
+        items,
+        initialIndex: -1,
+        onSelect: mockOnSelect,
+      });
       expect(resultNeg.current.activeIndex).toBe(0);
     });
 
@@ -144,22 +163,21 @@ describe('useSelectionList', () => {
         { value: 'A', disabled: true, key: 'A' },
         { value: 'B', disabled: true, key: 'B' },
       ];
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: allDisabled,
-          initialIndex: 1,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: allDisabled,
+        initialIndex: 1,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(1);
     });
   });
 
   describe('Keyboard Navigation (Up/Down/J/K)', () => {
     it('should move down with "j" and "down" keys, skipping disabled items', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items, onSelect: mockOnSelect }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(0);
       pressKey('j');
       expect(result.current.activeIndex).toBe(2);
@@ -168,9 +186,11 @@ describe('useSelectionList', () => {
     });
 
     it('should move up with "k" and "up" keys, skipping disabled items', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items, initialIndex: 3, onSelect: mockOnSelect }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        initialIndex: 3,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(3);
       pressKey('k');
       expect(result.current.activeIndex).toBe(2);
@@ -179,13 +199,11 @@ describe('useSelectionList', () => {
     });
 
     it('should wrap navigation correctly', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items,
-          initialIndex: items.length - 1,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        initialIndex: items.length - 1,
+        onSelect: mockOnSelect,
+      });
       expect(result.current.activeIndex).toBe(3);
       pressKey('down');
       expect(result.current.activeIndex).toBe(0);
@@ -195,13 +213,11 @@ describe('useSelectionList', () => {
     });
 
     it('should call onHighlight when index changes', () => {
-      renderHook(() =>
-        useSelectionList({
-          items,
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-        }),
-      );
+      renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+      });
       pressKey('down');
       expect(mockOnHighlight).toHaveBeenCalledTimes(1);
       expect(mockOnHighlight).toHaveBeenCalledWith('C');
@@ -209,13 +225,11 @@ describe('useSelectionList', () => {
 
     it('should not move or call onHighlight if navigation results in the same index (e.g., single item)', () => {
       const singleItem = [{ value: 'A', key: 'A' }];
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: singleItem,
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: singleItem,
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+      });
       pressKey('down');
       expect(result.current.activeIndex).toBe(0);
       expect(mockOnHighlight).not.toHaveBeenCalled();
@@ -226,13 +240,11 @@ describe('useSelectionList', () => {
         { value: 'A', disabled: true, key: 'A' },
         { value: 'B', disabled: true, key: 'B' },
       ];
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: allDisabled,
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: allDisabled,
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+      });
       const initialIndex = result.current.activeIndex;
       pressKey('down');
       expect(result.current.activeIndex).toBe(initialIndex);
@@ -242,25 +254,21 @@ describe('useSelectionList', () => {
 
   describe('Selection (Enter)', () => {
     it('should call onSelect when "return" is pressed on enabled item', () => {
-      renderHook(() =>
-        useSelectionList({
-          items,
-          initialIndex: 2,
-          onSelect: mockOnSelect,
-        }),
-      );
+      renderSelectionListHook({
+        items,
+        initialIndex: 2,
+        onSelect: mockOnSelect,
+      });
       pressKey('return');
       expect(mockOnSelect).toHaveBeenCalledTimes(1);
       expect(mockOnSelect).toHaveBeenCalledWith('C');
     });
 
     it('should not call onSelect if the active item is disabled', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items,
-          onSelect: mockOnSelect,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+      });
 
       act(() => result.current.setActiveIndex(1));
 
@@ -271,13 +279,11 @@ describe('useSelectionList', () => {
 
   describe('Keyboard Navigation Robustness (Rapid Input)', () => {
     it('should handle rapid navigation and selection robustly (avoiding stale state)', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items, // A, B(disabled), C, D. Initial index 0 (A).
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items, // A, B(disabled), C, D. Initial index 0 (A).
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+      });
 
       // Simulate rapid inputs with separate act blocks to allow effects to run
       if (!activeKeypressHandler) throw new Error('Handler not active');
@@ -321,13 +327,11 @@ describe('useSelectionList', () => {
     });
 
     it('should handle ultra-rapid input (multiple presses in single act) without stale state', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items, // A, B(disabled), C, D. Initial index 0 (A).
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items, // A, B(disabled), C, D. Initial index 0 (A).
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+      });
 
       // Simulate ultra-rapid inputs where all keypresses happen faster than React can re-render
       act(() => {
@@ -363,40 +367,41 @@ describe('useSelectionList', () => {
 
   describe('Focus Management (isFocused)', () => {
     it('should activate the keypress handler when focused (default) and items exist', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items, onSelect: mockOnSelect }),
-      );
+      const { result } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+      });
       expect(activeKeypressHandler).not.toBeNull();
       pressKey('down');
       expect(result.current.activeIndex).toBe(2);
     });
 
     it('should not activate the keypress handler when isFocused is false', () => {
-      renderHook(() =>
-        useSelectionList({ items, onSelect: mockOnSelect, isFocused: false }),
-      );
+      renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+        isFocused: false,
+      });
       expect(activeKeypressHandler).toBeNull();
       expect(() => pressKey('down')).toThrow(/keypress handler is not active/);
     });
 
     it('should not activate the keypress handler when items list is empty', () => {
-      renderHook(() =>
-        useSelectionList({
-          items: [],
-          onSelect: mockOnSelect,
-          isFocused: true,
-        }),
-      );
+      renderSelectionListHook({
+        items: [],
+        onSelect: mockOnSelect,
+        isFocused: true,
+      });
       expect(activeKeypressHandler).toBeNull();
       expect(() => pressKey('down')).toThrow(/keypress handler is not active/);
     });
 
     it('should activate/deactivate when isFocused prop changes', () => {
-      const { result, rerender } = renderHook(
-        (props: { isFocused: boolean }) =>
-          useSelectionList({ items, onSelect: mockOnSelect, ...props }),
-        { initialProps: { isFocused: false } },
-      );
+      const { result, rerender } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+        isFocused: false,
+      });
 
       expect(activeKeypressHandler).toBeNull();
 
@@ -429,23 +434,22 @@ describe('useSelectionList', () => {
     const pressNumber = (num: string) => pressKey(num, num);
 
     it('should not respond to numbers if showNumbers is false (default)', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items: shortList, onSelect: mockOnSelect }),
-      );
+      const { result } = renderSelectionListHook({
+        items: shortList,
+        onSelect: mockOnSelect,
+      });
       pressNumber('1');
       expect(result.current.activeIndex).toBe(0);
       expect(mockOnSelect).not.toHaveBeenCalled();
     });
 
     it('should select item immediately if the number cannot be extended (unambiguous)', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: shortList,
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: shortList,
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+        showNumbers: true,
+      });
       pressNumber('3');
 
       expect(result.current.activeIndex).toBe(2);
@@ -456,15 +460,13 @@ describe('useSelectionList', () => {
     });
 
     it('should highlight and wait for timeout if the number can be extended (ambiguous)', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: longList,
-          initialIndex: 1, // Start at index 1 so pressing "1" (index 0) causes a change
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: longList,
+        initialIndex: 1, // Start at index 1 so pressing "1" (index 0) causes a change
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+        showNumbers: true,
+      });
 
       pressNumber('1');
 
@@ -483,13 +485,11 @@ describe('useSelectionList', () => {
     });
 
     it('should handle multi-digit input correctly', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: longList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: longList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('1');
       expect(mockOnSelect).not.toHaveBeenCalled();
@@ -503,13 +503,11 @@ describe('useSelectionList', () => {
     });
 
     it('should reset buffer if input becomes invalid (out of bounds)', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: shortList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: shortList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('5');
 
@@ -522,13 +520,11 @@ describe('useSelectionList', () => {
     });
 
     it('should allow "0" as subsequent digit, but ignore as first digit', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: longList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: longList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('0');
       expect(result.current.activeIndex).toBe(0);
@@ -545,13 +541,11 @@ describe('useSelectionList', () => {
     });
 
     it('should clear the initial "0" input after timeout', () => {
-      renderHook(() =>
-        useSelectionList({
-          items: longList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      renderSelectionListHook({
+        items: longList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('0');
       act(() => vi.advanceTimersByTime(1000)); // Timeout the '0' input
@@ -564,14 +558,12 @@ describe('useSelectionList', () => {
     });
 
     it('should highlight but not select a disabled item (immediate selection case)', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: shortList, // B (index 1, number 2) is disabled
-          onSelect: mockOnSelect,
-          onHighlight: mockOnHighlight,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: shortList, // B (index 1, number 2) is disabled
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+        showNumbers: true,
+      });
 
       pressNumber('2');
 
@@ -589,13 +581,11 @@ describe('useSelectionList', () => {
         ...longList.slice(1),
       ];
 
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: disabledAmbiguousList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: disabledAmbiguousList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('1');
       expect(result.current.activeIndex).toBe(0);
@@ -610,13 +600,11 @@ describe('useSelectionList', () => {
     });
 
     it('should clear the number buffer if a non-numeric key (e.g., navigation) is pressed', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({
-          items: longList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      const { result } = renderSelectionListHook({
+        items: longList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('1');
       expect(vi.getTimerCount()).toBe(1);
@@ -632,13 +620,11 @@ describe('useSelectionList', () => {
     });
 
     it('should clear the number buffer if "return" is pressed', () => {
-      renderHook(() =>
-        useSelectionList({
-          items: longList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      renderSelectionListHook({
+        items: longList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressNumber('1');
 
@@ -655,31 +641,25 @@ describe('useSelectionList', () => {
   });
 
   describe('Reactivity (Dynamic Updates)', () => {
-    it('should update activeIndex when initialIndex prop changes', () => {
-      const { result, rerender } = renderHook(
-        ({ initialIndex }: { initialIndex: number }) =>
-          useSelectionList({
-            items,
-            onSelect: mockOnSelect,
-            initialIndex,
-          }),
-        { initialProps: { initialIndex: 0 } },
-      );
+    it('should update activeIndex when initialIndex prop changes', async () => {
+      const { result, rerender } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+        initialIndex: 0,
+      });
 
       rerender({ initialIndex: 2 });
-      expect(result.current.activeIndex).toBe(2);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(2);
+      });
     });
 
-    it('should respect a new initialIndex even after user interaction', () => {
-      const { result, rerender } = renderHook(
-        ({ initialIndex }: { initialIndex: number }) =>
-          useSelectionList({
-            items,
-            onSelect: mockOnSelect,
-            initialIndex,
-          }),
-        { initialProps: { initialIndex: 0 } },
-      );
+    it('should respect a new initialIndex even after user interaction', async () => {
+      const { result, rerender } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+        initialIndex: 0,
+      });
 
       // User navigates, changing the active index
       pressKey('down');
@@ -689,35 +669,31 @@ describe('useSelectionList', () => {
       rerender({ initialIndex: 3 });
 
       // The hook should now respect the new initial index
-      expect(result.current.activeIndex).toBe(3);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(3);
+      });
     });
 
-    it('should validate index when initialIndex prop changes to a disabled item', () => {
-      const { result, rerender } = renderHook(
-        ({ initialIndex }: { initialIndex: number }) =>
-          useSelectionList({
-            items,
-            onSelect: mockOnSelect,
-            initialIndex,
-          }),
-        { initialProps: { initialIndex: 0 } },
-      );
+    it('should validate index when initialIndex prop changes to a disabled item', async () => {
+      const { result, rerender } = renderSelectionListHook({
+        items,
+        onSelect: mockOnSelect,
+        initialIndex: 0,
+      });
 
       rerender({ initialIndex: 1 });
 
-      expect(result.current.activeIndex).toBe(2);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(2);
+      });
     });
 
-    it('should adjust activeIndex if items change and the initialIndex is now out of bounds', () => {
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            initialIndex: 3,
-            items: testItems,
-          }),
-        { initialProps: { items } },
-      );
+    it('should adjust activeIndex if items change and the initialIndex is now out of bounds', async () => {
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        initialIndex: 3,
+        items,
+      });
 
       expect(result.current.activeIndex).toBe(3);
 
@@ -728,24 +704,22 @@ describe('useSelectionList', () => {
       rerender({ items: shorterItems }); // Length 2
 
       // The useEffect syncs based on the initialIndex (3) which is now out of bounds. It defaults to 0.
-      expect(result.current.activeIndex).toBe(0);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(0);
+      });
     });
 
-    it('should adjust activeIndex if items change and the initialIndex becomes disabled', () => {
+    it('should adjust activeIndex if items change and the initialIndex becomes disabled', async () => {
       const initialItems = [
         { value: 'A', key: 'A' },
         { value: 'B', key: 'B' },
         { value: 'C', key: 'C' },
       ];
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            initialIndex: 1,
-            items: testItems,
-          }),
-        { initialProps: { items: initialItems } },
-      );
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        initialIndex: 1,
+        items: initialItems,
+      });
 
       expect(result.current.activeIndex).toBe(1);
 
@@ -756,25 +730,25 @@ describe('useSelectionList', () => {
       ];
       rerender({ items: newItems });
 
-      expect(result.current.activeIndex).toBe(2);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(2);
+      });
     });
 
-    it('should reset to 0 if items change to an empty list', () => {
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            initialIndex: 2,
-            items: testItems,
-          }),
-        { initialProps: { items } },
-      );
+    it('should reset to 0 if items change to an empty list', async () => {
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        initialIndex: 2,
+        items,
+      });
 
       rerender({ items: [] });
-      expect(result.current.activeIndex).toBe(0);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(0);
+      });
     });
 
-    it('should not reset activeIndex when items are deeply equal', () => {
+    it('should not reset activeIndex when items are deeply equal', async () => {
       const initialItems = [
         { value: 'A', key: 'A' },
         { value: 'B', disabled: true, key: 'B' },
@@ -782,16 +756,12 @@ describe('useSelectionList', () => {
         { value: 'D', key: 'D' },
       ];
 
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            onHighlight: mockOnHighlight,
-            initialIndex: 2,
-            items: testItems,
-          }),
-        { initialProps: { items: initialItems } },
-      );
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+        initialIndex: 2,
+        items: initialItems,
+      });
 
       expect(result.current.activeIndex).toBe(2);
 
@@ -813,12 +783,14 @@ describe('useSelectionList', () => {
       rerender({ items: newItems });
 
       // Active index should remain the same since items are deeply equal
-      expect(result.current.activeIndex).toBe(3);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(3);
+      });
       // onHighlight should NOT be called since the index didn't change
       expect(mockOnHighlight).not.toHaveBeenCalled();
     });
 
-    it('should update activeIndex when items change structurally', () => {
+    it('should update activeIndex when items change structurally', async () => {
       const initialItems = [
         { value: 'A', key: 'A' },
         { value: 'B', disabled: true, key: 'B' },
@@ -826,16 +798,12 @@ describe('useSelectionList', () => {
         { value: 'D', key: 'D' },
       ];
 
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            onHighlight: mockOnHighlight,
-            initialIndex: 3,
-            items: testItems,
-          }),
-        { initialProps: { items: initialItems } },
-      );
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        onHighlight: mockOnHighlight,
+        initialIndex: 3,
+        items: initialItems,
+      });
 
       expect(result.current.activeIndex).toBe(3);
       mockOnHighlight.mockClear();
@@ -850,25 +818,23 @@ describe('useSelectionList', () => {
       rerender({ items: newItems });
 
       // Active index should update based on initialIndex and new items
-      expect(result.current.activeIndex).toBe(0);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(0);
+      });
     });
 
-    it('should handle partial changes in items array', () => {
+    it('should handle partial changes in items array', async () => {
       const initialItems = [
         { value: 'A', key: 'A' },
         { value: 'B', key: 'B' },
         { value: 'C', key: 'C' },
       ];
 
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            initialIndex: 1,
-            items: testItems,
-          }),
-        { initialProps: { items: initialItems } },
-      );
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        initialIndex: 1,
+        items: initialItems,
+      });
 
       expect(result.current.activeIndex).toBe(1);
 
@@ -882,24 +848,22 @@ describe('useSelectionList', () => {
       rerender({ items: newItems });
 
       // Should find next valid index since current became disabled
-      expect(result.current.activeIndex).toBe(2);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(2);
+      });
     });
 
-    it('should update selection when a new item is added to the start of the list', () => {
+    it('should update selection when a new item is added to the start of the list', async () => {
       const initialItems = [
         { value: 'A', key: 'A' },
         { value: 'B', key: 'B' },
         { value: 'C', key: 'C' },
       ];
 
-      const { result, rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) =>
-          useSelectionList({
-            onSelect: mockOnSelect,
-            items: testItems,
-          }),
-        { initialProps: { items: initialItems } },
-      );
+      const { result, rerender } = renderSelectionListHook({
+        onSelect: mockOnSelect,
+        items: initialItems,
+      });
 
       pressKey('down');
       expect(result.current.activeIndex).toBe(1);
@@ -913,7 +877,9 @@ describe('useSelectionList', () => {
 
       rerender({ items: newItems });
 
-      expect(result.current.activeIndex).toBe(2);
+      await vi.waitFor(() => {
+        expect(result.current.activeIndex).toBe(2);
+      });
     });
 
     it('should not re-initialize when items have identical keys but are different objects', () => {
@@ -924,17 +890,26 @@ describe('useSelectionList', () => {
 
       let renderCount = 0;
 
-      const { rerender } = renderHook(
-        ({ items: testItems }: { items: Array<SelectionListItem<string>> }) => {
+      const renderHookWithCount = (initialProps: {
+        items: Array<SelectionListItem<string>>;
+      }) => {
+        function TestComponent(props: typeof initialProps) {
           renderCount++;
-          return useSelectionList({
+          useSelectionList({
             onSelect: mockOnSelect,
             onHighlight: mockOnHighlight,
-            items: testItems,
+            items: props.items,
           });
-        },
-        { initialProps: { items: initialItems } },
-      );
+          return null;
+        }
+        const { rerender } = render(<TestComponent {...initialProps} />);
+        return {
+          rerender: (newProps: Partial<typeof initialProps>) =>
+            rerender(<TestComponent {...initialProps} {...newProps} />),
+        };
+      };
+
+      const { rerender } = renderHookWithCount({ items: initialItems });
 
       // Initial render
       expect(renderCount).toBe(1);
@@ -950,24 +925,6 @@ describe('useSelectionList', () => {
     });
   });
 
-  describe('Manual Control', () => {
-    it('should allow manual setting of active index via setActiveIndex', () => {
-      const { result } = renderHook(() =>
-        useSelectionList({ items, onSelect: mockOnSelect }),
-      );
-
-      act(() => {
-        result.current.setActiveIndex(3);
-      });
-      expect(result.current.activeIndex).toBe(3);
-
-      act(() => {
-        result.current.setActiveIndex(1);
-      });
-      expect(result.current.activeIndex).toBe(1);
-    });
-  });
-
   describe('Cleanup', () => {
     beforeEach(() => {
       vi.useFakeTimers();
@@ -983,13 +940,11 @@ describe('useSelectionList', () => {
         (_, i) => ({ value: `Item ${i + 1}`, key: `Item ${i + 1}` }),
       );
 
-      const { unmount } = renderHook(() =>
-        useSelectionList({
-          items: longList,
-          onSelect: mockOnSelect,
-          showNumbers: true,
-        }),
-      );
+      const { unmount } = renderSelectionListHook({
+        items: longList,
+        onSelect: mockOnSelect,
+        showNumbers: true,
+      });
 
       pressKey('1', '1');
 
diff --git a/packages/cli/src/ui/hooks/useShellHistory.test.ts b/packages/cli/src/ui/hooks/useShellHistory.test.ts
index ccb4bb7b6d..865bc7cf3f 100644
--- a/packages/cli/src/ui/hooks/useShellHistory.test.ts
+++ b/packages/cli/src/ui/hooks/useShellHistory.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 import { renderHook, act, waitFor } from '@testing-library/react';
 import { useShellHistory } from './useShellHistory.js';
 import * as fs from 'node:fs/promises';
diff --git a/packages/cli/src/ui/hooks/useTimer.test.ts b/packages/cli/src/ui/hooks/useTimer.test.tsx
similarity index 59%
rename from packages/cli/src/ui/hooks/useTimer.test.ts
rename to packages/cli/src/ui/hooks/useTimer.test.tsx
index 20d44d1781..475116086b 100644
--- a/packages/cli/src/ui/hooks/useTimer.test.ts
+++ b/packages/cli/src/ui/hooks/useTimer.test.tsx
@@ -5,7 +5,8 @@
  */
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import { useTimer } from './useTimer.js';
 
 describe('useTimer', () => {
@@ -17,13 +18,43 @@ describe('useTimer', () => {
     vi.restoreAllMocks();
   });
 
+  const renderTimerHook = (
+    initialIsActive: boolean,
+    initialResetKey: number,
+  ) => {
+    let hookResult: ReturnType<typeof useTimer>;
+    function TestComponent({
+      isActive,
+      resetKey,
+    }: {
+      isActive: boolean;
+      resetKey: number;
+    }) {
+      hookResult = useTimer(isActive, resetKey);
+      return null;
+    }
+    const { rerender, unmount } = render(
+      <TestComponent isActive={initialIsActive} resetKey={initialResetKey} />,
+    );
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+      rerender: (newProps: { isActive: boolean; resetKey: number }) =>
+        rerender(<TestComponent {...newProps} />),
+      unmount,
+    };
+  };
+
   it('should initialize with 0', () => {
-    const { result } = renderHook(() => useTimer(false, 0));
+    const { result } = renderTimerHook(false, 0);
     expect(result.current).toBe(0);
   });
 
   it('should not increment time if isActive is false', () => {
-    const { result } = renderHook(() => useTimer(false, 0));
+    const { result } = renderTimerHook(false, 0);
     act(() => {
       vi.advanceTimersByTime(5000);
     });
@@ -31,7 +62,7 @@ describe('useTimer', () => {
   });
 
   it('should increment time every second if isActive is true', () => {
-    const { result } = renderHook(() => useTimer(true, 0));
+    const { result } = renderTimerHook(true, 0);
     act(() => {
       vi.advanceTimersByTime(1000);
     });
@@ -43,13 +74,12 @@ describe('useTimer', () => {
   });
 
   it('should reset to 0 and start incrementing when isActive becomes true from false', () => {
-    const { result, rerender } = renderHook(
-      ({ isActive, resetKey }) => useTimer(isActive, resetKey),
-      { initialProps: { isActive: false, resetKey: 0 } },
-    );
+    const { result, rerender } = renderTimerHook(false, 0);
     expect(result.current).toBe(0);
 
-    rerender({ isActive: true, resetKey: 0 });
+    act(() => {
+      rerender({ isActive: true, resetKey: 0 });
+    });
     expect(result.current).toBe(0); // Should reset to 0 upon becoming active
 
     act(() => {
@@ -59,16 +89,15 @@ describe('useTimer', () => {
   });
 
   it('should reset to 0 when resetKey changes while active', () => {
-    const { result, rerender } = renderHook(
-      ({ isActive, resetKey }) => useTimer(isActive, resetKey),
-      { initialProps: { isActive: true, resetKey: 0 } },
-    );
+    const { result, rerender } = renderTimerHook(true, 0);
     act(() => {
       vi.advanceTimersByTime(3000); // 3s
     });
     expect(result.current).toBe(3);
 
-    rerender({ isActive: true, resetKey: 1 }); // Change resetKey
+    act(() => {
+      rerender({ isActive: true, resetKey: 1 }); // Change resetKey
+    });
     expect(result.current).toBe(0); // Should reset to 0
 
     act(() => {
@@ -78,39 +107,39 @@ describe('useTimer', () => {
   });
 
   it('should be 0 if isActive is false, regardless of resetKey changes', () => {
-    const { result, rerender } = renderHook(
-      ({ isActive, resetKey }) => useTimer(isActive, resetKey),
-      { initialProps: { isActive: false, resetKey: 0 } },
-    );
+    const { result, rerender } = renderTimerHook(false, 0);
     expect(result.current).toBe(0);
 
-    rerender({ isActive: false, resetKey: 1 });
+    act(() => {
+      rerender({ isActive: false, resetKey: 1 });
+    });
     expect(result.current).toBe(0);
   });
 
   it('should clear timer on unmount', () => {
-    const { unmount } = renderHook(() => useTimer(true, 0));
+    const { unmount } = renderTimerHook(true, 0);
     const clearIntervalSpy = vi.spyOn(global, 'clearInterval');
     unmount();
     expect(clearIntervalSpy).toHaveBeenCalledOnce();
   });
 
   it('should preserve elapsedTime when isActive becomes false, and reset to 0 when it becomes active again', () => {
-    const { result, rerender } = renderHook(
-      ({ isActive, resetKey }) => useTimer(isActive, resetKey),
-      { initialProps: { isActive: true, resetKey: 0 } },
-    );
+    const { result, rerender } = renderTimerHook(true, 0);
 
     act(() => {
       vi.advanceTimersByTime(3000); // Advance to 3 seconds
     });
     expect(result.current).toBe(3);
 
-    rerender({ isActive: false, resetKey: 0 });
+    act(() => {
+      rerender({ isActive: false, resetKey: 0 });
+    });
     expect(result.current).toBe(3); // Time should be preserved when timer becomes inactive
 
     // Now make it active again, it should reset to 0
-    rerender({ isActive: true, resetKey: 0 });
+    act(() => {
+      rerender({ isActive: true, resetKey: 0 });
+    });
     expect(result.current).toBe(0);
     act(() => {
       vi.advanceTimersByTime(1000);
diff --git a/packages/cli/src/ui/hooks/useToolScheduler.test.ts b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
index 9fd31b89f9..d80f8eceb2 100644
--- a/packages/cli/src/ui/hooks/useToolScheduler.test.ts
+++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
@@ -4,6 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+/** @vitest-environment jsdom */
+
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import type { Mock } from 'vitest';
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
diff --git a/packages/cli/src/ui/hooks/vim.test.ts b/packages/cli/src/ui/hooks/vim.test.tsx
similarity index 98%
rename from packages/cli/src/ui/hooks/vim.test.ts
rename to packages/cli/src/ui/hooks/vim.test.tsx
index 2bfba0c31f..7588899b87 100644
--- a/packages/cli/src/ui/hooks/vim.test.ts
+++ b/packages/cli/src/ui/hooks/vim.test.tsx
@@ -5,8 +5,9 @@
  */
 
 import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
 import type React from 'react';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
 import { useVim } from './vim.js';
 import type { VimMode } from './vim.js';
 import type { Key } from './useKeypress.js';
@@ -173,10 +174,25 @@ describe('useVim hook', () => {
     };
   };
 
-  const renderVimHook = (buffer?: Partial<TextBuffer>) =>
-    renderHook(() =>
-      useVim((buffer || mockBuffer) as TextBuffer, mockHandleFinalSubmit),
-    );
+  const renderVimHook = (buffer?: Partial<TextBuffer>) => {
+    let hookResult: ReturnType<typeof useVim>;
+    function TestComponent() {
+      hookResult = useVim(
+        (buffer || mockBuffer) as TextBuffer,
+        mockHandleFinalSubmit,
+      );
+      return null;
+    }
+    const { rerender } = render(<TestComponent />);
+    return {
+      result: {
+        get current() {
+          return hookResult;
+        },
+      },
+      rerender: () => rerender(<TestComponent />),
+    };
+  };
 
   const exitInsertMode = (result: {
     current: {
@@ -1286,10 +1302,14 @@ describe('useVim hook', () => {
   });
 
   describe('Shell command pass-through', () => {
-    it('should pass through ctrl+r in INSERT mode', () => {
+    it('should pass through ctrl+r in INSERT mode', async () => {
       mockVimContext.vimMode = 'INSERT';
       const { result } = renderVimHook();
 
+      await vi.waitFor(() => {
+        expect(result.current.mode).toBe('INSERT');
+      });
+
       const handled = result.current.handleInput(
         createKey({ name: 'r', ctrl: true }),
       );
@@ -1297,20 +1317,29 @@ describe('useVim hook', () => {
       expect(handled).toBe(false);
     });
 
-    it('should pass through ! in INSERT mode when buffer is empty', () => {
+    it('should pass through ! in INSERT mode when buffer is empty', async () => {
       mockVimContext.vimMode = 'INSERT';
       const emptyBuffer = createMockBuffer('');
       const { result } = renderVimHook(emptyBuffer);
 
+      await vi.waitFor(() => {
+        expect(result.current.mode).toBe('INSERT');
+      });
+
       const handled = result.current.handleInput(createKey({ sequence: '!' }));
 
       expect(handled).toBe(false);
     });
 
-    it('should handle ! as input in INSERT mode when buffer is not empty', () => {
+    it('should handle ! as input in INSERT mode when buffer is not empty', async () => {
       mockVimContext.vimMode = 'INSERT';
       const nonEmptyBuffer = createMockBuffer('not empty');
       const { result } = renderVimHook(nonEmptyBuffer);
+
+      await vi.waitFor(() => {
+        expect(result.current.mode).toBe('INSERT');
+      });
+
       const key = createKey({ sequence: '!', name: '!' });
 
       act(() => {
diff --git a/packages/cli/vitest.config.ts b/packages/cli/vitest.config.ts
index fcffa292ff..aeac3ad329 100644
--- a/packages/cli/vitest.config.ts
+++ b/packages/cli/vitest.config.ts
@@ -6,18 +6,25 @@
 
 /// <reference types="vitest" />
 import { defineConfig } from 'vitest/config';
+import { fileURLToPath } from 'node:url';
+import * as path from 'node:path';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
 
 export default defineConfig({
   test: {
     include: ['**/*.{test,spec}.?(c|m)[jt]s?(x)', 'config.test.ts'],
     exclude: ['**/node_modules/**', '**/dist/**', '**/cypress/**'],
-    environment: 'jsdom',
+    environment: 'node',
     globals: true,
     reporters: ['default', 'junit'],
     silent: true,
     outputFile: {
       junit: 'junit.xml',
     },
+    alias: {
+      react: path.resolve(__dirname, '../../node_modules/react'),
+    },
     setupFiles: ['./test-setup.ts'],
     coverage: {
       enabled: true,
diff --git a/packages/core/src/agents/subagent-tool-wrapper.test.ts b/packages/core/src/agents/subagent-tool-wrapper.test.ts
index 5cfd744dc2..f971dc5162 100644
--- a/packages/core/src/agents/subagent-tool-wrapper.test.ts
+++ b/packages/core/src/agents/subagent-tool-wrapper.test.ts
@@ -67,8 +67,7 @@ describe('SubagentToolWrapper', () => {
     it('should call convertInputConfigToJsonSchema with the correct agent inputConfig', () => {
       new SubagentToolWrapper(mockDefinition, mockConfig);
 
-      expect(convertInputConfigToJsonSchema).toHaveBeenCalledOnce();
-      expect(convertInputConfigToJsonSchema).toHaveBeenCalledWith(
+      expect(convertInputConfigToJsonSchema).toHaveBeenCalledExactlyOnceWith(
         mockDefinition.inputConfig,
       );
     });
@@ -115,8 +114,7 @@ describe('SubagentToolWrapper', () => {
       const invocation = wrapper.build(params);
 
       expect(invocation).toBeInstanceOf(SubagentInvocation);
-      expect(MockedSubagentInvocation).toHaveBeenCalledOnce();
-      expect(MockedSubagentInvocation).toHaveBeenCalledWith(
+      expect(MockedSubagentInvocation).toHaveBeenCalledExactlyOnceWith(
         params,
         mockDefinition,
         mockConfig,

From 2fa13420aeb67adcbba0ca0fa8c4827be34b8f0d Mon Sep 17 00:00:00 2001
From: Gaurav Sehgal <sehgalgaurav@google.com>
Date: Mon, 27 Oct 2025 09:47:13 +0530
Subject: [PATCH 29/73] add absolute file path description for windows (#12007)

---
 packages/core/src/tools/edit.test.ts      | 40 +++++++++++++++++++++++
 packages/core/src/tools/edit.ts           |  5 ++-
 packages/core/src/tools/read-file.test.ts | 40 +++++++++++++++++++++++
 packages/core/src/tools/read-file.ts      |  5 ++-
 4 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts
index 60f09c7a81..ab021cd161 100644
--- a/packages/core/src/tools/edit.test.ts
+++ b/packages/core/src/tools/edit.test.ts
@@ -36,6 +36,14 @@ vi.mock('../telemetry/loggers.js', () => ({
   logFileOperation: vi.fn(),
 }));
 
+interface EditFileParameterSchema {
+  properties: {
+    file_path: {
+      description: string;
+    };
+  };
+}
+
 import type { Mock } from 'vitest';
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
 import type { EditToolParams } from './edit.js';
@@ -1025,6 +1033,38 @@ describe('EditTool', () => {
     });
   });
 
+  describe('constructor', () => {
+    afterEach(() => {
+      vi.restoreAllMocks();
+    });
+
+    it('should use windows-style path examples on windows', () => {
+      vi.spyOn(process, 'platform', 'get').mockReturnValue('win32');
+
+      const tool = new EditTool({} as unknown as Config);
+      const schema = tool.schema;
+      expect(
+        (schema.parametersJsonSchema as EditFileParameterSchema).properties
+          .file_path.description,
+      ).toBe(
+        "The absolute path to the file to modify (e.g., 'C:\\Users\\project\\file.txt'). Must be an absolute path.",
+      );
+    });
+
+    it('should use unix-style path examples on non-windows platforms', () => {
+      vi.spyOn(process, 'platform', 'get').mockReturnValue('linux');
+
+      const tool = new EditTool({} as unknown as Config);
+      const schema = tool.schema;
+      expect(
+        (schema.parametersJsonSchema as EditFileParameterSchema).properties
+          .file_path.description,
+      ).toBe(
+        "The absolute path to the file to modify (e.g., '/home/user/project/file.txt'). Must start with '/'.",
+      );
+    });
+  });
+
   describe('IDE mode', () => {
     const testFile = 'edit_me.txt';
     let filePath: string;
diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts
index 749dffe813..6ce1a1f946 100644
--- a/packages/core/src/tools/edit.ts
+++ b/packages/core/src/tools/edit.ts
@@ -7,6 +7,7 @@
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import * as Diff from 'diff';
+import process from 'node:process';
 import type {
   ToolCallConfirmationDetails,
   ToolEditConfirmationDetails,
@@ -504,7 +505,9 @@ Expectation for required parameters:
         properties: {
           file_path: {
             description:
-              "The absolute path to the file to modify. Must start with '/'.",
+              process.platform === 'win32'
+                ? "The absolute path to the file to modify (e.g., 'C:\\Users\\project\\file.txt'). Must be an absolute path."
+                : "The absolute path to the file to modify (e.g., '/home/user/project/file.txt'). Must start with '/'.",
             type: 'string',
           },
           old_string: {
diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts
index 825d807cc4..a079651298 100644
--- a/packages/core/src/tools/read-file.test.ts
+++ b/packages/core/src/tools/read-file.test.ts
@@ -22,6 +22,14 @@ vi.mock('../telemetry/loggers.js', () => ({
   logFileOperation: vi.fn(),
 }));
 
+interface ReadFileParameterSchema {
+  properties: {
+    absolute_path: {
+      description: string;
+    };
+  };
+}
+
 describe('ReadFileTool', () => {
   let tempRootDir: string;
   let tool: ReadFileTool;
@@ -196,6 +204,38 @@ describe('ReadFileTool', () => {
     });
   });
 
+  describe('constructor', () => {
+    afterEach(() => {
+      vi.restoreAllMocks();
+    });
+
+    it('should use windows-style path examples on windows', () => {
+      vi.spyOn(process, 'platform', 'get').mockReturnValue('win32');
+
+      const tool = new ReadFileTool({} as unknown as Config);
+      const schema = tool.schema;
+      expect(
+        (schema.parametersJsonSchema as ReadFileParameterSchema).properties
+          .absolute_path.description,
+      ).toBe(
+        "The absolute path to the file to read (e.g., 'C:\\Users\\project\\file.txt'). Relative paths are not supported. You must provide an absolute path.",
+      );
+    });
+
+    it('should use unix-style path examples on non-windows platforms', () => {
+      vi.spyOn(process, 'platform', 'get').mockReturnValue('linux');
+
+      const tool = new ReadFileTool({} as unknown as Config);
+      const schema = tool.schema;
+      expect(
+        (schema.parametersJsonSchema as ReadFileParameterSchema).properties
+          .absolute_path.description,
+      ).toBe(
+        "The absolute path to the file to read (e.g., '/home/user/project/file.txt'). Relative paths are not supported. You must provide an absolute path.",
+      );
+    });
+  });
+
   describe('execute', () => {
     it('should return error if file does not exist', async () => {
       const filePath = path.join(tempRootDir, 'nonexistent.txt');
diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts
index affb428907..95461c1f06 100644
--- a/packages/core/src/tools/read-file.ts
+++ b/packages/core/src/tools/read-file.ts
@@ -6,6 +6,7 @@
 
 import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import path from 'node:path';
+import process from 'node:process';
 import { makeRelative, shortenPath } from '../utils/paths.js';
 import type { ToolInvocation, ToolLocation, ToolResult } from './tools.js';
 import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
@@ -155,7 +156,9 @@ export class ReadFileTool extends BaseDeclarativeTool<
         properties: {
           absolute_path: {
             description:
-              "The absolute path to the file to read (e.g., '/home/user/project/file.txt'). Relative paths are not supported. You must provide an absolute path.",
+              process.platform === 'win32'
+                ? "The absolute path to the file to read (e.g., 'C:\\Users\\project\\file.txt'). Relative paths are not supported. You must provide an absolute path."
+                : "The absolute path to the file to read (e.g., '/home/user/project/file.txt'). Relative paths are not supported. You must provide an absolute path.",
             type: 'string',
           },
           offset: {

From c7817aee305712c74a139ecb08333fec81a633b9 Mon Sep 17 00:00:00 2001
From: Krishna Bajpai <bajpaikrishna2@gmail.com>
Date: Mon, 27 Oct 2025 07:57:54 -0700
Subject: [PATCH 30/73] fix(cli): Add delimiter before printing tool response
 in non-interactive mode (#11351)

---
 package-lock.json                             | 46 ++++++---
 .../nonInteractiveCli.test.ts.snap            |  8 ++
 packages/cli/src/nonInteractiveCli.test.ts    | 96 +++++++++++++++---
 packages/cli/src/nonInteractiveCli.ts         | 11 ++-
 .../__snapshots__/textOutput.test.ts.snap     | 23 +++++
 packages/cli/src/ui/utils/textOutput.test.ts  | 99 +++++++++++++++++++
 packages/cli/src/ui/utils/textOutput.ts       | 54 ++++++++++
 7 files changed, 305 insertions(+), 32 deletions(-)
 create mode 100644 packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap
 create mode 100644 packages/cli/src/ui/utils/__snapshots__/textOutput.test.ts.snap
 create mode 100644 packages/cli/src/ui/utils/textOutput.test.ts
 create mode 100644 packages/cli/src/ui/utils/textOutput.ts

diff --git a/package-lock.json b/package-lock.json
index a0e554676c..69fb107bc6 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -598,6 +598,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=18"
       },
@@ -621,6 +622,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=18"
       }
@@ -2426,6 +2428,7 @@
       "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@octokit/auth-token": "^6.0.0",
         "@octokit/graphql": "^9.0.2",
@@ -2606,6 +2609,7 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
       "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
       "license": "Apache-2.0",
+      "peer": true,
       "engines": {
         "node": ">=8.0.0"
       }
@@ -2639,6 +2643,7 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.0.1.tgz",
       "integrity": "sha512-MaZk9SJIDgo1peKevlbhP6+IwIiNPNmswNL4AF0WaQJLbHXjr9SrZMgS12+iqr9ToV4ZVosCcc0f8Rg67LXjxw==",
       "license": "Apache-2.0",
+      "peer": true,
       "dependencies": {
         "@opentelemetry/semantic-conventions": "^1.29.0"
       },
@@ -3007,6 +3012,7 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.0.1.tgz",
       "integrity": "sha512-dZOB3R6zvBwDKnHDTB4X1xtMArB/d324VsbiPkX/Yu0Q8T2xceRthoIVFhJdvgVM2QhGVUyX9tzwiNxGtoBJUw==",
       "license": "Apache-2.0",
+      "peer": true,
       "dependencies": {
         "@opentelemetry/core": "2.0.1",
         "@opentelemetry/semantic-conventions": "^1.29.0"
@@ -3040,6 +3046,7 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.0.1.tgz",
       "integrity": "sha512-wf8OaJoSnujMAHWR3g+/hGvNcsC16rf9s1So4JlMiFaFHiE4HpIA3oUh+uWZQ7CNuK8gVW/pQSkgoa5HkkOl0g==",
       "license": "Apache-2.0",
+      "peer": true,
       "dependencies": {
         "@opentelemetry/core": "2.0.1",
         "@opentelemetry/resources": "2.0.1"
@@ -3092,6 +3099,7 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.0.1.tgz",
       "integrity": "sha512-xYLlvk/xdScGx1aEqvxLwf6sXQLXCjk3/1SQT9X9AoN5rXRhkdvIFShuNNmtTEPRBqcsMbS4p/gJLNI2wXaDuQ==",
       "license": "Apache-2.0",
+      "peer": true,
       "dependencies": {
         "@opentelemetry/core": "2.0.1",
         "@opentelemetry/resources": "2.0.1",
@@ -3807,6 +3815,7 @@
       "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
       "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.10.4",
         "@babel/runtime": "^7.12.5",
@@ -4339,6 +4348,7 @@
       "integrity": "sha512-AwAfQ2Wa5bCx9WP8nZL2uMZWod7J7/JSplxbTmBQ5ms6QpqNYm672H0Vu9ZVKVngQ+ii4R/byguVEUZQyeg44g==",
       "devOptional": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "csstype": "^3.0.2"
       }
@@ -4349,6 +4359,7 @@
       "integrity": "sha512-4hOiT/dwO8Ko0gV1m/TJZYk3y0KBnY9vzDh7W+DH17b2HFSOGgdj33dhihPeuy3l0q23+4e+hoXHV6hCC4dCXw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "peerDependencies": {
         "@types/react": "^19.0.0"
       }
@@ -4626,6 +4637,7 @@
       "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.35.0",
         "@typescript-eslint/types": "8.35.0",
@@ -5393,6 +5405,7 @@
       "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "license": "MIT",
+      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -5756,8 +5769,7 @@
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
       "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==",
-      "license": "MIT",
-      "peer": true
+      "license": "MIT"
     },
     "node_modules/array-includes": {
       "version": "3.1.9",
@@ -7003,7 +7015,6 @@
       "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
       "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "safe-buffer": "5.2.1"
       },
@@ -8051,6 +8062,7 @@
       "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.2.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -8640,7 +8652,6 @@
       "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz",
       "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">= 0.6"
       }
@@ -8650,7 +8661,6 @@
       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
       "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "ms": "2.0.0"
       }
@@ -8660,7 +8670,6 @@
       "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
       "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">= 0.8"
       }
@@ -8890,7 +8899,6 @@
       "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz",
       "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "debug": "2.6.9",
         "encodeurl": "~2.0.0",
@@ -8909,7 +8917,6 @@
       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
       "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "ms": "2.0.0"
       }
@@ -8918,15 +8925,13 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
       "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT",
-      "peer": true
+      "license": "MIT"
     },
     "node_modules/finalhandler/node_modules/statuses": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
       "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">= 0.8"
       }
@@ -10143,6 +10148,7 @@
       "resolved": "https://registry.npmjs.org/ink/-/ink-6.2.3.tgz",
       "integrity": "sha512-fQkfEJjKbLXIcVWEE3MvpYSnwtbbmRsmeNDNz1pIuOFlwE+UF2gsy228J36OXKZGWJWZJKUigphBSqCNMcARtg==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@alcalzone/ansi-tokenize": "^0.2.0",
         "ansi-escapes": "^7.0.0",
@@ -13279,8 +13285,7 @@
       "version": "0.1.12",
       "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
       "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
-      "license": "MIT",
-      "peer": true
+      "license": "MIT"
     },
     "node_modules/path-type": {
       "version": "3.0.0",
@@ -13814,6 +13819,7 @@
       "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
       "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -13824,6 +13830,7 @@
       "integrity": "sha512-cq/o30z9W2Wb4rzBefjv5fBalHU0rJGZCHAkf/RHSBWSSYwh8PlQTqqOJmgIIbBtpj27T6FIPXeomIjZtCNVqA==",
       "devOptional": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "shell-quote": "^1.6.1",
         "ws": "^7"
@@ -13857,6 +13864,7 @@
       "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "scheduler": "^0.26.0"
       },
@@ -15920,6 +15928,7 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -16130,7 +16139,8 @@
       "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
       "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
       "dev": true,
-      "license": "0BSD"
+      "license": "0BSD",
+      "peer": true
     },
     "node_modules/tsx": {
       "version": "4.20.3",
@@ -16138,6 +16148,7 @@
       "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "esbuild": "~0.25.0",
         "get-tsconfig": "^4.7.5"
@@ -16322,6 +16333,7 @@
       "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -16483,7 +16495,6 @@
       "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
       "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">= 0.4.0"
       }
@@ -16539,6 +16550,7 @@
       "integrity": "sha512-4nVGliEpxmhCL8DslSAUdxlB6+SMrhB0a1v5ijlh1xB1nEPuy1mxaHxysVucLHuWryAxLWg6a5ei+U4TLn/rFg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "esbuild": "^0.25.0",
         "fdir": "^6.5.0",
@@ -16655,6 +16667,7 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -16668,6 +16681,7 @@
       "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@types/chai": "^5.2.2",
         "@vitest/expect": "3.2.4",
@@ -17419,6 +17433,7 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
       "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
       "license": "MIT",
+      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
@@ -17960,6 +17975,7 @@
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
diff --git a/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap b/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap
new file mode 100644
index 0000000000..5d41472b89
--- /dev/null
+++ b/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap
@@ -0,0 +1,8 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`runNonInteractive > should write a single newline between sequential text outputs from the model 1`] = `
+"Use mock tool
+Use mock tool again
+Finished.
+"
+`;
diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts
index da5d097c64..cff544305d 100644
--- a/packages/cli/src/nonInteractiveCli.test.ts
+++ b/packages/cli/src/nonInteractiveCli.test.ts
@@ -190,6 +190,9 @@ describe('runNonInteractive', () => {
     }
   }
 
+  const getWrittenOutput = () =>
+    processStdoutSpy.mock.calls.map((c) => c[0]).join(''); // chunks joined
+
   it('should process input and write text output', async () => {
     const events: ServerGeminiStreamEvent[] = [
       { type: GeminiEventType.Content, value: 'Hello' },
@@ -215,9 +218,7 @@ describe('runNonInteractive', () => {
       expect.any(AbortSignal),
       'prompt-id-1',
     );
-    expect(processStdoutSpy).toHaveBeenCalledWith('Hello');
-    expect(processStdoutSpy).toHaveBeenCalledWith(' World');
-    expect(processStdoutSpy).toHaveBeenCalledWith('\n');
+    expect(getWrittenOutput()).toBe('Hello World\n');
     expect(mockShutdownTelemetry).toHaveBeenCalled();
   });
 
@@ -285,8 +286,77 @@ describe('runNonInteractive', () => {
       expect.any(AbortSignal),
       'prompt-id-2',
     );
-    expect(processStdoutSpy).toHaveBeenCalledWith('Final answer');
-    expect(processStdoutSpy).toHaveBeenCalledWith('\n');
+    expect(getWrittenOutput()).toBe('Final answer\n');
+  });
+
+  it('should write a single newline between sequential text outputs from the model', async () => {
+    // This test simulates a multi-turn conversation to check that the text
+    // output of each model turn is separated from the next by one newline.
+
+    // 1. Define the tool call request the model emits (reused in turns 1 and 2).
+    const toolCallEvent: ServerGeminiStreamEvent = {
+      type: GeminiEventType.ToolCallRequest,
+      value: {
+        callId: 'mock-tool',
+        name: 'mockTool',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-multi',
+      },
+    };
+
+    // 2. Mock tool execution: every call resolves with an empty success result.
+    mockCoreExecuteToolCall.mockResolvedValue({
+      status: 'success',
+      request: toolCallEvent.value, // This is generic enough for both calls
+      tool: {} as AnyDeclarativeTool,
+      invocation: {} as AnyToolInvocation,
+      response: {
+        responseParts: [],
+        callId: 'mock-tool',
+      },
+    });
+
+    // 3. Define the sequence of events streamed from the mock model.
+    // Turn 1: Model outputs text, then requests a tool call.
+    const modelTurn1: ServerGeminiStreamEvent[] = [
+      { type: GeminiEventType.Content, value: 'Use mock tool' },
+      toolCallEvent,
+    ];
+    // Turn 2: Model outputs more text, then requests another tool call.
+    const modelTurn2: ServerGeminiStreamEvent[] = [
+      { type: GeminiEventType.Content, value: 'Use mock tool again' },
+      toolCallEvent,
+    ];
+    // Turn 3: Model outputs a final answer.
+    const modelTurn3: ServerGeminiStreamEvent[] = [
+      { type: GeminiEventType.Content, value: 'Finished.' },
+      {
+        type: GeminiEventType.Finished,
+        value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } },
+      },
+    ];
+
+    mockGeminiClient.sendMessageStream
+      .mockReturnValueOnce(createStreamFromEvents(modelTurn1))
+      .mockReturnValueOnce(createStreamFromEvents(modelTurn2))
+      .mockReturnValueOnce(createStreamFromEvents(modelTurn3));
+
+    // 4. Run the command.
+    await runNonInteractive(
+      mockConfig,
+      mockSettings,
+      'Use mock tool multiple times',
+      'prompt-id-multi',
+    );
+
+    // 5. Verify the output.
+    // The snapshot pins the text of each turn on its own line, separated by a
+    // single newline, with a final newline at the end.
+    expect(getWrittenOutput()).toMatchSnapshot();
+
+    // Also verify the tool was executed once per tool-calling turn (1 and 2).
+    expect(mockCoreExecuteToolCall).toHaveBeenCalledTimes(2);
   });
 
   it('should handle error during tool execution and should send error back to the model', async () => {
@@ -369,7 +439,7 @@ describe('runNonInteractive', () => {
       expect.any(AbortSignal),
       'prompt-id-3',
     );
-    expect(processStdoutSpy).toHaveBeenCalledWith('Sorry, let me try again.');
+    expect(getWrittenOutput()).toBe('Sorry, let me try again.\n');
   });
 
   it('should exit with error if sendMessageStream throws initially', async () => {
@@ -444,9 +514,7 @@ describe('runNonInteractive', () => {
       'Error executing tool nonexistentTool: Tool "nonexistentTool" not found in registry.',
     );
     expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2);
-    expect(processStdoutSpy).toHaveBeenCalledWith(
-      "Sorry, I can't find that tool.",
-    );
+    expect(getWrittenOutput()).toBe("Sorry, I can't find that tool.\n");
   });
 
   it('should exit when max session turns are exceeded', async () => {
@@ -506,7 +574,7 @@ describe('runNonInteractive', () => {
     );
 
     // 6. Assert the final output is correct
-    expect(processStdoutSpy).toHaveBeenCalledWith('Summary complete.');
+    expect(getWrittenOutput()).toBe('Summary complete.\n');
   });
 
   it('should process input and write JSON output with stats', async () => {
@@ -850,7 +918,7 @@ describe('runNonInteractive', () => {
       'prompt-id-slash',
     );
 
-    expect(processStdoutSpy).toHaveBeenCalledWith('Response from command');
+    expect(getWrittenOutput()).toBe('Response from command\n');
   });
 
   it('should throw FatalInputError if a command requires confirmation', async () => {
@@ -905,7 +973,7 @@ describe('runNonInteractive', () => {
       'prompt-id-unknown',
     );
 
-    expect(processStdoutSpy).toHaveBeenCalledWith('Response to unknown');
+    expect(getWrittenOutput()).toBe('Response to unknown\n');
   });
 
   it('should throw for unhandled command result types', async () => {
@@ -962,7 +1030,7 @@ describe('runNonInteractive', () => {
 
     expect(mockAction).toHaveBeenCalledWith(expect.any(Object), 'arg1 arg2');
 
-    expect(processStdoutSpy).toHaveBeenCalledWith('Acknowledged');
+    expect(getWrittenOutput()).toBe('Acknowledged\n');
   });
 
   it('should instantiate CommandService with correct loaders for slash commands', async () => {
@@ -1073,7 +1141,7 @@ describe('runNonInteractive', () => {
       expect.objectContaining({ name: 'ShellTool' }),
       expect.any(AbortSignal),
     );
-    expect(processStdoutSpy).toHaveBeenCalledWith('file.txt');
+    expect(getWrittenOutput()).toBe('file.txt\n');
   });
 
   describe('CoreEvents Integration', () => {
diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts
index 7b89732b10..efb0e3186d 100644
--- a/packages/cli/src/nonInteractiveCli.ts
+++ b/packages/cli/src/nonInteractiveCli.ts
@@ -40,6 +40,7 @@ import {
   handleCancellationError,
   handleMaxTurnsExceededError,
 } from './utils/errors.js';
+import { TextOutput } from './ui/utils/textOutput.js';
 
 export async function runNonInteractive(
   config: Config,
@@ -52,6 +53,7 @@ export async function runNonInteractive(
       stderr: true,
       debugMode: config.getDebugMode(),
     });
+    const textOutput = new TextOutput();
 
     const handleUserFeedback = (payload: UserFeedbackPayload) => {
       const prefix = payload.severity.toUpperCase();
@@ -183,7 +185,9 @@ export async function runNonInteractive(
             } else if (config.getOutputFormat() === OutputFormat.JSON) {
               responseText += event.value;
             } else {
-              process.stdout.write(event.value);
+              if (event.value) {
+                textOutput.write(event.value);
+              }
             }
           } else if (event.type === GeminiEventType.ToolCallRequest) {
             if (streamFormatter) {
@@ -220,6 +224,7 @@ export async function runNonInteractive(
         }
 
         if (toolCallRequests.length > 0) {
+          textOutput.ensureTrailingNewline();
           const toolResponseParts: Part[] = [];
           const completedToolCalls: CompletedToolCall[] = [];
 
@@ -297,9 +302,9 @@ export async function runNonInteractive(
           } else if (config.getOutputFormat() === OutputFormat.JSON) {
             const formatter = new JsonFormatter();
             const stats = uiTelemetryService.getMetrics();
-            process.stdout.write(formatter.format(responseText, stats));
+            textOutput.write(formatter.format(responseText, stats));
           } else {
-            process.stdout.write('\n'); // Ensure a final newline
+            textOutput.ensureTrailingNewline(); // Ensure a final newline
           }
           return;
         }
diff --git a/packages/cli/src/ui/utils/__snapshots__/textOutput.test.ts.snap b/packages/cli/src/ui/utils/__snapshots__/textOutput.test.ts.snap
new file mode 100644
index 0000000000..4618d553b3
--- /dev/null
+++ b/packages/cli/src/ui/utils/__snapshots__/textOutput.test.ts.snap
@@ -0,0 +1,23 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`TextOutput > should correctly handle ANSI escape codes when determining line breaks 1`] = `
+"[34mhello[39m
+[1mworld[22m[34m
+[39mnext"
+`;
+
+exports[`TextOutput > should handle ANSI codes that do not end with a newline 1`] = `
+"hello[34m
+world"
+`;
+
+exports[`TextOutput > should handle a sequence of calls correctly 1`] = `
+"first
+second part
+third"
+`;
+
+exports[`TextOutput > should handle empty strings with ANSI codes 1`] = `
+"hello[34m[39m
+world"
+`;
diff --git a/packages/cli/src/ui/utils/textOutput.test.ts b/packages/cli/src/ui/utils/textOutput.test.ts
new file mode 100644
index 0000000000..b8a0882d64
--- /dev/null
+++ b/packages/cli/src/ui/utils/textOutput.test.ts
@@ -0,0 +1,99 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/// <reference types="vitest/globals" />
+
+import type { MockInstance } from 'vitest';
+import { vi } from 'vitest';
+import { TextOutput } from './textOutput.js';
+
+describe('TextOutput', () => {
+  let stdoutSpy: MockInstance<typeof process.stdout.write>;
+  let textOutput: TextOutput;
+
+  beforeEach(() => {
+    stdoutSpy = vi
+      .spyOn(process.stdout, 'write')
+      .mockImplementation(() => true);
+    textOutput = new TextOutput();
+  });
+
+  afterEach(() => {
+    stdoutSpy.mockRestore();
+  });
+
+  const getWrittenOutput = () => stdoutSpy.mock.calls.map((c) => c[0]).join('');
+
+  it('write() should call process.stdout.write', () => {
+    textOutput.write('hello');
+    expect(stdoutSpy).toHaveBeenCalledWith('hello');
+  });
+
+  it('write() should not call process.stdout.write for empty strings', () => {
+    textOutput.write('');
+    expect(stdoutSpy).not.toHaveBeenCalled();
+  });
+
+  it('writeOnNewLine() should not add a newline if the last char was a newline', () => {
+    // A fresh TextOutput is treated as already being at the start of a line
+    textOutput.writeOnNewLine('hello');
+    expect(getWrittenOutput()).toBe('hello');
+  });
+
+  it('writeOnNewLine() should add a newline if the last char was not a newline', () => {
+    textOutput.write('previous');
+    textOutput.writeOnNewLine('hello');
+    expect(getWrittenOutput()).toBe('previous\nhello');
+  });
+
+  it('ensureTrailingNewline() should add a newline if one is missing', () => {
+    textOutput.write('hello');
+    textOutput.ensureTrailingNewline();
+    expect(getWrittenOutput()).toBe('hello\n');
+  });
+
+  it('ensureTrailingNewline() should not add a newline if one already exists', () => {
+    textOutput.write('hello\n');
+    textOutput.ensureTrailingNewline();
+    expect(getWrittenOutput()).toBe('hello\n');
+  });
+
+  it('should handle a sequence of calls correctly', () => {
+    textOutput.write('first');
+    textOutput.writeOnNewLine('second');
+    textOutput.write(' part');
+    textOutput.ensureTrailingNewline();
+    textOutput.ensureTrailingNewline(); // no-op: already at line start
+    textOutput.write('third');
+
+    expect(getWrittenOutput()).toMatchSnapshot();
+  });
+
+  it('should correctly handle ANSI escape codes when determining line breaks', () => {
+    const blue = (s: string) => `\u001b[34m${s}\u001b[39m`;
+    const bold = (s: string) => `\u001b[1m${s}\u001b[22m`;
+
+    textOutput.write(blue('hello'));
+    textOutput.writeOnNewLine(bold('world'));
+    textOutput.write(blue('\n')); // newline wrapped in color codes
+    textOutput.writeOnNewLine('next');
+
+    expect(getWrittenOutput()).toMatchSnapshot();
+  });
+
+  it('should handle empty strings with ANSI codes', () => {
+    textOutput.write('hello');
+    textOutput.write('\u001b[34m\u001b[39m'); // ANSI codes only, no text
+    textOutput.writeOnNewLine('world');
+    expect(getWrittenOutput()).toMatchSnapshot();
+  });
+
+  it('should handle ANSI codes that do not end with a newline', () => {
+    textOutput.write('hello\u001b[34m'); // text followed by a color code
+    textOutput.writeOnNewLine('world');
+    expect(getWrittenOutput()).toMatchSnapshot();
+  });
+});
diff --git a/packages/cli/src/ui/utils/textOutput.ts b/packages/cli/src/ui/utils/textOutput.ts
new file mode 100644
index 0000000000..420f774044
--- /dev/null
+++ b/packages/cli/src/ui/utils/textOutput.ts
@@ -0,0 +1,54 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * A utility for writing text to stdout that tracks whether the output is
+ * at the start of a line, so newlines can be added only when needed.
+ */
+
+import stripAnsi from 'strip-ansi';
+
+export class TextOutput {
+  private atStartOfLine = true; // no output yet counts as start of line
+
+  /**
+   * Writes str to stdout; ANSI-only text leaves the line state untouched.
+   * @param str The string to write. Empty strings are ignored.
+   */
+  write(str: string): void {
+    if (str.length === 0) {
+      return;
+    }
+    process.stdout.write(str);
+    const strippedStr = stripAnsi(str);
+    if (strippedStr.length > 0) {
+      this.atStartOfLine = strippedStr.endsWith('\n');
+    }
+  }
+
+  /**
+   * Writes a string to stdout, making sure it begins on a fresh line.
+   * A newline is emitted first only when the previous visible output did
+   * not already end with one, so no extra blank line is produced.
+   * @param str The string to write.
+   */
+  writeOnNewLine(str: string): void {
+    if (!this.atStartOfLine) {
+      this.write('\n');
+    }
+    this.write(str);
+  }
+
+  /**
+   * Ensures that the output ends with a newline. If the last visible
+   * (non-ANSI) character written was not a newline, one is added.
+   */
+  ensureTrailingNewline(): void {
+    if (!this.atStartOfLine) {
+      this.write('\n');
+    }
+  }
+}

From 23c906b0855e4553cc47321c040e4b28e6c60b15 Mon Sep 17 00:00:00 2001
From: Jack Wotherspoon <jackwoth@google.com>
Date: Mon, 27 Oct 2025 12:57:12 -0400
Subject: [PATCH 31/73] fix: user configured oauth scopes should take
 precedence over discovered scopes (#12088)

---
 packages/core/src/mcp/oauth-provider.test.ts | 178 +++++++++++++++++++
 packages/core/src/mcp/oauth-provider.ts      |   4 +-
 2 files changed, 180 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/mcp/oauth-provider.test.ts b/packages/core/src/mcp/oauth-provider.test.ts
index e23c25d07d..8a156b28f0 100644
--- a/packages/core/src/mcp/oauth-provider.test.ts
+++ b/packages/core/src/mcp/oauth-provider.test.ts
@@ -1182,5 +1182,183 @@ describe('MCPOAuthProvider', () => {
       expect(url.hash).toBe('#login');
       expect(url.pathname).toBe('/authorize');
     });
+
+    it('should use user-configured scopes over discovered scopes', async () => {
+      let capturedUrl: string | undefined;
+      mockOpenBrowserSecurely.mockImplementation((url: string) => {
+        capturedUrl = url;
+        return Promise.resolve();
+      });
+
+      const configWithUserScopes: MCPOAuthConfig = {
+        ...mockConfig,
+        clientId: 'test-client-id',
+        clientSecret: 'test-client-secret',
+        scopes: ['user-scope'],
+      };
+      delete configWithUserScopes.authorizationUrl;
+      delete configWithUserScopes.tokenUrl;
+
+      const mockResourceMetadata = {
+        authorization_servers: ['https://discovered.auth.com'],
+      };
+
+      const mockAuthServerMetadata = {
+        authorization_endpoint: 'https://discovered.auth.com/authorize',
+        token_endpoint: 'https://discovered.auth.com/token',
+        scopes_supported: ['discovered-scope'],
+      };
+
+      mockFetch
+        .mockResolvedValueOnce(createMockResponse({ ok: true, status: 200 }))
+        .mockResolvedValueOnce(
+          createMockResponse({
+            ok: true,
+            contentType: 'application/json',
+            text: JSON.stringify(mockResourceMetadata),
+            json: mockResourceMetadata,
+          }),
+        )
+        .mockResolvedValueOnce(
+          createMockResponse({
+            ok: true,
+            contentType: 'application/json',
+            text: JSON.stringify(mockAuthServerMetadata),
+            json: mockAuthServerMetadata,
+          }),
+        );
+
+      // Setup callback handler
+      let callbackHandler: unknown;
+      vi.mocked(http.createServer).mockImplementation((handler) => {
+        callbackHandler = handler;
+        return mockHttpServer as unknown as http.Server;
+      });
+
+      mockHttpServer.listen.mockImplementation((port, callback) => {
+        callback?.();
+        setTimeout(() => {
+          const mockReq = {
+            url: '/oauth/callback?code=auth_code&state=bW9ja19zdGF0ZV8xNl9ieXRlcw',
+          };
+          const mockRes = { writeHead: vi.fn(), end: vi.fn() };
+          (callbackHandler as (req: unknown, res: unknown) => void)(
+            mockReq,
+            mockRes,
+          );
+        }, 10);
+      });
+
+      // Mock token exchange
+      mockFetch.mockResolvedValueOnce(
+        createMockResponse({
+          ok: true,
+          contentType: 'application/json',
+          text: JSON.stringify(mockTokenResponse),
+          json: mockTokenResponse,
+        }),
+      );
+
+      const authProvider = new MCPOAuthProvider();
+      await authProvider.authenticate(
+        'test-server',
+        configWithUserScopes,
+        'https://api.example.com',
+      );
+
+      expect(capturedUrl).toBeDefined();
+      const url = new URL(capturedUrl!);
+      expect(url.searchParams.get('scope')).toBe('user-scope');
+    });
+
+    it('should use discovered scopes when no user-configured scopes are provided', async () => {
+      let capturedUrl: string | undefined;
+      mockOpenBrowserSecurely.mockImplementation((url: string) => {
+        capturedUrl = url;
+        return Promise.resolve();
+      });
+
+      const configWithoutScopes: MCPOAuthConfig = {
+        ...mockConfig,
+        clientId: 'test-client-id',
+        clientSecret: 'test-client-secret',
+      };
+      delete configWithoutScopes.scopes;
+      delete configWithoutScopes.authorizationUrl;
+      delete configWithoutScopes.tokenUrl;
+
+      const mockResourceMetadata = {
+        authorization_servers: ['https://discovered.auth.com'],
+      };
+
+      const mockAuthServerMetadata = {
+        authorization_endpoint: 'https://discovered.auth.com/authorize',
+        token_endpoint: 'https://discovered.auth.com/token',
+        scopes_supported: ['discovered-scope-1', 'discovered-scope-2'],
+      };
+
+      mockFetch
+        .mockResolvedValueOnce(createMockResponse({ ok: true, status: 200 }))
+        .mockResolvedValueOnce(
+          createMockResponse({
+            ok: true,
+            contentType: 'application/json',
+            text: JSON.stringify(mockResourceMetadata),
+            json: mockResourceMetadata,
+          }),
+        )
+        .mockResolvedValueOnce(
+          createMockResponse({
+            ok: true,
+            contentType: 'application/json',
+            text: JSON.stringify(mockAuthServerMetadata),
+            json: mockAuthServerMetadata,
+          }),
+        );
+
+      // Setup callback handler
+      let callbackHandler: unknown;
+      vi.mocked(http.createServer).mockImplementation((handler) => {
+        callbackHandler = handler;
+        return mockHttpServer as unknown as http.Server;
+      });
+
+      mockHttpServer.listen.mockImplementation((port, callback) => {
+        callback?.();
+        setTimeout(() => {
+          const mockReq = {
+            url: '/oauth/callback?code=auth_code&state=bW9ja19zdGF0ZV8xNl9ieXRlcw',
+          };
+          const mockRes = { writeHead: vi.fn(), end: vi.fn() };
+          (callbackHandler as (req: unknown, res: unknown) => void)(
+            mockReq,
+            mockRes,
+          );
+        }, 10);
+      });
+
+      // Mock token exchange
+      mockFetch.mockResolvedValueOnce(
+        createMockResponse({
+          ok: true,
+          contentType: 'application/json',
+          text: JSON.stringify(mockTokenResponse),
+          json: mockTokenResponse,
+        }),
+      );
+
+      const authProvider = new MCPOAuthProvider();
+      await authProvider.authenticate(
+        'test-server',
+        configWithoutScopes,
+        'https://api.example.com',
+      );
+
+      expect(capturedUrl).toBeDefined();
+      const url = new URL(capturedUrl!);
+      expect(url.searchParams.get('scope')).toBe(
+        'discovered-scope-1 discovered-scope-2',
+      );
+    });
   });
 });
diff --git a/packages/core/src/mcp/oauth-provider.ts b/packages/core/src/mcp/oauth-provider.ts
index f7051cd4f8..3b67882c09 100644
--- a/packages/core/src/mcp/oauth-provider.ts
+++ b/packages/core/src/mcp/oauth-provider.ts
@@ -630,7 +630,7 @@ export class MCPOAuthProvider {
                 ...config,
                 authorizationUrl: discoveredConfig.authorizationUrl,
                 tokenUrl: discoveredConfig.tokenUrl,
-                scopes: discoveredConfig.scopes || config.scopes || [],
+                scopes: config.scopes || discoveredConfig.scopes || [],
                 // Preserve existing client credentials
                 clientId: config.clientId,
                 clientSecret: config.clientSecret,
@@ -654,7 +654,7 @@ export class MCPOAuthProvider {
             ...config,
             authorizationUrl: discoveredConfig.authorizationUrl,
             tokenUrl: discoveredConfig.tokenUrl,
-            scopes: discoveredConfig.scopes || config.scopes || [],
+            scopes: config.scopes || discoveredConfig.scopes || [],
             registrationUrl: discoveredConfig.registrationUrl,
             // Preserve existing client credentials
             clientId: config.clientId,

From 541eeb7a50254b9bca0545992906a313d49d00c0 Mon Sep 17 00:00:00 2001
From: joshualitt <joshualitt@google.com>
Date: Mon, 27 Oct 2025 09:59:08 -0700
Subject: [PATCH 32/73] feat(core, cli): Implement sequential approval.
 (#11593)

---
 packages/a2a-server/src/agent/task.test.ts    | 121 +++++-
 packages/a2a-server/src/agent/task.ts         |  19 +-
 packages/a2a-server/src/http/app.test.ts      | 219 +++++++++--
 .../cli/src/ui/hooks/useGeminiStream.test.tsx | 190 +++++++---
 packages/cli/src/ui/hooks/useGeminiStream.ts  | 165 ++++++---
 .../cli/src/ui/hooks/useReactToolScheduler.ts |  69 ++--
 .../cli/src/ui/hooks/useToolScheduler.test.ts | 190 +++++++---
 .../core/src/core/coreToolScheduler.test.ts   | 290 ++++++++++++++-
 packages/core/src/core/coreToolScheduler.ts   | 348 ++++++++++++------
 9 files changed, 1272 insertions(+), 339 deletions(-)

diff --git a/packages/a2a-server/src/agent/task.test.ts b/packages/a2a-server/src/agent/task.test.ts
index 513867f4e2..1bf26d8bc8 100644
--- a/packages/a2a-server/src/agent/task.test.ts
+++ b/packages/a2a-server/src/agent/task.test.ts
@@ -4,11 +4,12 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect, vi } from 'vitest';
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { Task } from './task.js';
 import type { Config, ToolCallRequestInfo } from '@google/gemini-cli-core';
 import { createMockConfig } from '../utils/testing_utils.js';
 import type { ExecutionEventBus } from '@a2a-js/sdk/server';
+import type { ToolCall } from '@google/gemini-cli-core';
 
 describe('Task', () => {
   it('scheduleToolCalls should not modify the input requests array', async () => {
@@ -94,4 +95,122 @@ describe('Task', () => {
       );
     });
   });
+
+  describe('_schedulerToolCallsUpdate', () => {
+    let task: Task;
+    type SpyInstance = ReturnType<typeof vi.spyOn>;
+    let setTaskStateAndPublishUpdateSpy: SpyInstance;
+
+    beforeEach(() => {
+      const mockConfig = createMockConfig();
+      const mockEventBus: ExecutionEventBus = {
+        publish: vi.fn(),
+        on: vi.fn(),
+        off: vi.fn(),
+        once: vi.fn(),
+        removeAllListeners: vi.fn(),
+        finished: vi.fn(),
+      };
+
+      // @ts-expect-error - Calling private constructor
+      task = new Task(
+        'task-id',
+        'context-id',
+        mockConfig as Config,
+        mockEventBus,
+      );
+
+      // Spy on the method we want to check calls for
+      setTaskStateAndPublishUpdateSpy = vi.spyOn(
+        task,
+        'setTaskStateAndPublishUpdate',
+      );
+    });
+
+    afterEach(() => {
+      vi.restoreAllMocks();
+    });
+
+    it('should set state to input-required when a tool is awaiting approval and none are executing', () => {
+      const toolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+      ] as ToolCall[];
+
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(toolCalls);
+
+      // The last call should be the final state update
+      expect(setTaskStateAndPublishUpdateSpy).toHaveBeenLastCalledWith(
+        'input-required',
+        { kind: 'state-change' },
+        undefined,
+        undefined,
+        true, // final: true
+      );
+    });
+
+    it('should NOT set state to input-required if a tool is awaiting approval but another is executing', () => {
+      const toolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+        { request: { callId: '2' }, status: 'executing' },
+      ] as ToolCall[];
+
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(toolCalls);
+
+      // It will be called for status updates, but not with final: true
+      const finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeUndefined();
+    });
+
+    it('should set state to input-required once an executing tool finishes, leaving one awaiting approval', () => {
+      const initialToolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+        { request: { callId: '2' }, status: 'executing' },
+      ] as ToolCall[];
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(initialToolCalls);
+
+      // No final call yet
+      let finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeUndefined();
+
+      // Now, the executing tool finishes. The scheduler would call _resolveToolCall for it.
+      // @ts-expect-error - Calling private method
+      task._resolveToolCall('2');
+
+      // Then another update comes in for the awaiting tool (e.g., a re-check)
+      const subsequentToolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+      ] as ToolCall[];
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(subsequentToolCalls);
+
+      // NOW we should get the final call
+      finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeDefined();
+      expect(finalCall?.[0]).toBe('input-required');
+    });
+
+    it('should NOT set state to input-required if skipFinalTrueAfterInlineEdit is true', () => {
+      task.skipFinalTrueAfterInlineEdit = true;
+      const toolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+      ] as ToolCall[];
+
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(toolCalls);
+
+      const finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeUndefined();
+    });
+  });
 });
diff --git a/packages/a2a-server/src/agent/task.ts b/packages/a2a-server/src/agent/task.ts
index a7b0e288c9..eee5e736d6 100644
--- a/packages/a2a-server/src/agent/task.ts
+++ b/packages/a2a-server/src/agent/task.ts
@@ -40,7 +40,6 @@ import type {
 import { v4 as uuidv4 } from 'uuid';
 import { logger } from '../utils/logger.js';
 import * as fs from 'node:fs';
-
 import { CoderAgentEvent } from '../types.js';
 import type {
   CoderAgentMessage,
@@ -373,11 +372,11 @@ export class Task {
 
       // Only send an update if the status has actually changed.
       if (hasChanged) {
-        const message = this.toolStatusMessage(tc, this.id, this.contextId);
         const coderAgentMessage: CoderAgentMessage =
           tc.status === 'awaiting_approval'
             ? { kind: CoderAgentEvent.ToolCallConfirmationEvent }
             : { kind: CoderAgentEvent.ToolCallUpdateEvent };
+        const message = this.toolStatusMessage(tc, this.id, this.contextId);
 
         const event = this._createStatusUpdateEvent(
           this.taskState,
@@ -404,20 +403,16 @@ export class Task {
     const isAwaitingApproval = allPendingStatuses.some(
       (status) => status === 'awaiting_approval',
     );
-    const allPendingAreStable = allPendingStatuses.every(
-      (status) =>
-        status === 'awaiting_approval' ||
-        status === 'success' ||
-        status === 'error' ||
-        status === 'cancelled',
+    const isExecuting = allPendingStatuses.some(
+      (status) => status === 'executing',
     );
 
-    // 1. Are any pending tool calls awaiting_approval
-    // 2. Are all pending tool calls in a stable state (i.e. not in validing or executing)
-    // 3. After an inline edit, the edited tool call will send awaiting_approval THEN scheduled. We wait for the next update in this case.
+    // The turn is complete and requires user input if at least one tool is
+    // awaiting the user's decision, no tool is actively executing, and we are
+    // not waiting for the follow-up update that comes after an inline edit.
     if (
       isAwaitingApproval &&
-      allPendingAreStable &&
+      !isExecuting &&
       !this.skipFinalTrueAfterInlineEdit
     ) {
       this.skipFinalTrueAfterInlineEdit = false;
diff --git a/packages/a2a-server/src/http/app.test.ts b/packages/a2a-server/src/http/app.test.ts
index 70d90f78cb..15b386bd3d 100644
--- a/packages/a2a-server/src/http/app.test.ts
+++ b/packages/a2a-server/src/http/app.test.ts
@@ -313,7 +313,7 @@ describe('E2E Tests', () => {
     expect(workingEvent.kind).toBe('status-update');
     expect(workingEvent.status.state).toBe('working');
 
-    // State Update: Validate each tool call
+    // State Update: Validate the first tool call
     const toolCallValidateEvent1 = events[3].result as TaskStatusUpdateEvent;
     expect(toolCallValidateEvent1.metadata?.['coderAgent']).toMatchObject({
       kind: 'tool-call-update',
@@ -326,47 +326,218 @@ describe('E2E Tests', () => {
         },
       },
     ]);
-    const toolCallValidateEvent2 = events[4].result as TaskStatusUpdateEvent;
-    expect(toolCallValidateEvent2.metadata?.['coderAgent']).toMatchObject({
+
+    // --- Assert the event stream ---
+    // 1. Initial "submitted" status.
+    expect((events[0].result as TaskStatusUpdateEvent).status.state).toBe(
+      'submitted',
+    );
+
+    // 2. "working" status after receiving the user prompt.
+    expect((events[1].result as TaskStatusUpdateEvent).status.state).toBe(
+      'working',
+    );
+
+    // 3. A "state-change" event from the agent.
+    expect(events[2].result.metadata?.['coderAgent']).toMatchObject({
+      kind: 'state-change',
+    });
+
+    // 4. Tool 1 is validating.
+    const toolCallUpdate1 = events[3].result as TaskStatusUpdateEvent;
+    expect(toolCallUpdate1.metadata?.['coderAgent']).toMatchObject({
       kind: 'tool-call-update',
     });
-    expect(toolCallValidateEvent2.status.message?.parts).toMatchObject([
+    expect(toolCallUpdate1.status.message?.parts).toMatchObject([
       {
         data: {
+          request: { callId: 'test-call-id-1' },
           status: 'validating',
-          request: { callId: 'test-call-id-2' },
         },
       },
     ]);
 
-    // State Update: Set each tool call to awaiting
-    const toolCallAwaitEvent1 = events[5].result as TaskStatusUpdateEvent;
-    expect(toolCallAwaitEvent1.metadata?.['coderAgent']).toMatchObject({
-      kind: 'tool-call-confirmation',
+    // 5. Tool 2 is validating.
+    const toolCallUpdate2 = events[4].result as TaskStatusUpdateEvent;
+    expect(toolCallUpdate2.metadata?.['coderAgent']).toMatchObject({
+      kind: 'tool-call-update',
     });
-    expect(toolCallAwaitEvent1.status.message?.parts).toMatchObject([
+    expect(toolCallUpdate2.status.message?.parts).toMatchObject([
       {
         data: {
-          status: 'awaiting_approval',
-          request: { callId: 'test-call-id-1' },
-        },
-      },
-    ]);
-    const toolCallAwaitEvent2 = events[6].result as TaskStatusUpdateEvent;
-    expect(toolCallAwaitEvent2.metadata?.['coderAgent']).toMatchObject({
-      kind: 'tool-call-confirmation',
-    });
-    expect(toolCallAwaitEvent2.status.message?.parts).toMatchObject([
-      {
-        data: {
-          status: 'awaiting_approval',
           request: { callId: 'test-call-id-2' },
+          status: 'validating',
         },
       },
     ]);
 
+    // 6. Tool 1 is awaiting approval.
+    const toolCallAwaitEvent = events[5].result as TaskStatusUpdateEvent;
+    expect(toolCallAwaitEvent.metadata?.['coderAgent']).toMatchObject({
+      kind: 'tool-call-confirmation',
+    });
+    expect(toolCallAwaitEvent.status.message?.parts).toMatchObject([
+      {
+        data: {
+          request: { callId: 'test-call-id-1' },
+          status: 'awaiting_approval',
+        },
+      },
+    ]);
+
+    // 7. The final event is "input-required".
+    const finalEvent = events[6].result as TaskStatusUpdateEvent;
+    expect(finalEvent.final).toBe(true);
+    expect(finalEvent.status.state).toBe('input-required');
+
+    // The scheduler now waits for approval, so no more events are sent.
+    assertUniqueFinalEventIsLast(events);
+    expect(events.length).toBe(7);
+  });
+
+  it('should handle multiple tool calls sequentially in YOLO mode', async () => {
+    // Set YOLO mode to auto-approve tools and test sequential execution.
+    getApprovalModeSpy.mockReturnValue(ApprovalMode.YOLO);
+
+    // First call yields both tool requests.
+    sendMessageStreamSpy.mockImplementationOnce(async function* () {
+      yield* [
+        {
+          type: GeminiEventType.ToolCallRequest,
+          value: {
+            callId: 'test-call-id-1',
+            name: 'test-tool-1',
+            args: {},
+          },
+        },
+        {
+          type: GeminiEventType.ToolCallRequest,
+          value: {
+            callId: 'test-call-id-2',
+            name: 'test-tool-2',
+            args: {},
+          },
+        },
+      ];
+    });
+    // Subsequent calls yield only a final content message, as the tools will "succeed".
+    sendMessageStreamSpy.mockImplementation(async function* () {
+      yield* [{ type: 'content', value: 'All tools executed.' }];
+    });
+
+    const mockTool1 = new MockTool({
+      name: 'test-tool-1',
+      displayName: 'Test Tool 1',
+      shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
+      execute: vi
+        .fn()
+        .mockResolvedValue({ llmContent: 'tool 1 done', returnDisplay: '' }),
+    });
+    const mockTool2 = new MockTool({
+      name: 'test-tool-2',
+      displayName: 'Test Tool 2',
+      shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
+      execute: vi
+        .fn()
+        .mockResolvedValue({ llmContent: 'tool 2 done', returnDisplay: '' }),
+    });
+
+    getToolRegistrySpy.mockReturnValue({
+      getAllTools: vi.fn().mockReturnValue([mockTool1, mockTool2]),
+      getToolsByServer: vi.fn().mockReturnValue([]),
+      getTool: vi.fn().mockImplementation((name: string) => {
+        if (name === 'test-tool-1') return mockTool1;
+        if (name === 'test-tool-2') return mockTool2;
+        return undefined;
+      }),
+    });
+
+    const agent = request.agent(app);
+    const res = await agent
+      .post('/')
+      .send(
+        createStreamMessageRequest(
+          'run two tools',
+          'a2a-multi-tool-test-message',
+        ),
+      )
+      .set('Content-Type', 'application/json')
+      .expect(200);
+
+    const events = streamToSSEEvents(res.text);
+    assertTaskCreationAndWorkingStatus(events);
+
+    // --- Assert the sequential execution flow ---
+    const eventStream = events.slice(2).map((e) => {
+      const update = e.result as TaskStatusUpdateEvent;
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const agentData = update.metadata?.['coderAgent'] as any;
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const toolData = update.status.message?.parts[0] as any;
+      if (!toolData) {
+        return { kind: agentData.kind };
+      }
+      return {
+        kind: agentData.kind,
+        status: toolData.data?.status,
+        callId: toolData.data?.request.callId,
+      };
+    });
+
+    const expectedFlow = [
+      // Initial state change
+      { kind: 'state-change', status: undefined, callId: undefined },
+      // Tool 1 Lifecycle
+      {
+        kind: 'tool-call-update',
+        status: 'validating',
+        callId: 'test-call-id-1',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'scheduled',
+        callId: 'test-call-id-1',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'executing',
+        callId: 'test-call-id-1',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'success',
+        callId: 'test-call-id-1',
+      },
+      // Tool 2 Lifecycle
+      {
+        kind: 'tool-call-update',
+        status: 'validating',
+        callId: 'test-call-id-2',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'scheduled',
+        callId: 'test-call-id-2',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'executing',
+        callId: 'test-call-id-2',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'success',
+        callId: 'test-call-id-2',
+      },
+      // Final updates
+      { kind: 'state-change', status: undefined, callId: undefined },
+      { kind: 'text-content', status: undefined, callId: undefined },
+    ];
+
+    // `expect.arrayContaining` tolerates interspersed events; note that it does not check ordering.
+    expect(eventStream).toEqual(expect.arrayContaining(expectedFlow));
+
     assertUniqueFinalEventIsLast(events);
-    expect(events.length).toBe(8);
   });
 
   it('should handle tool calls that do not require approval', async () => {
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 14a596c9e1..37698a09b9 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -37,7 +37,7 @@ import {
 } from '@google/gemini-cli-core';
 import type { Part, PartListUnion } from '@google/genai';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
-import type { HistoryItem, SlashCommandProcessorResult } from '../types.js';
+import type { SlashCommandProcessorResult } from '../types.js';
 import { MessageType, StreamingState } from '../types.js';
 import type { LoadedSettings } from '../../config/settings.js';
 
@@ -231,8 +231,9 @@ describe('useGeminiStream', () => {
     mockUseReactToolScheduler.mockReturnValue([
       [], // Default to empty array for toolCalls
       mockScheduleToolCalls,
-      mockCancelAllToolCalls,
       mockMarkToolsAsSubmitted,
+      vi.fn(), // setToolCallsForDisplay
+      mockCancelAllToolCalls,
     ]);
 
     // Reset mocks for GeminiClient instance methods (startChat and sendMessageStream)
@@ -259,38 +260,71 @@ describe('useGeminiStream', () => {
     initialToolCalls: TrackedToolCall[] = [],
     geminiClient?: any,
   ) => {
-    let currentToolCalls = initialToolCalls;
-    const setToolCalls = (newToolCalls: TrackedToolCall[]) => {
-      currentToolCalls = newToolCalls;
-    };
-
-    mockUseReactToolScheduler.mockImplementation(() => [
-      currentToolCalls,
-      mockScheduleToolCalls,
-      mockCancelAllToolCalls,
-      mockMarkToolsAsSubmitted,
-    ]);
-
     const client = geminiClient || mockConfig.getGeminiClient();
 
+    const initialProps = {
+      client,
+      history: [],
+      addItem: mockAddItem as unknown as UseHistoryManagerReturn['addItem'],
+      config: mockConfig,
+      onDebugMessage: mockOnDebugMessage,
+      handleSlashCommand: mockHandleSlashCommand as unknown as (
+        cmd: PartListUnion,
+      ) => Promise<SlashCommandProcessorResult | false>,
+      shellModeActive: false,
+      loadedSettings: mockLoadedSettings,
+      toolCalls: initialToolCalls,
+    };
+
     const { result, rerender } = renderHook(
-      (props: {
-        client: any;
-        history: HistoryItem[];
-        addItem: UseHistoryManagerReturn['addItem'];
-        config: Config;
-        onDebugMessage: (message: string) => void;
-        handleSlashCommand: (
-          cmd: PartListUnion,
-        ) => Promise<SlashCommandProcessorResult | false>;
-        shellModeActive: boolean;
-        loadedSettings: LoadedSettings;
-        toolCalls?: TrackedToolCall[]; // Allow passing updated toolCalls
-      }) => {
-        // Update the mock's return value if new toolCalls are passed in props
-        if (props.toolCalls) {
-          setToolCalls(props.toolCalls);
-        }
+      (props: typeof initialProps) => {
+        // This mock needs to be stateful. When setToolCallsForDisplay is called,
+        // it should trigger a rerender with the new state.
+        const mockSetToolCallsForDisplay = vi.fn((updater) => {
+          const newToolCalls =
+            typeof updater === 'function' ? updater(props.toolCalls) : updater;
+          rerender({ ...props, toolCalls: newToolCalls });
+        });
+
+        // Create a stateful mock for cancellation that updates the toolCalls state.
+        const statefulCancelAllToolCalls = vi.fn((...args) => {
+          // Call the original spy so `toHaveBeenCalled` checks still work.
+          mockCancelAllToolCalls(...args);
+
+          const newToolCalls = props.toolCalls.map((tc) => {
+            // Only cancel tools that are in a cancellable state.
+            if (
+              tc.status === 'awaiting_approval' ||
+              tc.status === 'executing' ||
+              tc.status === 'scheduled' ||
+              tc.status === 'validating'
+            ) {
+              // A real cancelled tool call has a response object.
+              // We need to simulate this to avoid type errors downstream.
+              return {
+                ...tc,
+                status: 'cancelled',
+                response: {
+                  callId: tc.request.callId,
+                  responseParts: [],
+                  resultDisplay: 'Request cancelled.',
+                },
+                responseSubmittedToGemini: true, // Mark as "processed"
+              } as any as TrackedCancelledToolCall;
+            }
+            return tc;
+          });
+          rerender({ ...props, toolCalls: newToolCalls });
+        });
+
+        mockUseReactToolScheduler.mockImplementation(() => [
+          props.toolCalls,
+          mockScheduleToolCalls,
+          mockMarkToolsAsSubmitted,
+          mockSetToolCallsForDisplay,
+          statefulCancelAllToolCalls, // Use the stateful mock
+        ]);
+
         return useGeminiStream(
           props.client,
           props.history,
@@ -313,19 +347,7 @@ describe('useGeminiStream', () => {
         );
       },
       {
-        initialProps: {
-          client,
-          history: [],
-          addItem: mockAddItem as unknown as UseHistoryManagerReturn['addItem'],
-          config: mockConfig,
-          onDebugMessage: mockOnDebugMessage,
-          handleSlashCommand: mockHandleSlashCommand as unknown as (
-            cmd: PartListUnion,
-          ) => Promise<SlashCommandProcessorResult | false>,
-          shellModeActive: false,
-          loadedSettings: mockLoadedSettings,
-          toolCalls: initialToolCalls,
-        },
+        initialProps,
       },
     );
     return {
@@ -452,7 +474,7 @@ describe('useGeminiStream', () => {
 
     mockUseReactToolScheduler.mockImplementation((onComplete) => {
       capturedOnComplete = onComplete;
-      return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted];
+      return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted, vi.fn()];
     });
 
     renderHook(() =>
@@ -535,7 +557,7 @@ describe('useGeminiStream', () => {
 
     mockUseReactToolScheduler.mockImplementation((onComplete) => {
       capturedOnComplete = onComplete;
-      return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted];
+      return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted, vi.fn()];
     });
 
     renderHook(() =>
@@ -647,7 +669,7 @@ describe('useGeminiStream', () => {
 
     mockUseReactToolScheduler.mockImplementation((onComplete) => {
       capturedOnComplete = onComplete;
-      return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted];
+      return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted, vi.fn()];
     });
 
     renderHook(() =>
@@ -760,6 +782,7 @@ describe('useGeminiStream', () => {
         currentToolCalls,
         mockScheduleToolCalls,
         mockMarkToolsAsSubmitted,
+        vi.fn(), // setToolCallsForDisplay
       ];
     });
 
@@ -797,6 +820,7 @@ describe('useGeminiStream', () => {
         completedToolCalls,
         mockScheduleToolCalls,
         mockMarkToolsAsSubmitted,
+        vi.fn(), // setToolCallsForDisplay
       ];
     });
 
@@ -1031,7 +1055,7 @@ describe('useGeminiStream', () => {
       expect(result.current.streamingState).toBe(StreamingState.Idle);
     });
 
-    it('should not cancel if a tool call is in progress (not just responding)', async () => {
+    it('should cancel if a tool call is in progress', async () => {
       const toolCalls: TrackedToolCall[] = [
         {
           request: { callId: 'call1', name: 'tool1', args: {} },
@@ -1052,7 +1076,6 @@ describe('useGeminiStream', () => {
         } as TrackedExecutingToolCall,
       ];
 
-      const abortSpy = vi.spyOn(AbortController.prototype, 'abort');
       const { result } = renderTestHook(toolCalls);
 
       // State is `Responding` because a tool is running
@@ -1061,8 +1084,71 @@ describe('useGeminiStream', () => {
       // Try to cancel
       simulateEscapeKeyPress();
 
-      // Nothing should happen because the state is not `Responding`
-      expect(abortSpy).not.toHaveBeenCalled();
+      // The cancel function should be called
+      expect(mockCancelAllToolCalls).toHaveBeenCalled();
+    });
+
+    it('should cancel a request when a tool is awaiting confirmation', async () => {
+      const mockOnConfirm = vi.fn().mockResolvedValue(undefined);
+      const toolCalls: TrackedToolCall[] = [
+        {
+          request: {
+            callId: 'confirm-call',
+            name: 'some_tool',
+            args: {},
+            isClientInitiated: false,
+            prompt_id: 'prompt-id-1',
+          },
+          status: 'awaiting_approval', // the only call, and it has no final result yet
+          responseSubmittedToGemini: false,
+          tool: {
+            name: 'some_tool',
+            description: 'a tool',
+            build: vi.fn().mockImplementation((_) => ({
+              getDescription: () => `Mock description`,
+            })),
+          } as any, // minimal stub, not a full tool implementation
+          invocation: {
+            getDescription: () => `Mock description`,
+          } as unknown as AnyToolInvocation,
+          confirmationDetails: {
+            type: 'edit',
+            title: 'Confirm Edit',
+            onConfirm: mockOnConfirm,
+            fileName: 'file.txt',
+            filePath: '/test/file.txt',
+            fileDiff: 'fake diff',
+            originalContent: 'old',
+            newContent: 'new',
+          },
+        } as TrackedWaitingToolCall,
+      ];
+
+      const { result } = renderTestHook(toolCalls);
+
+      // Precondition: an awaiting_approval call puts the hook in WaitingForConfirmation.
+      expect(result.current.streamingState).toBe(
+        StreamingState.WaitingForConfirmation,
+      );
+
+      // Press Escape; the hook listens for it in WaitingForConfirmation as well as Responding.
+      simulateEscapeKeyPress();
+
+      // The imperative cancel function should be called on the scheduler
+      expect(mockCancelAllToolCalls).toHaveBeenCalled();
+
+      // No call has succeeded or errored, so the hook adds the generic cancellation message.
+      await waitFor(() => {
+        expect(mockAddItem).toHaveBeenCalledWith(
+          expect.objectContaining({
+            text: 'Request cancelled.',
+          }),
+          expect.any(Number),
+        );
+      });
+
+      // The final state should be idle
+      expect(result.current.streamingState).toBe(StreamingState.Idle);
     });
   });
 
@@ -1282,7 +1368,7 @@ describe('useGeminiStream', () => {
 
       mockUseReactToolScheduler.mockImplementation((onComplete) => {
         capturedOnComplete = onComplete;
-        return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted];
+        return [[], mockScheduleToolCalls, mockMarkToolsAsSubmitted, vi.fn()];
       });
 
       renderHook(() =>
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index a0190a3c4b..ae3a23c7eb 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -111,6 +111,7 @@ export const useGeminiStream = (
   const [initError, setInitError] = useState<string | null>(null);
   const abortControllerRef = useRef<AbortController | null>(null);
   const turnCancelledRef = useRef(false);
+  const activeQueryIdRef = useRef<string | null>(null);
   const [isResponding, setIsResponding] = useState<boolean>(false);
   const [thought, setThought] = useState<ThoughtSummary | null>(null);
   const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
@@ -126,47 +127,55 @@ export const useGeminiStream = (
     return new GitService(config.getProjectRoot(), storage);
   }, [config, storage]);
 
-  const [toolCalls, scheduleToolCalls, markToolsAsSubmitted] =
-    useReactToolScheduler(
-      async (completedToolCallsFromScheduler) => {
-        // This onComplete is called when ALL scheduled tools for a given batch are done.
-        if (completedToolCallsFromScheduler.length > 0) {
-          // Add the final state of these tools to the history for display.
-          addItem(
-            mapTrackedToolCallsToDisplay(
-              completedToolCallsFromScheduler as TrackedToolCall[],
-            ),
-            Date.now(),
-          );
-
-          // Record tool calls with full metadata before sending responses.
-          try {
-            const currentModel =
-              config.getGeminiClient().getCurrentSequenceModel() ??
-              config.getModel();
-            config
-              .getGeminiClient()
-              .getChat()
-              .recordCompletedToolCalls(
-                currentModel,
-                completedToolCallsFromScheduler,
-              );
-          } catch (error) {
-            console.error(
-              `Error recording completed tool call information: ${error}`,
-            );
-          }
-
-          // Handle tool response submission immediately when tools complete
-          await handleCompletedTools(
+  const [
+    toolCalls,
+    scheduleToolCalls,
+    markToolsAsSubmitted,
+    setToolCallsForDisplay,
+    cancelAllToolCalls,
+  ] = useReactToolScheduler(
+    async (completedToolCallsFromScheduler) => {
+      // This onComplete is called when ALL scheduled tools for a given batch are done.
+      if (completedToolCallsFromScheduler.length > 0) {
+        // Add the final state of these tools to the history for display.
+        addItem(
+          mapTrackedToolCallsToDisplay(
             completedToolCallsFromScheduler as TrackedToolCall[],
+          ),
+          Date.now(),
+        );
+
+        // Clear the live-updating display now that the final state is in history.
+        setToolCallsForDisplay([]);
+
+        // Record tool calls with full metadata before sending responses.
+        try {
+          const currentModel =
+            config.getGeminiClient().getCurrentSequenceModel() ??
+            config.getModel();
+          config
+            .getGeminiClient()
+            .getChat()
+            .recordCompletedToolCalls(
+              currentModel,
+              completedToolCallsFromScheduler,
+            );
+        } catch (error) {
+          console.error(
+            `Error recording completed tool call information: ${error}`,
           );
         }
-      },
-      config,
-      getPreferredEditor,
-      onEditorClose,
-    );
+
+        // Handle tool response submission immediately when tools complete
+        await handleCompletedTools(
+          completedToolCallsFromScheduler as TrackedToolCall[],
+        );
+      }
+    },
+    config,
+    getPreferredEditor,
+    onEditorClose,
+  );
 
   const pendingToolCallGroupDisplay = useMemo(
     () =>
@@ -265,27 +274,54 @@ export const useGeminiStream = (
   }, [streamingState, config, history]);
 
   const cancelOngoingRequest = useCallback(() => {
-    if (streamingState !== StreamingState.Responding) {
+    if (
+      streamingState !== StreamingState.Responding &&
+      streamingState !== StreamingState.WaitingForConfirmation
+    ) {
       return;
     }
     if (turnCancelledRef.current) {
       return;
     }
     turnCancelledRef.current = true;
-    abortControllerRef.current?.abort();
+
+    // A full cancellation means no tools have produced a final result yet.
+    // This determines if we show a generic "Request cancelled" message.
+    const isFullCancellation = !toolCalls.some(
+      (tc) => tc.status === 'success' || tc.status === 'error',
+    );
+
+    // Ensure we have an abort controller, creating one if it doesn't exist.
+    if (!abortControllerRef.current) {
+      abortControllerRef.current = new AbortController();
+    }
+
+    // The order is important here.
+    // 1. Fire the signal to interrupt any active async operations.
+    abortControllerRef.current.abort();
+    // 2. Call the imperative cancel to clear the queue of pending tools.
+    cancelAllToolCalls(abortControllerRef.current.signal);
+
     if (pendingHistoryItemRef.current) {
       addItem(pendingHistoryItemRef.current, Date.now());
     }
-    addItem(
-      {
-        type: MessageType.INFO,
-        text: 'Request cancelled.',
-      },
-      Date.now(),
-    );
     setPendingHistoryItem(null);
+
+    // If it was a full cancellation, add the info message now.
+    // Otherwise, we let handleCompletedTools figure out the next step,
+    // which might involve sending partial results back to the model.
+    if (isFullCancellation) {
+      addItem(
+        {
+          type: MessageType.INFO,
+          text: 'Request cancelled.',
+        },
+        Date.now(),
+      );
+      setIsResponding(false);
+    }
+
     onCancelSubmit();
-    setIsResponding(false);
     setShellInputFocused(false);
   }, [
     streamingState,
@@ -294,6 +330,8 @@ export const useGeminiStream = (
     onCancelSubmit,
     pendingHistoryItemRef,
     setShellInputFocused,
+    cancelAllToolCalls,
+    toolCalls,
   ]);
 
   useKeypress(
@@ -302,7 +340,11 @@ export const useGeminiStream = (
         cancelOngoingRequest();
       }
     },
-    { isActive: streamingState === StreamingState.Responding },
+    {
+      isActive:
+        streamingState === StreamingState.Responding ||
+        streamingState === StreamingState.WaitingForConfirmation,
+    },
   );
 
   const prepareQueryForGemini = useCallback(
@@ -764,6 +806,8 @@ export const useGeminiStream = (
       options?: { isContinuation: boolean },
       prompt_id?: string,
     ) => {
+      const queryId = `${Date.now()}-${Math.random()}`;
+      activeQueryIdRef.current = queryId;
       if (
         (streamingState === StreamingState.Responding ||
           streamingState === StreamingState.WaitingForConfirmation) &&
@@ -901,7 +945,9 @@ export const useGeminiStream = (
             );
           }
         } finally {
-          setIsResponding(false);
+          if (activeQueryIdRef.current === queryId) {
+            setIsResponding(false);
+          }
         }
       });
     },
@@ -963,10 +1009,6 @@ export const useGeminiStream = (
 
   const handleCompletedTools = useCallback(
     async (completedToolCallsFromScheduler: TrackedToolCall[]) => {
-      if (isResponding) {
-        return;
-      }
-
       const completedAndReadyToSubmitTools =
         completedToolCallsFromScheduler.filter(
           (
@@ -1028,6 +1070,19 @@ export const useGeminiStream = (
       );
 
       if (allToolsCancelled) {
+        // If the turn was cancelled via the imperative escape key flow,
+        // the cancellation message is added there. We check the ref to avoid duplication.
+        if (!turnCancelledRef.current) {
+          addItem(
+            {
+              type: MessageType.INFO,
+              text: 'Request cancelled.',
+            },
+            Date.now(),
+          );
+        }
+        setIsResponding(false);
+
         if (geminiClient) {
           // We need to manually add the function responses to the history
           // so the model knows the tools were cancelled.
@@ -1074,12 +1129,12 @@ export const useGeminiStream = (
       );
     },
     [
-      isResponding,
       submitQuery,
       markToolsAsSubmitted,
       geminiClient,
       performMemoryRefresh,
       modelSwitchedFromQuotaError,
+      addItem,
     ],
   );
 
diff --git a/packages/cli/src/ui/hooks/useReactToolScheduler.ts b/packages/cli/src/ui/hooks/useReactToolScheduler.ts
index 883690d79a..2c7c8fc4df 100644
--- a/packages/cli/src/ui/hooks/useReactToolScheduler.ts
+++ b/packages/cli/src/ui/hooks/useReactToolScheduler.ts
@@ -62,12 +62,20 @@ export type TrackedToolCall =
   | TrackedCompletedToolCall
   | TrackedCancelledToolCall;
 
+export type CancelAllFn = (signal: AbortSignal) => void; // Imperative cancel callback; last element of the tuple returned by useReactToolScheduler.
+
 export function useReactToolScheduler(
   onComplete: (tools: CompletedToolCall[]) => Promise<void>,
   config: Config,
   getPreferredEditor: () => EditorType | undefined,
   onEditorClose: () => void,
-): [TrackedToolCall[], ScheduleFn, MarkToolsAsSubmittedFn] {
+): [
+  TrackedToolCall[],
+  ScheduleFn,
+  MarkToolsAsSubmittedFn,
+  React.Dispatch<React.SetStateAction<TrackedToolCall[]>>,
+  CancelAllFn,
+] {
   const [toolCallsForDisplay, setToolCallsForDisplay] = useState<
     TrackedToolCall[]
   >([]);
@@ -112,37 +120,36 @@ export function useReactToolScheduler(
   );
 
   const toolCallsUpdateHandler: ToolCallsUpdateHandler = useCallback(
-    (updatedCoreToolCalls: ToolCall[]) => {
-      setToolCallsForDisplay((prevTrackedCalls) =>
-        updatedCoreToolCalls.map((coreTc) => {
-          const existingTrackedCall = prevTrackedCalls.find(
-            (ptc) => ptc.request.callId === coreTc.request.callId,
-          );
-          // Start with the new core state, then layer on the existing UI state
-          // to ensure UI-only properties like pid are preserved.
+    (allCoreToolCalls: ToolCall[]) => {
+      setToolCallsForDisplay((prevTrackedCalls) => {
+        const prevCallsMap = new Map(
+          prevTrackedCalls.map((c) => [c.request.callId, c]),
+        );
+
+        return allCoreToolCalls.map((coreTc): TrackedToolCall => {
+          const existingTrackedCall = prevCallsMap.get(coreTc.request.callId);
+
           const responseSubmittedToGemini =
             existingTrackedCall?.responseSubmittedToGemini ?? false;
 
           if (coreTc.status === 'executing') {
+            // Preserve live output if it exists from a previous render.
+            const liveOutput = (existingTrackedCall as TrackedExecutingToolCall)
+              ?.liveOutput;
             return {
               ...coreTc,
               responseSubmittedToGemini,
-              liveOutput: (existingTrackedCall as TrackedExecutingToolCall)
-                ?.liveOutput,
+              liveOutput,
               pid: (coreTc as ExecutingToolCall).pid,
             };
+          } else {
+            return {
+              ...coreTc,
+              responseSubmittedToGemini,
+            };
           }
-
-          // For other statuses, explicitly set liveOutput and pid to undefined
-          // to ensure they are not carried over from a previous executing state.
-          return {
-            ...coreTc,
-            responseSubmittedToGemini,
-            liveOutput: undefined,
-            pid: undefined,
-          };
-        }),
-      );
+        });
+      });
     },
     [setToolCallsForDisplay],
   );
@@ -178,9 +185,10 @@ export function useReactToolScheduler(
       request: ToolCallRequestInfo | ToolCallRequestInfo[],
       signal: AbortSignal,
     ) => {
+      setToolCallsForDisplay([]);
       void scheduler.schedule(request, signal);
     },
-    [scheduler],
+    [scheduler, setToolCallsForDisplay],
   );
 
   const markToolsAsSubmitted: MarkToolsAsSubmittedFn = useCallback(
@@ -196,7 +204,20 @@ export function useReactToolScheduler(
     [],
   );
 
-  return [toolCallsForDisplay, schedule, markToolsAsSubmitted];
+  const cancelAllToolCalls = useCallback(
+    (signal: AbortSignal) => {
+      scheduler.cancelAll(signal); // Hand the caller's signal straight to the core scheduler.
+    },
+    [scheduler], // Callback identity changes only when the scheduler instance does.
+  );
+
+  return [
+    toolCallsForDisplay,
+    schedule,
+    markToolsAsSubmitted,
+    setToolCallsForDisplay,
+    cancelAllToolCalls,
+  ];
 }
 
 /**
diff --git a/packages/cli/src/ui/hooks/useToolScheduler.test.ts b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
index d80f8eceb2..11d1b7e7d8 100644
--- a/packages/cli/src/ui/hooks/useToolScheduler.test.ts
+++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
@@ -260,9 +260,15 @@ describe('useReactToolScheduler', () => {
       args: { param: 'value' },
     } as any;
 
+    let completedToolCalls: ToolCall[] = [];
+    onComplete.mockImplementation((calls) => {
+      completedToolCalls = calls;
+    });
+
     act(() => {
       schedule(request, new AbortController().signal);
     });
+
     await act(async () => {
       await vi.runAllTimersAsync();
     });
@@ -292,7 +298,110 @@ describe('useReactToolScheduler', () => {
         }),
       }),
     ]);
-    expect(result.current[0]).toEqual([]);
+    expect(completedToolCalls).toHaveLength(1);
+    expect(completedToolCalls[0].status).toBe('success');
+    expect(completedToolCalls[0].request).toBe(request);
+  });
+
+  it('should clear previous tool calls when scheduling new ones', async () => {
+    mockToolRegistry.getTool.mockReturnValue(mockTool);
+    (mockTool.execute as Mock).mockResolvedValue({
+      llmContent: 'Tool output',
+      returnDisplay: 'Formatted tool output',
+    } as ToolResult);
+
+    const { result } = renderScheduler();
+    const schedule = result.current[1]; // tuple index 1: schedule
+    const setToolCallsForDisplay = result.current[3]; // tuple index 3: display-state setter
+
+    // Seed the display with a stale, already-finished call via the state setter.
+    const oldToolCall = {
+      request: { callId: 'oldCall' },
+      status: 'success',
+    } as any; // minimal stub, not a complete TrackedToolCall
+    act(() => {
+      setToolCallsForDisplay([oldToolCall]);
+    });
+    expect(result.current[0]).toEqual([oldToolCall]);
+
+    const newRequest: ToolCallRequestInfo = {
+      callId: 'newCall',
+      name: 'mockTool',
+      args: {},
+    } as any;
+    act(() => {
+      schedule(newRequest, new AbortController().signal);
+    });
+
+    // schedule() empties the display before handing off to the scheduler, so
+    // the stale call must be gone and only the new call should be listed.
+    expect(result.current[0].length).toBe(1);
+    expect(result.current[0][0].request.callId).toBe('newCall');
+    expect(result.current[0][0].request.callId).not.toBe('oldCall'); // implied by the line above; kept as an explicit guard
+
+    // Let the new call finish (three flushes; presumably one per scheduler stage — TODO confirm).
+    await act(async () => {
+      await vi.runAllTimersAsync();
+    });
+    await act(async () => {
+      await vi.runAllTimersAsync();
+    });
+    await act(async () => {
+      await vi.runAllTimersAsync();
+    });
+    expect(onComplete).toHaveBeenCalled();
+  });
+
+  it('should cancel all running tool calls', async () => {
+    mockToolRegistry.getTool.mockReturnValue(mockTool);
+
+    let resolveExecute: (value: ToolResult) => void = () => {};
+    const executePromise = new Promise<ToolResult>((resolve) => {
+      resolveExecute = resolve;
+    });
+    (mockTool.execute as Mock).mockReturnValue(executePromise); // stays pending until resolveExecute is called
+    (mockTool.shouldConfirmExecute as Mock).mockResolvedValue(null); // presumably "no confirmation needed" — the call reaches 'executing' below
+
+    const { result } = renderScheduler();
+    const schedule = result.current[1]; // tuple index 1: schedule
+    const cancelAllToolCalls = result.current[4]; // tuple index 4: cancelAllToolCalls
+    const request: ToolCallRequestInfo = {
+      callId: 'cancelCall',
+      name: 'mockTool',
+      args: {},
+    } as any;
+
+    act(() => {
+      schedule(request, new AbortController().signal);
+    });
+    await act(async () => {
+      await vi.runAllTimersAsync();
+    }); // validation
+    await act(async () => {
+      await vi.runAllTimersAsync();
+    }); // scheduling
+
+    // At this point, the tool is 'executing' and waiting on the promise.
+    expect(result.current[0][0].status).toBe('executing');
+
+    const cancelController = new AbortController(); // never aborted: only the imperative cancel is exercised
+    act(() => {
+      cancelAllToolCalls(cancelController.signal);
+    });
+
+    await act(async () => {
+      await vi.runAllTimersAsync();
+    });
+
+    expect(onComplete).toHaveBeenCalledWith([
+      expect.objectContaining({
+        status: 'cancelled',
+        request,
+      }),
+    ]);
+
+    // Settle the still-pending execute() promise so the test leaves no open handles.
+    resolveExecute({ llmContent: 'output', returnDisplay: 'display' });
   });
 
   it('should handle tool not found', async () => {
@@ -305,6 +414,11 @@ describe('useReactToolScheduler', () => {
       args: {},
     } as any;
 
+    let completedToolCalls: ToolCall[] = [];
+    onComplete.mockImplementation((calls) => {
+      completedToolCalls = calls;
+    });
+
     act(() => {
       schedule(request, new AbortController().signal);
     });
@@ -315,24 +429,15 @@ describe('useReactToolScheduler', () => {
       await vi.runAllTimersAsync();
     });
 
-    expect(onComplete).toHaveBeenCalledWith([
-      expect.objectContaining({
-        status: 'error',
-        request,
-        response: expect.objectContaining({
-          error: expect.objectContaining({
-            message: expect.stringMatching(
-              /Tool "nonexistentTool" not found in registry/,
-            ),
-          }),
-        }),
-      }),
-    ]);
-    const errorMessage = onComplete.mock.calls[0][0][0].response.error.message;
-    expect(errorMessage).toContain('Did you mean one of:');
-    expect(errorMessage).toContain('"mockTool"');
-    expect(errorMessage).toContain('"anotherTool"');
-    expect(result.current[0]).toEqual([]);
+    expect(completedToolCalls).toHaveLength(1);
+    expect(completedToolCalls[0].status).toBe('error');
+    expect(completedToolCalls[0].request).toBe(request);
+    expect((completedToolCalls[0] as any).response.error.message).toContain(
+      'Tool "nonexistentTool" not found in registry',
+    );
+    expect((completedToolCalls[0] as any).response.error.message).toContain(
+      'Did you mean one of:',
+    );
   });
 
   it('should handle error during shouldConfirmExecute', async () => {
@@ -348,6 +453,11 @@ describe('useReactToolScheduler', () => {
       args: {},
     } as any;
 
+    let completedToolCalls: ToolCall[] = [];
+    onComplete.mockImplementation((calls) => {
+      completedToolCalls = calls;
+    });
+
     act(() => {
       schedule(request, new AbortController().signal);
     });
@@ -358,16 +468,10 @@ describe('useReactToolScheduler', () => {
       await vi.runAllTimersAsync();
     });
 
-    expect(onComplete).toHaveBeenCalledWith([
-      expect.objectContaining({
-        status: 'error',
-        request,
-        response: expect.objectContaining({
-          error: confirmError,
-        }),
-      }),
-    ]);
-    expect(result.current[0]).toEqual([]);
+    expect(completedToolCalls).toHaveLength(1);
+    expect(completedToolCalls[0].status).toBe('error');
+    expect(completedToolCalls[0].request).toBe(request);
+    expect((completedToolCalls[0] as any).response.error).toBe(confirmError);
   });
 
   it('should handle error during execute', async () => {
@@ -384,6 +488,11 @@ describe('useReactToolScheduler', () => {
       args: {},
     } as any;
 
+    let completedToolCalls: ToolCall[] = [];
+    onComplete.mockImplementation((calls) => {
+      completedToolCalls = calls;
+    });
+
     act(() => {
       schedule(request, new AbortController().signal);
     });
@@ -397,16 +506,10 @@ describe('useReactToolScheduler', () => {
       await vi.runAllTimersAsync();
     });
 
-    expect(onComplete).toHaveBeenCalledWith([
-      expect.objectContaining({
-        status: 'error',
-        request,
-        response: expect.objectContaining({
-          error: execError,
-        }),
-      }),
-    ]);
-    expect(result.current[0]).toEqual([]);
+    expect(completedToolCalls).toHaveLength(1);
+    expect(completedToolCalls[0].status).toBe('error');
+    expect(completedToolCalls[0].request).toBe(request);
+    expect((completedToolCalls[0] as any).response.error).toBe(execError);
   });
 
   it('should handle tool requiring confirmation - approved', async () => {
@@ -518,7 +621,7 @@ describe('useReactToolScheduler', () => {
               functionResponse: expect.objectContaining({
                 response: expect.objectContaining({
                   error:
-                    '[Operation Cancelled] Reason: User did not allow tool call',
+                    '[Operation Cancelled] Reason: User cancelled the operation.',
                 }),
               }),
             }),
@@ -705,7 +808,9 @@ describe('useReactToolScheduler', () => {
         ],
       }),
     });
-    expect(result.current[0]).toEqual([]);
+
+    expect(completedCalls).toHaveLength(2);
+    expect(completedCalls.every((t) => t.status === 'success')).toBe(true);
   });
 
   it('should queue if scheduling while already running', async () => {
@@ -774,7 +879,8 @@ describe('useReactToolScheduler', () => {
         response: expect.objectContaining({ resultDisplay: 'done display' }),
       }),
     ]);
-    expect(result.current[0]).toEqual([]);
+    const toolCalls = result.current[0];
+    expect(toolCalls).toHaveLength(0);
   });
 });
 
diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts
index e1e6aa2430..7dbf8021b8 100644
--- a/packages/core/src/core/coreToolScheduler.test.ts
+++ b/packages/core/src/core/coreToolScheduler.test.ts
@@ -288,6 +288,263 @@ describe('CoreToolScheduler', () => {
     expect(completedCalls[0].status).toBe('cancelled');
   });
 
+  it('should cancel all tools when cancelAll is called', async () => {
+    const mockTool1 = new MockTool({
+      name: 'mockTool1',
+      shouldConfirmExecute: MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, // only tool 1 asks for confirmation
+    });
+    const mockTool2 = new MockTool({ name: 'mockTool2' });
+    const mockTool3 = new MockTool({ name: 'mockTool3' });
+
+    const mockToolRegistry = { // NOTE(review): registry/config stubs are duplicated verbatim in the next test; consider a shared helper
+      getTool: (name: string) => {
+        if (name === 'mockTool1') return mockTool1;
+        if (name === 'mockTool2') return mockTool2;
+        if (name === 'mockTool3') return mockTool3;
+        return undefined;
+      },
+      getFunctionDeclarations: () => [],
+      tools: new Map(),
+      discovery: {},
+      registerTool: () => {},
+      getToolByName: (name: string) => {
+        if (name === 'mockTool1') return mockTool1;
+        if (name === 'mockTool2') return mockTool2;
+        if (name === 'mockTool3') return mockTool3;
+        return undefined;
+      },
+      getToolByDisplayName: () => undefined,
+      getTools: () => [],
+      discoverTools: async () => {},
+      getAllTools: () => [],
+      getToolsByServer: () => [],
+    } as unknown as ToolRegistry;
+
+    const onAllToolCallsComplete = vi.fn();
+    const onToolCallsUpdate = vi.fn();
+
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getUsageStatisticsEnabled: () => true,
+      getDebugMode: () => false,
+      getApprovalMode: () => ApprovalMode.DEFAULT,
+      getAllowedTools: () => [],
+      getContentGeneratorConfig: () => ({
+        model: 'test-model',
+        authType: 'oauth-personal',
+      }),
+      getShellExecutionConfig: () => ({
+        terminalWidth: 90,
+        terminalHeight: 30,
+      }),
+      storage: {
+        getProjectTempDir: () => '/tmp',
+      },
+      getTruncateToolOutputThreshold: () =>
+        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
+      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
+      getToolRegistry: () => mockToolRegistry,
+      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
+      getGeminiClient: () => null, // No client needed for these tests
+      getEnableMessageBusIntegration: () => false,
+      getMessageBus: () => null,
+      getPolicyEngine: () => null,
+    } as unknown as Config;
+
+    const scheduler = new CoreToolScheduler({
+      config: mockConfig,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+      getPreferredEditor: () => 'vscode',
+      onEditorClose: vi.fn(),
+    });
+
+    const abortController = new AbortController();
+    const requests = [
+      {
+        callId: '1',
+        name: 'mockTool1',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-1',
+      },
+      {
+        callId: '2',
+        name: 'mockTool2',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-1',
+      },
+      {
+        callId: '3',
+        name: 'mockTool3',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-1',
+      },
+    ];
+
+    // Don't await, let it run in the background
+    void scheduler.schedule(requests, abortController.signal);
+
+    // Wait for the first tool to be awaiting approval
+    await waitForStatus(onToolCallsUpdate, 'awaiting_approval');
+
+    // Cancel the whole batch while a call is still awaiting approval
+    scheduler.cancelAll(abortController.signal);
+    abortController.abort(); // Also fire the signal
+
+    await vi.waitFor(() => {
+      expect(onAllToolCallsComplete).toHaveBeenCalled();
+    });
+
+    const completedCalls = onAllToolCallsComplete.mock
+      .calls[0][0] as ToolCall[]; // first invocation, first argument
+
+    expect(completedCalls).toHaveLength(3); // every requested call is reported, not just the one awaiting approval
+    expect(completedCalls.find((c) => c.request.callId === '1')?.status).toBe(
+      'cancelled',
+    );
+    expect(completedCalls.find((c) => c.request.callId === '2')?.status).toBe(
+      'cancelled',
+    );
+    expect(completedCalls.find((c) => c.request.callId === '3')?.status).toBe(
+      'cancelled',
+    );
+  });
+
+  it('should cancel all tools in a batch when one is cancelled via confirmation', async () => {
+    const mockTool1 = new MockTool({
+      name: 'mockTool1',
+      shouldConfirmExecute: MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, // only tool 1 asks for confirmation
+    });
+    const mockTool2 = new MockTool({ name: 'mockTool2' });
+    const mockTool3 = new MockTool({ name: 'mockTool3' });
+
+    const mockToolRegistry = {
+      getTool: (name: string) => {
+        if (name === 'mockTool1') return mockTool1;
+        if (name === 'mockTool2') return mockTool2;
+        if (name === 'mockTool3') return mockTool3;
+        return undefined;
+      },
+      getFunctionDeclarations: () => [],
+      tools: new Map(),
+      discovery: {},
+      registerTool: () => {},
+      getToolByName: (name: string) => {
+        if (name === 'mockTool1') return mockTool1;
+        if (name === 'mockTool2') return mockTool2;
+        if (name === 'mockTool3') return mockTool3;
+        return undefined;
+      },
+      getToolByDisplayName: () => undefined,
+      getTools: () => [],
+      discoverTools: async () => {},
+      getAllTools: () => [],
+      getToolsByServer: () => [],
+    } as unknown as ToolRegistry;
+
+    const onAllToolCallsComplete = vi.fn();
+    const onToolCallsUpdate = vi.fn();
+
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getUsageStatisticsEnabled: () => true,
+      getDebugMode: () => false,
+      getApprovalMode: () => ApprovalMode.DEFAULT,
+      getAllowedTools: () => [],
+      getContentGeneratorConfig: () => ({
+        model: 'test-model',
+        authType: 'oauth-personal',
+      }),
+      getShellExecutionConfig: () => ({
+        terminalWidth: 90,
+        terminalHeight: 30,
+      }),
+      storage: {
+        getProjectTempDir: () => '/tmp',
+      },
+      getTruncateToolOutputThreshold: () =>
+        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
+      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
+      getToolRegistry: () => mockToolRegistry,
+      getUseSmartEdit: () => false,
+      getUseModelRouter: () => false,
+      getGeminiClient: () => null, // No client needed for these tests
+      getEnableMessageBusIntegration: () => false,
+      getMessageBus: () => null,
+      getPolicyEngine: () => null,
+    } as unknown as Config;
+
+    const scheduler = new CoreToolScheduler({
+      config: mockConfig,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+      getPreferredEditor: () => 'vscode',
+      onEditorClose: vi.fn(),
+    });
+
+    const abortController = new AbortController();
+    const requests = [
+      {
+        callId: '1',
+        name: 'mockTool1',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-1',
+      },
+      {
+        callId: '2',
+        name: 'mockTool2',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-1',
+      },
+      {
+        callId: '3',
+        name: 'mockTool3',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-id-1',
+      },
+    ];
+
+    // Don't await, let it run in the background
+    void scheduler.schedule(requests, abortController.signal);
+
+    // Wait for the first tool to be awaiting approval
+    const awaitingCall = (await waitForStatus(
+      onToolCallsUpdate,
+      'awaiting_approval',
+    )) as WaitingToolCall;
+
+    // Decline the awaiting call through its own handler; cancelAll is never called here
+    await awaitingCall.confirmationDetails.onConfirm(
+      ToolConfirmationOutcome.Cancel,
+    );
+    abortController.abort(); // User cancelling often involves an abort signal
+
+    await vi.waitFor(() => {
+      expect(onAllToolCallsComplete).toHaveBeenCalled();
+    });
+
+    const completedCalls = onAllToolCallsComplete.mock
+      .calls[0][0] as ToolCall[]; // first invocation, first argument
+
+    expect(completedCalls).toHaveLength(3); // the declined call plus the two others in the batch
+    expect(completedCalls.find((c) => c.request.callId === '1')?.status).toBe(
+      'cancelled',
+    );
+    expect(completedCalls.find((c) => c.request.callId === '2')?.status).toBe(
+      'cancelled',
+    );
+    expect(completedCalls.find((c) => c.request.callId === '3')?.status).toBe(
+      'cancelled',
+    );
+  });
+
   it('should mark tool call as cancelled when abort happens during confirmation error', async () => {
     const abortController = new AbortController();
     const abortError = new Error('Abort requested during confirmation');
@@ -1510,16 +1767,19 @@ describe('CoreToolScheduler request queueing', () => {
 
     await scheduler.schedule(requests, abortController.signal);
 
-    // Wait for all tools to be awaiting approval
+    // Wait for the FIRST tool to be awaiting approval
     await vi.waitFor(() => {
       const calls = onToolCallsUpdate.mock.calls.at(-1)?.[0] as ToolCall[];
+      // With the sequential scheduler, the update includes the active call and the queue.
       expect(calls?.length).toBe(3);
-      expect(calls?.every((call) => call.status === 'awaiting_approval')).toBe(
-        true,
-      );
+      expect(calls?.[0].status).toBe('awaiting_approval');
+      expect(calls?.[0].request.callId).toBe('1');
+      // Check that the other two are in the queue (still in 'validating' state)
+      expect(calls?.[1].status).toBe('validating');
+      expect(calls?.[2].status).toBe('validating');
     });
 
-    expect(pendingConfirmations.length).toBe(3);
+    expect(pendingConfirmations.length).toBe(1);
 
     // Approve the first tool with ProceedAlways
     const firstConfirmation = pendingConfirmations[0];
@@ -1528,15 +1788,16 @@ describe('CoreToolScheduler request queueing', () => {
     // Wait for all tools to be completed
     await vi.waitFor(() => {
       expect(onAllToolCallsComplete).toHaveBeenCalled();
-      const completedCalls = onAllToolCallsComplete.mock.calls.at(
-        -1,
-      )?.[0] as ToolCall[];
-      expect(completedCalls?.length).toBe(3);
-      expect(completedCalls?.every((call) => call.status === 'success')).toBe(
-        true,
-      );
     });
 
+    const completedCalls = onAllToolCallsComplete.mock.calls.at(
+      -1,
+    )?.[0] as ToolCall[];
+    expect(completedCalls?.length).toBe(3);
+    expect(completedCalls?.every((call) => call.status === 'success')).toBe(
+      true,
+    );
+
     // Verify approval mode was changed
     expect(approvalMode).toBe(ApprovalMode.AUTO_EDIT);
   });
@@ -1788,11 +2049,10 @@ describe('CoreToolScheduler Sequential Execution', () => {
       expect(onAllToolCallsComplete).toHaveBeenCalled();
     });
 
-    // Check that execute was called for all three tools initially
-    expect(executeFn).toHaveBeenCalledTimes(3);
+    // Check that execute was called for the first two tools only
+    expect(executeFn).toHaveBeenCalledTimes(2);
     expect(executeFn).toHaveBeenCalledWith({ call: 1 });
     expect(executeFn).toHaveBeenCalledWith({ call: 2 });
-    expect(executeFn).toHaveBeenCalledWith({ call: 3 });
 
     const completedCalls = onAllToolCallsComplete.mock
       .calls[0][0] as ToolCall[];
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
index 5c1cb58fb7..a59de8698e 100644
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -348,12 +348,15 @@ export class CoreToolScheduler {
   private onEditorClose: () => void;
   private isFinalizingToolCalls = false;
   private isScheduling = false;
+  private isCancelling = false;
   private requestQueue: Array<{
     request: ToolCallRequestInfo | ToolCallRequestInfo[];
     signal: AbortSignal;
     resolve: () => void;
     reject: (reason?: Error) => void;
   }> = [];
+  private toolCallQueue: ToolCall[] = [];
+  private completedToolCallsForBatch: CompletedToolCall[] = [];
 
   constructor(options: CoreToolSchedulerOptions) {
     this.config = options.config;
@@ -398,30 +401,36 @@ export class CoreToolScheduler {
   private setStatusInternal(
     targetCallId: string,
     status: 'success',
+    signal: AbortSignal,
     response: ToolCallResponseInfo,
   ): void;
   private setStatusInternal(
     targetCallId: string,
     status: 'awaiting_approval',
+    signal: AbortSignal,
     confirmationDetails: ToolCallConfirmationDetails,
   ): void;
   private setStatusInternal(
     targetCallId: string,
     status: 'error',
+    signal: AbortSignal,
     response: ToolCallResponseInfo,
   ): void;
   private setStatusInternal(
     targetCallId: string,
     status: 'cancelled',
+    signal: AbortSignal,
     reason: string,
   ): void;
   private setStatusInternal(
     targetCallId: string,
     status: 'executing' | 'scheduled' | 'validating',
+    signal: AbortSignal,
   ): void;
   private setStatusInternal(
     targetCallId: string,
     newStatus: Status,
+    signal: AbortSignal,
     auxiliaryData?: unknown,
   ): void {
     this.toolCalls = this.toolCalls.map((currentCall) => {
@@ -561,7 +570,6 @@ export class CoreToolScheduler {
       }
     });
     this.notifyToolCallsUpdate();
-    this.checkAndNotifyCompletion();
   }
 
   private setArgsInternal(targetCallId: string, args: unknown): void {
@@ -692,11 +700,43 @@ export class CoreToolScheduler {
     return this._schedule(request, signal);
   }
 
+  cancelAll(signal: AbortSignal): void {
+    if (this.isCancelling) {
+      return;
+    }
+    this.isCancelling = true;
+    // Cancel the currently active tool call, if there is one.
+    if (this.toolCalls.length > 0) {
+      const activeCall = this.toolCalls[0];
+      // Only cancel if it's in a cancellable state.
+      if (
+        activeCall.status === 'awaiting_approval' ||
+        activeCall.status === 'executing' ||
+        activeCall.status === 'scheduled' ||
+        activeCall.status === 'validating'
+      ) {
+        this.setStatusInternal(
+          activeCall.request.callId,
+          'cancelled',
+          signal,
+          'User cancelled the operation.',
+        );
+      }
+    }
+
+    // Clear the queue and mark all queued items as cancelled for completion reporting.
+    this._cancelAllQueuedCalls();
+
+    // Finalize the batch immediately.
+    void this.checkAndNotifyCompletion(signal);
+  }
+
   private async _schedule(
     request: ToolCallRequestInfo | ToolCallRequestInfo[],
     signal: AbortSignal,
   ): Promise<void> {
     this.isScheduling = true;
+    this.isCancelling = false;
     try {
       if (this.isRunning()) {
         throw new Error(
@@ -704,6 +744,7 @@ export class CoreToolScheduler {
         );
       }
       const requestsToProcess = Array.isArray(request) ? request : [request];
+      this.completedToolCallsForBatch = [];
 
       const newToolCalls: ToolCall[] = requestsToProcess.map(
         (reqInfo): ToolCall => {
@@ -753,45 +794,74 @@ export class CoreToolScheduler {
         },
       );
 
-      this.toolCalls = this.toolCalls.concat(newToolCalls);
-      this.notifyToolCallsUpdate();
+      this.toolCallQueue.push(...newToolCalls);
+      await this._processNextInQueue(signal);
+    } finally {
+      this.isScheduling = false;
+    }
+  }
 
-      for (const toolCall of newToolCalls) {
-        if (toolCall.status !== 'validating') {
-          continue;
+  private async _processNextInQueue(signal: AbortSignal): Promise<void> {
+    // If there's already a tool being processed, or the queue is empty, stop.
+    if (this.toolCalls.length > 0 || this.toolCallQueue.length === 0) {
+      return;
+    }
+
+    // If cancellation happened between steps, handle it.
+    if (signal.aborted) {
+      this._cancelAllQueuedCalls();
+      // Finalize the batch.
+      await this.checkAndNotifyCompletion(signal);
+      return;
+    }
+
+    const toolCall = this.toolCallQueue.shift()!;
+
+    // This is now the single active tool call.
+    this.toolCalls = [toolCall];
+    this.notifyToolCallsUpdate();
+
+    // Handle tools that were already errored during creation.
+    if (toolCall.status === 'error') {
+      // An error during validation means this "active" tool is already complete.
+      // We need to check for batch completion to either finish or process the next in queue.
+      await this.checkAndNotifyCompletion(signal);
+      return;
+    }
+
+    // Resolve confirmation for a tool call that is still in the 'validating' state.
+    if (toolCall.status === 'validating') {
+      const { request: reqInfo, invocation } = toolCall;
+
+      try {
+        if (signal.aborted) {
+          this.setStatusInternal(
+            reqInfo.callId,
+            'cancelled',
+            signal,
+            'Tool call cancelled by user.',
+          );
+          // The completion check will handle the cascade.
+          await this.checkAndNotifyCompletion(signal);
+          return;
         }
 
-        const validatingCall = toolCall as ValidatingToolCall;
-        const { request: reqInfo, invocation } = validatingCall;
+        const confirmationDetails =
+          await invocation.shouldConfirmExecute(signal);
 
-        try {
-          if (signal.aborted) {
-            this.setStatusInternal(
-              reqInfo.callId,
-              'cancelled',
-              'Tool call cancelled by user.',
-            );
-            continue;
-          }
-
-          const confirmationDetails =
-            await invocation.shouldConfirmExecute(signal);
-
-          if (!confirmationDetails) {
+        if (!confirmationDetails) {
+          this.setToolCallOutcome(
+            reqInfo.callId,
+            ToolConfirmationOutcome.ProceedAlways,
+          );
+          this.setStatusInternal(reqInfo.callId, 'scheduled', signal);
+        } else {
+          if (this.isAutoApproved(toolCall)) {
             this.setToolCallOutcome(
               reqInfo.callId,
               ToolConfirmationOutcome.ProceedAlways,
             );
-            this.setStatusInternal(reqInfo.callId, 'scheduled');
-            continue;
-          }
-
-          if (this.isAutoApproved(validatingCall)) {
-            this.setToolCallOutcome(
-              reqInfo.callId,
-              ToolConfirmationOutcome.ProceedAlways,
-            );
-            this.setStatusInternal(reqInfo.callId, 'scheduled');
+            this.setStatusInternal(reqInfo.callId, 'scheduled', signal);
           } else {
             // Allow IDE to resolve confirmation
             if (
@@ -835,35 +905,36 @@ export class CoreToolScheduler {
             this.setStatusInternal(
               reqInfo.callId,
               'awaiting_approval',
+              signal,
               wrappedConfirmationDetails,
             );
           }
-        } catch (error) {
-          if (signal.aborted) {
-            this.setStatusInternal(
-              reqInfo.callId,
-              'cancelled',
-              'Tool call cancelled by user.',
-            );
-            continue;
-          }
-
+        }
+      } catch (error) {
+        if (signal.aborted) {
+          this.setStatusInternal(
+            reqInfo.callId,
+            'cancelled',
+            signal,
+            'Tool call cancelled by user.',
+          );
+          await this.checkAndNotifyCompletion(signal);
+        } else {
           this.setStatusInternal(
             reqInfo.callId,
             'error',
+            signal,
             createErrorResponse(
               reqInfo,
               error instanceof Error ? error : new Error(String(error)),
               ToolErrorType.UNHANDLED_EXCEPTION,
             ),
           );
+          await this.checkAndNotifyCompletion(signal);
         }
       }
-      await this.attemptExecutionOfScheduledCalls(signal);
-      void this.checkAndNotifyCompletion();
-    } finally {
-      this.isScheduling = false;
     }
+    await this.attemptExecutionOfScheduledCalls(signal);
   }
 
   async handleConfirmationResponse(
@@ -881,18 +952,12 @@ export class CoreToolScheduler {
       await originalOnConfirm(outcome);
     }
 
-    if (outcome === ToolConfirmationOutcome.ProceedAlways) {
-      await this.autoApproveCompatiblePendingTools(signal, callId);
-    }
-
     this.setToolCallOutcome(callId, outcome);
 
     if (outcome === ToolConfirmationOutcome.Cancel || signal.aborted) {
-      this.setStatusInternal(
-        callId,
-        'cancelled',
-        'User did not allow tool call',
-      );
+      // Instead of just cancelling one tool, trigger the full cancel cascade.
+      this.cancelAll(signal);
+      return; // `cancelAll` calls `checkAndNotifyCompletion`, so we can exit here.
     } else if (outcome === ToolConfirmationOutcome.ModifyWithEditor) {
       const waitingToolCall = toolCall as WaitingToolCall;
       if (isModifiableDeclarativeTool(waitingToolCall.tool)) {
@@ -902,7 +967,7 @@ export class CoreToolScheduler {
           return;
         }
 
-        this.setStatusInternal(callId, 'awaiting_approval', {
+        this.setStatusInternal(callId, 'awaiting_approval', signal, {
           ...waitingToolCall.confirmationDetails,
           isModifying: true,
         } as ToolCallConfirmationDetails);
@@ -917,7 +982,7 @@ export class CoreToolScheduler {
           this.onEditorClose,
         );
         this.setArgsInternal(callId, updatedParams);
-        this.setStatusInternal(callId, 'awaiting_approval', {
+        this.setStatusInternal(callId, 'awaiting_approval', signal, {
           ...waitingToolCall.confirmationDetails,
           fileDiff: updatedDiff,
           isModifying: false,
@@ -932,7 +997,7 @@ export class CoreToolScheduler {
           signal,
         );
       }
-      this.setStatusInternal(callId, 'scheduled');
+      this.setStatusInternal(callId, 'scheduled', signal);
     }
     await this.attemptExecutionOfScheduledCalls(signal);
   }
@@ -974,10 +1039,15 @@ export class CoreToolScheduler {
     );
 
     this.setArgsInternal(toolCall.request.callId, updatedParams);
-    this.setStatusInternal(toolCall.request.callId, 'awaiting_approval', {
-      ...toolCall.confirmationDetails,
-      fileDiff: updatedDiff,
-    });
+    this.setStatusInternal(
+      toolCall.request.callId,
+      'awaiting_approval',
+      signal,
+      {
+        ...toolCall.confirmationDetails,
+        fileDiff: updatedDiff,
+      },
+    );
   }
 
   private async attemptExecutionOfScheduledCalls(
@@ -1002,7 +1072,7 @@ export class CoreToolScheduler {
         const scheduledCall = toolCall;
         const { callId, name: toolName } = scheduledCall.request;
         const invocation = scheduledCall.invocation;
-        this.setStatusInternal(callId, 'executing');
+        this.setStatusInternal(callId, 'executing', signal);
 
         const liveOutputCallback =
           scheduledCall.tool.canUpdateOutput && this.outputUpdateHandler
@@ -1055,12 +1125,10 @@ export class CoreToolScheduler {
             this.setStatusInternal(
               callId,
               'cancelled',
+              signal,
               'User cancelled tool execution.',
             );
-            continue;
-          }
-
-          if (toolResult.error === undefined) {
+          } else if (toolResult.error === undefined) {
             let content = toolResult.llmContent;
             let outputFile: string | undefined = undefined;
             const contentLength =
@@ -1116,7 +1184,7 @@ export class CoreToolScheduler {
               outputFile,
               contentLength,
             };
-            this.setStatusInternal(callId, 'success', successResponse);
+            this.setStatusInternal(callId, 'success', signal, successResponse);
           } else {
             // It is a failure
             const error = new Error(toolResult.error.message);
@@ -1125,19 +1193,21 @@ export class CoreToolScheduler {
               error,
               toolResult.error.type,
             );
-            this.setStatusInternal(callId, 'error', errorResponse);
+            this.setStatusInternal(callId, 'error', signal, errorResponse);
           }
         } catch (executionError: unknown) {
           if (signal.aborted) {
             this.setStatusInternal(
               callId,
               'cancelled',
+              signal,
               'User cancelled tool execution.',
             );
           } else {
             this.setStatusInternal(
               callId,
               'error',
+              signal,
               createErrorResponse(
                 scheduledCall.request,
                 executionError instanceof Error
@@ -1148,45 +1218,126 @@ export class CoreToolScheduler {
             );
           }
         }
+        await this.checkAndNotifyCompletion(signal);
       }
     }
   }
 
-  private async checkAndNotifyCompletion(): Promise<void> {
-    const allCallsAreTerminal = this.toolCalls.every(
-      (call) =>
-        call.status === 'success' ||
-        call.status === 'error' ||
-        call.status === 'cancelled',
-    );
+  private async checkAndNotifyCompletion(signal: AbortSignal): Promise<void> {
+    // Inspect only the single active tool call first; batch completion is handled below.
+    if (this.toolCalls.length === 0) {
+      // It's possible to be called when a batch is cancelled before any tool has started.
+      if (signal.aborted && this.toolCallQueue.length > 0) {
+        this._cancelAllQueuedCalls();
+      }
+    } else {
+      const activeCall = this.toolCalls[0];
+      const isTerminal =
+        activeCall.status === 'success' ||
+        activeCall.status === 'error' ||
+        activeCall.status === 'cancelled';
 
-    if (this.toolCalls.length > 0 && allCallsAreTerminal) {
-      const completedCalls = [...this.toolCalls] as CompletedToolCall[];
+      // If the active tool is not in a terminal state (e.g., it's 'executing' or 'awaiting_approval'),
+      // then the scheduler is still busy or paused. We should not proceed.
+      if (!isTerminal) {
+        return;
+      }
+
+      // The active tool is finished. Move it to the completed batch.
+      const completedCall = activeCall as CompletedToolCall;
+      this.completedToolCallsForBatch.push(completedCall);
+      logToolCall(this.config, new ToolCallEvent(completedCall));
+
+      // Clear the active tool slot. This is crucial for the sequential processing.
       this.toolCalls = [];
+    }
 
-      for (const call of completedCalls) {
-        logToolCall(this.config, new ToolCallEvent(call));
+    // Now, check if the entire batch is complete.
+    // The batch is complete if the queue is empty or the operation was cancelled.
+    if (this.toolCallQueue.length === 0 || signal.aborted) {
+      if (signal.aborted) {
+        this._cancelAllQueuedCalls();
+      }
+
+      // If there's nothing to report and we weren't cancelled, we can stop.
+      // But if we were cancelled, we must proceed to potentially start the next queued request.
+      if (this.completedToolCallsForBatch.length === 0 && !signal.aborted) {
+        return;
       }
 
       if (this.onAllToolCallsComplete) {
         this.isFinalizingToolCalls = true;
-        await this.onAllToolCallsComplete(completedCalls);
+        // Use the batch array, not the (now empty) active array.
+        await this.onAllToolCallsComplete(this.completedToolCallsForBatch);
+        this.completedToolCallsForBatch = []; // Clear after reporting.
         this.isFinalizingToolCalls = false;
       }
+      this.isCancelling = false;
       this.notifyToolCallsUpdate();
-      // After completion, process the next item in the queue.
+
+      // After completion of the entire batch, process the next item in the main request queue.
       if (this.requestQueue.length > 0) {
         const next = this.requestQueue.shift()!;
         this._schedule(next.request, next.signal)
           .then(next.resolve)
           .catch(next.reject);
       }
+    } else {
+      // The batch is not yet complete, so continue processing the current batch sequence.
+      await this._processNextInQueue(signal);
+    }
+  }
+
+  private _cancelAllQueuedCalls(): void {
+    while (this.toolCallQueue.length > 0) {
+      const queuedCall = this.toolCallQueue.shift()!;
+      // Don't cancel tools that already errored during validation.
+      if (queuedCall.status === 'error') {
+        this.completedToolCallsForBatch.push(queuedCall);
+        continue;
+      }
+      const durationMs =
+        'startTime' in queuedCall && queuedCall.startTime
+          ? Date.now() - queuedCall.startTime
+          : undefined;
+      const errorMessage =
+        '[Operation Cancelled] User cancelled the operation.';
+      this.completedToolCallsForBatch.push({
+        request: queuedCall.request,
+        tool: queuedCall.tool,
+        invocation: queuedCall.invocation,
+        status: 'cancelled',
+        response: {
+          callId: queuedCall.request.callId,
+          responseParts: [
+            {
+              functionResponse: {
+                id: queuedCall.request.callId,
+                name: queuedCall.request.name,
+                response: {
+                  error: errorMessage,
+                },
+              },
+            },
+          ],
+          resultDisplay: undefined,
+          error: undefined,
+          errorType: undefined,
+          contentLength: errorMessage.length,
+        },
+        durationMs,
+        outcome: ToolConfirmationOutcome.Cancel,
+      });
     }
   }
 
   private notifyToolCallsUpdate(): void {
     if (this.onToolCallsUpdate) {
-      this.onToolCallsUpdate([...this.toolCalls]);
+      this.onToolCallsUpdate([
+        ...this.completedToolCallsForBatch,
+        ...this.toolCalls,
+        ...this.toolCallQueue,
+      ]);
     }
   }
 
@@ -1215,35 +1366,4 @@ export class CoreToolScheduler {
 
     return doesToolInvocationMatch(tool, invocation, allowedTools);
   }
-
-  private async autoApproveCompatiblePendingTools(
-    signal: AbortSignal,
-    triggeringCallId: string,
-  ): Promise<void> {
-    const pendingTools = this.toolCalls.filter(
-      (call) =>
-        call.status === 'awaiting_approval' &&
-        call.request.callId !== triggeringCallId,
-    ) as WaitingToolCall[];
-
-    for (const pendingTool of pendingTools) {
-      try {
-        const stillNeedsConfirmation =
-          await pendingTool.invocation.shouldConfirmExecute(signal);
-
-        if (!stillNeedsConfirmation) {
-          this.setToolCallOutcome(
-            pendingTool.request.callId,
-            ToolConfirmationOutcome.ProceedAlways,
-          );
-          this.setStatusInternal(pendingTool.request.callId, 'scheduled');
-        }
-      } catch (error) {
-        console.error(
-          `Error checking confirmation for tool ${pendingTool.request.callId}:`,
-          error,
-        );
-      }
-    }
-  }
 }

From 5ded674ad6071fbfade3a56f75894c613b24b580 Mon Sep 17 00:00:00 2001
From: Riddhi Dutta <rite2riddhi@gmail.com>
Date: Mon, 27 Oct 2025 22:43:17 +0530
Subject: [PATCH 33/73] Refactor vim.test.ts: Use Parameterized Tests (#11969)

---
 packages/cli/src/ui/hooks/vim.test.tsx | 646 ++++++++++---------------
 1 file changed, 254 insertions(+), 392 deletions(-)

diff --git a/packages/cli/src/ui/hooks/vim.test.tsx b/packages/cli/src/ui/hooks/vim.test.tsx
index 7588899b87..b767d04cb8 100644
--- a/packages/cli/src/ui/hooks/vim.test.tsx
+++ b/packages/cli/src/ui/hooks/vim.test.tsx
@@ -14,6 +14,7 @@ import type { Key } from './useKeypress.js';
 import type {
   TextBuffer,
   TextBufferState,
+  TextBufferAction,
 } from '../components/shared/text-buffer.js';
 import { textBufferReducer } from '../components/shared/text-buffer.js';
 
@@ -1355,12 +1356,249 @@ describe('useVim hook', () => {
   // Line operations (dd, cc) are tested in text-buffer.test.ts
 
   describe('Reducer-based integration tests', () => {
-    describe('de (delete word end)', () => {
-      it('should delete from cursor to end of current word', () => {
+    type VimActionType =
+      | 'vim_delete_word_end'
+      | 'vim_delete_word_backward'
+      | 'vim_change_word_forward'
+      | 'vim_change_word_end'
+      | 'vim_change_word_backward'
+      | 'vim_change_line'
+      | 'vim_delete_line'
+      | 'vim_delete_to_end_of_line'
+      | 'vim_change_to_end_of_line';
+
+    type VimReducerTestCase = {
+      command: string;
+      desc: string;
+      lines: string[];
+      cursorRow: number;
+      cursorCol: number;
+      actionType: VimActionType;
+      count?: number;
+      expectedLines: string[];
+      expectedCursorRow: number;
+      expectedCursorCol: number;
+    };
+
+    const testCases: VimReducerTestCase[] = [
+      {
+        command: 'de',
+        desc: 'delete from cursor to end of current word',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 1,
+        actionType: 'vim_delete_word_end' as const,
+        count: 1,
+        expectedLines: ['h world test'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 1,
+      },
+      {
+        command: 'de',
+        desc: 'delete multiple word ends with count',
+        lines: ['hello world test more'],
+        cursorRow: 0,
+        cursorCol: 1,
+        actionType: 'vim_delete_word_end' as const,
+        count: 2,
+        expectedLines: ['h test more'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 1,
+      },
+      {
+        command: 'db',
+        desc: 'delete from cursor to start of previous word',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 11,
+        actionType: 'vim_delete_word_backward' as const,
+        count: 1,
+        expectedLines: ['hello  test'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 6,
+      },
+      {
+        command: 'db',
+        desc: 'delete multiple words backward with count',
+        lines: ['hello world test more'],
+        cursorRow: 0,
+        cursorCol: 17,
+        actionType: 'vim_delete_word_backward' as const,
+        count: 2,
+        expectedLines: ['hello more'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 6,
+      },
+      {
+        command: 'cw',
+        desc: 'delete from cursor to start of next word',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 0,
+        actionType: 'vim_change_word_forward' as const,
+        count: 1,
+        expectedLines: ['world test'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 0,
+      },
+      {
+        command: 'cw',
+        desc: 'change multiple words with count',
+        lines: ['hello world test more'],
+        cursorRow: 0,
+        cursorCol: 0,
+        actionType: 'vim_change_word_forward' as const,
+        count: 2,
+        expectedLines: ['test more'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 0,
+      },
+      {
+        command: 'ce',
+        desc: 'change from cursor to end of current word',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 1,
+        actionType: 'vim_change_word_end' as const,
+        count: 1,
+        expectedLines: ['h world test'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 1,
+      },
+      {
+        command: 'ce',
+        desc: 'change multiple word ends with count',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 1,
+        actionType: 'vim_change_word_end' as const,
+        count: 2,
+        expectedLines: ['h test'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 1,
+      },
+      {
+        command: 'cb',
+        desc: 'change from cursor to start of previous word',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 11,
+        actionType: 'vim_change_word_backward' as const,
+        count: 1,
+        expectedLines: ['hello  test'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 6,
+      },
+      {
+        command: 'cc',
+        desc: 'clear the line and place cursor at the start',
+        lines: ['  hello world'],
+        cursorRow: 0,
+        cursorCol: 5,
+        actionType: 'vim_change_line' as const,
+        count: 1,
+        expectedLines: [''],
+        expectedCursorRow: 0,
+        expectedCursorCol: 0,
+      },
+      {
+        command: 'dd',
+        desc: 'delete the current line',
+        lines: ['line1', 'line2', 'line3'],
+        cursorRow: 1,
+        cursorCol: 2,
+        actionType: 'vim_delete_line' as const,
+        count: 1,
+        expectedLines: ['line1', 'line3'],
+        expectedCursorRow: 1,
+        expectedCursorCol: 0,
+      },
+      {
+        command: 'dd',
+        desc: 'delete multiple lines with count',
+        lines: ['line1', 'line2', 'line3', 'line4'],
+        cursorRow: 1,
+        cursorCol: 2,
+        actionType: 'vim_delete_line' as const,
+        count: 2,
+        expectedLines: ['line1', 'line4'],
+        expectedCursorRow: 1,
+        expectedCursorCol: 0,
+      },
+      {
+        command: 'dd',
+        desc: 'handle deleting last line',
+        lines: ['only line'],
+        cursorRow: 0,
+        cursorCol: 3,
+        actionType: 'vim_delete_line' as const,
+        count: 1,
+        expectedLines: [''],
+        expectedCursorRow: 0,
+        expectedCursorCol: 0,
+      },
+      {
+        command: 'D',
+        desc: 'delete from cursor to end of line',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 6,
+        actionType: 'vim_delete_to_end_of_line' as const,
+        expectedLines: ['hello '],
+        expectedCursorRow: 0,
+        expectedCursorCol: 6,
+      },
+      {
+        command: 'D',
+        desc: 'handle D at end of line',
+        lines: ['hello world'],
+        cursorRow: 0,
+        cursorCol: 11,
+        actionType: 'vim_delete_to_end_of_line' as const,
+        expectedLines: ['hello world'],
+        expectedCursorRow: 0,
+        expectedCursorCol: 11,
+      },
+      {
+        command: 'C',
+        desc: 'change from cursor to end of line',
+        lines: ['hello world test'],
+        cursorRow: 0,
+        cursorCol: 6,
+        actionType: 'vim_change_to_end_of_line' as const,
+        expectedLines: ['hello '],
+        expectedCursorRow: 0,
+        expectedCursorCol: 6,
+      },
+      {
+        command: 'C',
+        desc: 'handle C at beginning of line',
+        lines: ['hello world'],
+        cursorRow: 0,
+        cursorCol: 0,
+        actionType: 'vim_change_to_end_of_line' as const,
+        expectedLines: [''],
+        expectedCursorRow: 0,
+        expectedCursorCol: 0,
+      },
+    ];
+
+    it.each(testCases)(
+      '$command: should $desc',
+      ({
+        lines,
+        cursorRow,
+        cursorCol,
+        actionType,
+        count,
+        expectedLines,
+        expectedCursorRow,
+        expectedCursorCol,
+      }: VimReducerTestCase) => {
         const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 1, // cursor on 'e' in "hello"
+          lines,
+          cursorRow,
+          cursorCol,
           preferredCol: null,
           undoStack: [],
           redoStack: [],
@@ -1368,394 +1606,18 @@ describe('useVim hook', () => {
           selectionAnchor: null,
         });
 
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_word_end',
-          payload: { count: 1 },
-        });
+        const action = (
+          count
+            ? { type: actionType, payload: { count } }
+            : { type: actionType }
+        ) as TextBufferAction;
 
-        // Should delete "ello" (from cursor to end of word), leaving "h world test"
-        expect(result.lines).toEqual(['h world test']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(1);
-      });
+        const result = textBufferReducer(initialState, action);
 
-      it('should delete multiple word ends with count', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test more'],
-          cursorRow: 0,
-          cursorCol: 1, // cursor on 'e' in "hello"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_word_end',
-          payload: { count: 2 },
-        });
-
-        // Should delete "ello world" (to end of second word), leaving "h test more"
-        expect(result.lines).toEqual(['h test more']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(1);
-      });
-    });
-
-    describe('db (delete word backward)', () => {
-      it('should delete from cursor to start of previous word', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 11, // cursor on 't' in "test"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_word_backward',
-          payload: { count: 1 },
-        });
-
-        // Should delete "world" (previous word only), leaving "hello  test"
-        expect(result.lines).toEqual(['hello  test']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(6);
-      });
-
-      it('should delete multiple words backward with count', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test more'],
-          cursorRow: 0,
-          cursorCol: 17, // cursor on 'm' in "more"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_word_backward',
-          payload: { count: 2 },
-        });
-
-        // Should delete "world test " (two words backward), leaving "hello more"
-        expect(result.lines).toEqual(['hello more']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(6);
-      });
-    });
-
-    describe('cw (change word forward)', () => {
-      it('should delete from cursor to start of next word', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 0, // cursor on 'h' in "hello"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_word_forward',
-          payload: { count: 1 },
-        });
-
-        // Should delete "hello " (word + space), leaving "world test"
-        expect(result.lines).toEqual(['world test']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(0);
-      });
-
-      it('should change multiple words with count', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test more'],
-          cursorRow: 0,
-          cursorCol: 0,
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_word_forward',
-          payload: { count: 2 },
-        });
-
-        // Should delete "hello world " (two words), leaving "test more"
-        expect(result.lines).toEqual(['test more']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(0);
-      });
-    });
-
-    describe('ce (change word end)', () => {
-      it('should change from cursor to end of current word', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 1, // cursor on 'e' in "hello"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_word_end',
-          payload: { count: 1 },
-        });
-
-        // Should delete "ello" (from cursor to end of word), leaving "h world test"
-        expect(result.lines).toEqual(['h world test']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(1);
-      });
-
-      it('should change multiple word ends with count', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 1, // cursor on 'e' in "hello"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_word_end',
-          payload: { count: 2 },
-        });
-
-        // Should delete "ello world" (to end of second word), leaving "h test"
-        expect(result.lines).toEqual(['h test']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(1);
-      });
-    });
-
-    describe('cb (change word backward)', () => {
-      it('should change from cursor to start of previous word', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 11, // cursor on 't' in "test"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_word_backward',
-          payload: { count: 1 },
-        });
-
-        // Should delete "world" (previous word only), leaving "hello  test"
-        expect(result.lines).toEqual(['hello  test']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(6);
-      });
-    });
-
-    describe('cc (change line)', () => {
-      it('should clear the line and place cursor at the start', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['  hello world'],
-          cursorRow: 0,
-          cursorCol: 5, // cursor on 'o'
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_line',
-          payload: { count: 1 },
-        });
-
-        expect(result.lines).toEqual(['']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(0);
-      });
-    });
-
-    describe('dd (delete line)', () => {
-      it('should delete the current line', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['line1', 'line2', 'line3'],
-          cursorRow: 1,
-          cursorCol: 2,
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_line',
-          payload: { count: 1 },
-        });
-
-        expect(result.lines).toEqual(['line1', 'line3']);
-        expect(result.cursorRow).toBe(1);
-        expect(result.cursorCol).toBe(0);
-      });
-
-      it('should delete multiple lines with count', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['line1', 'line2', 'line3', 'line4'],
-          cursorRow: 1,
-          cursorCol: 2,
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_line',
-          payload: { count: 2 },
-        });
-
-        // Should delete lines 1 and 2
-        expect(result.lines).toEqual(['line1', 'line4']);
-        expect(result.cursorRow).toBe(1);
-        expect(result.cursorCol).toBe(0);
-      });
-
-      it('should handle deleting last line', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['only line'],
-          cursorRow: 0,
-          cursorCol: 3,
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_line',
-          payload: { count: 1 },
-        });
-
-        // Should leave an empty line when deleting the only line
-        expect(result.lines).toEqual(['']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(0);
-      });
-    });
-
-    describe('D (delete to end of line)', () => {
-      it('should delete from cursor to end of line', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 6, // cursor on 'w' in "world"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_to_end_of_line',
-        });
-
-        // Should delete "world test", leaving "hello "
-        expect(result.lines).toEqual(['hello ']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(6);
-      });
-
-      it('should handle D at end of line', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world'],
-          cursorRow: 0,
-          cursorCol: 11, // cursor at end
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_delete_to_end_of_line',
-        });
-
-        // Should not change anything when at end of line
-        expect(result.lines).toEqual(['hello world']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(11);
-      });
-    });
-
-    describe('C (change to end of line)', () => {
-      it('should change from cursor to end of line', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world test'],
-          cursorRow: 0,
-          cursorCol: 6, // cursor on 'w' in "world"
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_to_end_of_line',
-        });
-
-        // Should delete "world test", leaving "hello "
-        expect(result.lines).toEqual(['hello ']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(6);
-      });
-
-      it('should handle C at beginning of line', () => {
-        const initialState = createMockTextBufferState({
-          lines: ['hello world'],
-          cursorRow: 0,
-          cursorCol: 0,
-          preferredCol: null,
-          undoStack: [],
-          redoStack: [],
-          clipboard: null,
-          selectionAnchor: null,
-        });
-
-        const result = textBufferReducer(initialState, {
-          type: 'vim_change_to_end_of_line',
-        });
-
-        // Should delete entire line content
-        expect(result.lines).toEqual(['']);
-        expect(result.cursorRow).toBe(0);
-        expect(result.cursorCol).toBe(0);
-      });
-    });
+        expect(result.lines).toEqual(expectedLines);
+        expect(result.cursorRow).toBe(expectedCursorRow);
+        expect(result.cursorCol).toBe(expectedCursorCol);
+      },
+    );
   });
 });

From e115083fac3799ac91b40d2915d3a2eab4103bc8 Mon Sep 17 00:00:00 2001
From: Jerop Kipruto <jerop@google.com>
Date: Mon, 27 Oct 2025 13:33:29 -0400
Subject: [PATCH 34/73] docs(github): revamp pull request template (#11949)

---
 .github/pull_request_template.md | 63 ++++++++++++++++----------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 773e4cc871..37d896381d 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,41 +1,42 @@
-## TLDR
+## Summary
 
-<!-- Add a brief description of what this pull request changes and why and any important things for reviewers to look at -->
+<!-- Concisely describe what this PR changes and why. Focus on impact and
+urgency. -->
 
-## Dive Deeper
+## Details
 
-<!-- more thoughts and in-depth discussion here -->
+<!-- Add any extra context and design decisions. Keep it brief but complete. -->
 
-## Reviewer Test Plan
+## Related Issues
 
-<!-- when a person reviews your code they should ideally be pulling and running that code. How would they validate your change works and if relevant what are some good classes of example prompts and ways they can exercise your changes -->
+<!-- Use keywords to auto-close issues (Closes #123, Fixes #456). If this PR is
+only related to an issue or is a partial fix, simply reference the issue number
+without a keyword (Related to #123). -->
 
-## Testing Matrix
+## How to Validate
 
-<!-- Before submitting please validate your changes on as many of these options as possible -->
+<!-- List exact steps for reviewers to validate the change. Include commands,
+expected results, and edge cases. -->
 
-|          | 🍏  | 🪟  | 🐧  |
-| -------- | --- | --- | --- |
-| npm run  | ❓  | ❓  | ❓  |
-| npx      | ❓  | ❓  | ❓  |
-| Docker   | ❓  | ❓  | ❓  |
-| Podman   | ❓  | -   | -   |
-| Seatbelt | ❓  | -   | -   |
+## Pre-Merge Checklist
 
-## Linked issues / bugs
+<!-- Check all that apply before requesting review or merging. -->
 
-<!--
-Link to any related issues or bugs.
-
-**If this PR fully resolves the issue, use one of the following keywords to automatically close the issue when this PR is merged:**
-
-- Closes #<issue_number>
-- Fixes #<issue_number>
-- Resolves #<issue_number>
-
-*Example: `Resolves #123`*
-
-**If this PR is only related to an issue or is a partial fix, simply reference the issue number without a keyword:**
-
-*Example: `This PR makes progress on #456` or `Related to #789`*
--->
+- [ ] Updated relevant documentation and README (if needed)
+- [ ] Added/updated tests (if needed)
+- [ ] Noted breaking changes (if any)
+- [ ] Validated on required platforms/methods:
+  - [ ] macOS
+    - [ ] npm run
+    - [ ] npx
+    - [ ] Docker
+    - [ ] Podman
+    - [ ] Seatbelt
+  - [ ] Windows
+    - [ ] npm run
+    - [ ] npx
+    - [ ] Docker
+  - [ ] Linux
+    - [ ] npm run
+    - [ ] npx
+    - [ ] Docker

From 0e4dce23b245eac8855fcd003143d53b96bf7af0 Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Mon, 27 Oct 2025 11:35:16 -0700
Subject: [PATCH 35/73] use debugLogger instead of console (#12095)

---
 packages/cli/src/ui/AppContainer.tsx |  2 +-
 packages/core/src/tools/glob.ts      |  7 ++++---
 packages/core/src/tools/grep.ts      |  4 ++--
 packages/core/src/tools/ls.test.ts   | 11 -----------
 packages/core/src/tools/ls.ts        |  3 ++-
 5 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index 426543d772..ae0f43b418 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -590,7 +590,7 @@ Logging in with Google... Please restart Gemini CLI to continue.
         },
         Date.now(),
       );
-      console.error('Error refreshing memory:', error);
+      debugLogger.warn('Error refreshing memory:', error);
     }
   }, [config, historyManager, settings.merged]);
 
diff --git a/packages/core/src/tools/glob.ts b/packages/core/src/tools/glob.ts
index 0dbd71e479..f090056654 100644
--- a/packages/core/src/tools/glob.ts
+++ b/packages/core/src/tools/glob.ts
@@ -15,6 +15,8 @@ import { type Config } from '../config/config.js';
 import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
 import { ToolErrorType } from './tool-error.js';
 import { GLOB_TOOL_NAME } from './tool-names.js';
+import { getErrorMessage } from '../utils/errors.js';
+import { debugLogger } from '../utils/debugLogger.js';
 
 // Subset of 'Path' interface provided by 'glob' that we can implement for testing
 export interface GlobPath {
@@ -238,9 +240,8 @@ class GlobToolInvocation extends BaseToolInvocation<
         returnDisplay: `Found ${fileCount} matching file(s)`,
       };
     } catch (error) {
-      const errorMessage =
-        error instanceof Error ? error.message : String(error);
-      console.error(`GlobLogic execute Error: ${errorMessage}`, error);
+      debugLogger.warn(`GlobLogic execute Error`, error);
+      const errorMessage = getErrorMessage(error);
       const rawError = `Error during glob search operation: ${errorMessage}`;
       return {
         llmContent: rawError,
diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts
index e2637accb8..d279d65e49 100644
--- a/packages/core/src/tools/grep.ts
+++ b/packages/core/src/tools/grep.ts
@@ -199,7 +199,7 @@ class GrepToolInvocation extends BaseToolInvocation<
         returnDisplay: `Found ${matchCount} ${matchTerm}`,
       };
     } catch (error) {
-      console.error(`Error during GrepLogic execution: ${error}`);
+      debugLogger.warn(`Error during GrepLogic execution: ${error}`);
       const errorMessage = getErrorMessage(error);
       return {
         llmContent: `Error during grep search operation: ${errorMessage}`,
@@ -552,7 +552,7 @@ class GrepToolInvocation extends BaseToolInvocation<
 
       return allMatches;
     } catch (error: unknown) {
-      console.error(
+      debugLogger.warn(
         `GrepLogic: Error in performGrepSearch (Strategy: ${strategyUsed}): ${getErrorMessage(
           error,
         )}`,
diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts
index 1cda0c9e7e..d6c828c94b 100644
--- a/packages/core/src/tools/ls.test.ts
+++ b/packages/core/src/tools/ls.test.ts
@@ -248,11 +248,6 @@ describe('LSTool', () => {
         return originalStat(p);
       });
 
-      // Spy on console.error to verify it's called
-      const consoleErrorSpy = vi
-        .spyOn(console, 'error')
-        .mockImplementation(() => {});
-
       const invocation = lsTool.build({ path: tempRootDir });
       const result = await invocation.execute(abortSignal);
 
@@ -261,13 +256,7 @@ describe('LSTool', () => {
       expect(result.llmContent).not.toContain('problematic.txt');
       expect(result.returnDisplay).toBe('Listed 1 item(s).');
 
-      // Verify error was logged
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
-        expect.stringMatching(/Error accessing.*problematic\.txt/s),
-      );
-
       statSpy.mockRestore();
-      consoleErrorSpy.mockRestore();
     });
   });
 
diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts
index 7aac367e50..b899ae8fcc 100644
--- a/packages/core/src/tools/ls.ts
+++ b/packages/core/src/tools/ls.ts
@@ -14,6 +14,7 @@ import type { Config } from '../config/config.js';
 import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
 import { ToolErrorType } from './tool-error.js';
 import { LS_TOOL_NAME } from './tool-names.js';
+import { debugLogger } from '../utils/debugLogger.js';
 
 /**
  * Parameters for the LS tool
@@ -205,7 +206,7 @@ class LSToolInvocation extends BaseToolInvocation<LSToolParams, ToolResult> {
           });
         } catch (error) {
           // Log error internally but don't fail the whole listing
-          console.error(`Error accessing ${fullPath}: ${error}`);
+          debugLogger.debug(`Error accessing ${fullPath}: ${error}`);
         }
       }
 

From 29efebe38f5491da822c3f97fe377dbdb31223d2 Mon Sep 17 00:00:00 2001
From: Alisa <62909685+alisa-alisa@users.noreply.github.com>
Date: Mon, 27 Oct 2025 11:56:08 -0700
Subject: [PATCH 36/73] Implementing support for recitations events in
 responses from A2A Server (#12067)

Co-authored-by: Alisa Novikova <alisanovikova@google.com>
---
 packages/a2a-server/src/agent/task.test.ts | 61 +++++++++++++++++++++-
 packages/a2a-server/src/agent/task.ts      | 19 +++++++
 packages/a2a-server/src/types.ts           | 11 +++-
 3 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/packages/a2a-server/src/agent/task.test.ts b/packages/a2a-server/src/agent/task.test.ts
index 1bf26d8bc8..8b347f70e2 100644
--- a/packages/a2a-server/src/agent/task.test.ts
+++ b/packages/a2a-server/src/agent/task.test.ts
@@ -4,11 +4,24 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  describe,
+  it,
+  expect,
+  vi,
+  beforeEach,
+  afterEach,
+  type Mock,
+} from 'vitest';
 import { Task } from './task.js';
-import type { Config, ToolCallRequestInfo } from '@google/gemini-cli-core';
+import {
+  GeminiEventType,
+  type Config,
+  type ToolCallRequestInfo,
+} from '@google/gemini-cli-core';
 import { createMockConfig } from '../utils/testing_utils.js';
 import type { ExecutionEventBus } from '@a2a-js/sdk/server';
+import { CoderAgentEvent } from '../types.js';
 import type { ToolCall } from '@google/gemini-cli-core';
 
 describe('Task', () => {
@@ -94,6 +107,50 @@ describe('Task', () => {
         }),
       );
     });
+
+    it('should handle Citation event and publish to event bus', async () => {
+      const mockConfig = createMockConfig();
+      const mockEventBus: ExecutionEventBus = {
+        publish: vi.fn(),
+        on: vi.fn(),
+        off: vi.fn(),
+        once: vi.fn(),
+        removeAllListeners: vi.fn(),
+        finished: vi.fn(),
+      };
+
+      // @ts-expect-error - Calling private constructor for test purposes.
+      const task = new Task(
+        'task-id',
+        'context-id',
+        mockConfig as Config,
+        mockEventBus,
+      );
+
+      const citationText = 'Source: example.com';
+      const citationEvent = {
+        type: GeminiEventType.Citation,
+        value: citationText,
+      };
+
+      await task.acceptAgentMessage(citationEvent);
+
+      expect(mockEventBus.publish).toHaveBeenCalledOnce();
+      const publishedEvent = (mockEventBus.publish as Mock).mock.calls[0][0];
+
+      expect(publishedEvent.kind).toBe('status-update');
+      expect(publishedEvent.taskId).toBe('task-id');
+      expect(publishedEvent.metadata.coderAgent.kind).toBe(
+        CoderAgentEvent.CitationEvent,
+      );
+      expect(publishedEvent.status.message).toBeDefined();
+      expect(publishedEvent.status.message.parts).toEqual([
+        {
+          kind: 'text',
+          text: citationText,
+        },
+      ]);
+    });
   });
 
   describe('_schedulerToolCallsUpdate', () => {
diff --git a/packages/a2a-server/src/agent/task.ts b/packages/a2a-server/src/agent/task.ts
index eee5e736d6..f0061bc6a9 100644
--- a/packages/a2a-server/src/agent/task.ts
+++ b/packages/a2a-server/src/agent/task.ts
@@ -49,6 +49,7 @@ import type {
   TaskMetadata,
   Thought,
   ThoughtSummary,
+  Citation,
 } from '../types.js';
 import type { PartUnion, Part as genAiPart } from '@google/genai';
 
@@ -638,6 +639,10 @@ export class Task {
         logger.info('[Task] Sending agent thought...');
         this._sendThought(event.value, traceId);
         break;
+      case GeminiEventType.Citation:
+        logger.info('[Task] Received citation from LLM stream.');
+        this._sendCitation(event.value);
+        break;
       case GeminiEventType.ChatCompressed:
         break;
       case GeminiEventType.Finished:
@@ -979,4 +984,18 @@ export class Task {
       ),
     );
   }
+
+  _sendCitation(citation: string) {
+    if (!citation || citation.trim() === '') {
+      return;
+    }
+    logger.info('[Task] Sending citation to event bus.');
+    const message = this._createTextMessage(citation);
+    const citationEvent: Citation = {
+      kind: CoderAgentEvent.CitationEvent,
+    };
+    this.eventBus?.publish(
+      this._createStatusUpdateEvent(this.taskState, citationEvent, message),
+    );
+  }
 }
diff --git a/packages/a2a-server/src/types.ts b/packages/a2a-server/src/types.ts
index f806af833d..74b5ec9320 100644
--- a/packages/a2a-server/src/types.ts
+++ b/packages/a2a-server/src/types.ts
@@ -37,6 +37,10 @@ export enum CoderAgentEvent {
    * An event that contains a thought from the agent.
    */
   ThoughtEvent = 'thought',
+  /**
+   * An event that contains a citation from the agent.
+   */
+  CitationEvent = 'citation',
 }
 
 export interface AgentSettings {
@@ -64,6 +68,10 @@ export interface Thought {
   kind: CoderAgentEvent.ThoughtEvent;
 }
 
+export interface Citation {
+  kind: CoderAgentEvent.CitationEvent;
+}
+
 export type ThoughtSummary = {
   subject: string;
   description: string;
@@ -80,7 +88,8 @@ export type CoderAgentMessage =
   | ToolCallUpdate
   | TextContent
   | StateChange
-  | Thought;
+  | Thought
+  | Citation;
 
 export interface TaskMetadata {
   id: string;

From 4ef3c09332d8a272db40028e99b646999c1088e6 Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Mon, 27 Oct 2025 12:16:25 -0700
Subject: [PATCH 37/73] fix(core): update loop detection LLM schema fields
 (#12091)

---
 .../src/services/loopDetectionService.test.ts | 27 ++++++++++---------
 .../core/src/services/loopDetectionService.ts | 22 +++++++++------
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/packages/core/src/services/loopDetectionService.test.ts b/packages/core/src/services/loopDetectionService.test.ts
index e464bfb6c9..cb06ad8ef2 100644
--- a/packages/core/src/services/loopDetectionService.test.ts
+++ b/packages/core/src/services/loopDetectionService.test.ts
@@ -671,7 +671,7 @@ describe('LoopDetectionService LLM Checks', () => {
   it('should trigger LLM check on the 30th turn', async () => {
     mockBaseLlmClient.generateJson = vi
       .fn()
-      .mockResolvedValue({ confidence: 0.1 });
+      .mockResolvedValue({ unproductive_state_confidence: 0.1 });
     await advanceTurns(30);
     expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
     expect(mockBaseLlmClient.generateJson).toHaveBeenCalledWith(
@@ -687,9 +687,10 @@ describe('LoopDetectionService LLM Checks', () => {
 
   it('should detect a cognitive loop when confidence is high', async () => {
     // First check at turn 30
-    mockBaseLlmClient.generateJson = vi
-      .fn()
-      .mockResolvedValue({ confidence: 0.85, reasoning: 'Repetitive actions' });
+    mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
+      unproductive_state_confidence: 0.85,
+      unproductive_state_analysis: 'Repetitive actions',
+    });
     await advanceTurns(30);
     expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
 
@@ -697,9 +698,10 @@ describe('LoopDetectionService LLM Checks', () => {
     // The interval will be: 5 + (15 - 5) * (1 - 0.85) = 5 + 10 * 0.15 = 6.5 -> rounded to 7
     await advanceTurns(6); // advance to turn 36
 
-    mockBaseLlmClient.generateJson = vi
-      .fn()
-      .mockResolvedValue({ confidence: 0.95, reasoning: 'Repetitive actions' });
+    mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
+      unproductive_state_confidence: 0.95,
+      unproductive_state_analysis: 'Repetitive actions',
+    });
     const finalResult = await service.turnStarted(abortController.signal); // This is turn 37
 
     expect(finalResult).toBe(true);
@@ -713,9 +715,10 @@ describe('LoopDetectionService LLM Checks', () => {
   });
 
   it('should not detect a loop when confidence is low', async () => {
-    mockBaseLlmClient.generateJson = vi
-      .fn()
-      .mockResolvedValue({ confidence: 0.5, reasoning: 'Looks okay' });
+    mockBaseLlmClient.generateJson = vi.fn().mockResolvedValue({
+      unproductive_state_confidence: 0.5,
+      unproductive_state_analysis: 'Looks okay',
+    });
     await advanceTurns(30);
     const result = await service.turnStarted(abortController.signal);
     expect(result).toBe(false);
@@ -726,7 +729,7 @@ describe('LoopDetectionService LLM Checks', () => {
     // Confidence is 0.0, so interval should be MAX_LLM_CHECK_INTERVAL (15)
     mockBaseLlmClient.generateJson = vi
       .fn()
-      .mockResolvedValue({ confidence: 0.0 });
+      .mockResolvedValue({ unproductive_state_confidence: 0.0 });
     await advanceTurns(30); // First check at turn 30
     expect(mockBaseLlmClient.generateJson).toHaveBeenCalledTimes(1);
 
@@ -771,7 +774,7 @@ describe('LoopDetectionService LLM Checks', () => {
 
     mockBaseLlmClient.generateJson = vi
       .fn()
-      .mockResolvedValue({ confidence: 0.1 });
+      .mockResolvedValue({ unproductive_state_confidence: 0.1 });
 
     await advanceTurns(30);
 
diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts
index ac291b679d..e70ae83ffe 100644
--- a/packages/core/src/services/loopDetectionService.ts
+++ b/packages/core/src/services/loopDetectionService.ts
@@ -413,18 +413,21 @@ export class LoopDetectionService {
     const schema: Record<string, unknown> = {
       type: 'object',
       properties: {
-        reasoning: {
+        unproductive_state_analysis: {
           type: 'string',
           description:
             'Your reasoning on if the conversation is looping without forward progress.',
         },
-        confidence: {
+        unproductive_state_confidence: {
           type: 'number',
           description:
             'A number between 0.0 and 1.0 representing your confidence that the conversation is in an unproductive state.',
         },
       },
-      required: ['reasoning', 'confidence'],
+      required: [
+        'unproductive_state_analysis',
+        'unproductive_state_confidence',
+      ],
     };
     let result;
     try {
@@ -442,10 +445,13 @@ export class LoopDetectionService {
       return false;
     }
 
-    if (typeof result['confidence'] === 'number') {
-      if (result['confidence'] > 0.9) {
-        if (typeof result['reasoning'] === 'string' && result['reasoning']) {
-          debugLogger.warn(result['reasoning']);
+    if (typeof result['unproductive_state_confidence'] === 'number') {
+      if (result['unproductive_state_confidence'] > 0.9) {
+        if (
+          typeof result['unproductive_state_analysis'] === 'string' &&
+          result['unproductive_state_analysis']
+        ) {
+          debugLogger.warn(result['unproductive_state_analysis']);
         }
         logLoopDetected(
           this.config,
@@ -456,7 +462,7 @@ export class LoopDetectionService {
         this.llmCheckInterval = Math.round(
           MIN_LLM_CHECK_INTERVAL +
             (MAX_LLM_CHECK_INTERVAL - MIN_LLM_CHECK_INTERVAL) *
-              (1 - result['confidence']),
+              (1 - result['unproductive_state_confidence']),
         );
       }
     }

From 44c62c8e5d0285df7b9ab21f1fa67931cfaf31bd Mon Sep 17 00:00:00 2001
From: Jenna Inouye <jinouye@google.com>
Date: Mon, 27 Oct 2025 12:52:32 -0700
Subject: [PATCH 38/73] Docs: Contributing guide (#12012)

---
 CONTRIBUTING.md | 177 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 145 insertions(+), 32 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 03e9ad6564..56263d51c2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,18 @@
 # How to Contribute
 
-We would love to accept your patches and contributions to this project.
+We would love to accept your patches and contributions to this project. This
+document includes:
+
+- **[Before you begin](#before-you-begin):** Essential steps to take before
+  becoming a Gemini CLI contributor.
+- **[Code contribution process](#code-contribution-process):** How to contribute
+  code to Gemini CLI.
+- **[Development setup and workflow](#development-setup-and-workflow):** How to
+  set up your development environment and workflow.
+- **[Documentation contribution process](#documentation-contribution-process):**
+  How to contribute documentation to Gemini CLI.
+
+We're looking forward to seeing your contributions!
 
 ## Before you begin
 
@@ -23,15 +35,25 @@ sign a new one.
 This project follows
 [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
 
-## Contribution Process
+## Code contribution process
 
-### Code Reviews
+### Get started
+
+The process for contributing code is as follows:
+
+1.  **Find an issue** that you want to work on.
+2.  **Fork the repository** and create a new branch.
+3.  **Make your changes** in the `packages/` directory.
+4.  **Ensure all checks pass** by running `npm run preflight`.
+5.  **Open a pull request** with your changes.
+
+### Code reviews
 
 All submissions, including submissions by project members, require review. We
 use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests)
 for this purpose.
 
-### Self Assigning Issues
+### Self-assigning issues
 
 If you're looking for an issue to work on, check out our list of issues that are
 labeled
@@ -44,12 +66,12 @@ assign the issue to you, provided it is not already assigned.
 Please note that you can have a maximum of 3 issues assigned to you at any given
 time.
 
-### Pull Request Guidelines
+### Pull request guidelines
 
 To help us review and merge your PRs quickly, please follow these guidelines.
 PRs that do not meet these standards may be closed.
 
-#### 1. Link to an Existing Issue
+#### 1. Link to an existing issue
 
 All PRs should be linked to an existing issue in our tracker. This ensures that
 every change has been discussed and is aligned with the project's goals before
@@ -62,7 +84,7 @@ any code is written.
 If an issue for your change doesn't exist, please **open one first** and wait
 for feedback before you start coding.
 
-#### 2. Keep It Small and Focused
+#### 2. Keep it small and focused
 
 We favor small, atomic PRs that address a single issue or add a single,
 self-contained feature.
@@ -74,37 +96,40 @@ self-contained feature.
 Large changes should be broken down into a series of smaller, logical PRs that
 can be reviewed and merged independently.
 
-#### 3. Use Draft PRs for Work in Progress
+#### 3. Use draft PRs for work in progress
 
 If you'd like to get early feedback on your work, please use GitHub's **Draft
 Pull Request** feature. This signals to the maintainers that the PR is not yet
 ready for a formal review but is open for discussion and initial feedback.
 
-#### 4. Ensure All Checks Pass
+#### 4. Ensure all checks pass
 
 Before submitting your PR, ensure that all automated checks are passing by
 running `npm run preflight`. This command runs all tests, linting, and other
 style checks.
 
-#### 5. Update Documentation
+#### 5. Update documentation
 
 If your PR introduces a user-facing change (e.g., a new command, a modified
 flag, or a change in behavior), you must also update the relevant documentation
 in the `/docs` directory.
 
-#### 6. Write Clear Commit Messages and a Good PR Description
+For more information about writing documentation, see
+[Documentation contribution process](#documentation-contribution-process).
+
+#### 6. Write clear commit messages and a good PR description
 
 Your PR should have a clear, descriptive title and a detailed description of the
 changes. Follow the [Conventional Commits](https://www.conventionalcommits.org/)
 standard for your commit messages.
 
-- **Good PR Title:** `feat(cli): Add --json flag to 'config get' command`
-- **Bad PR Title:** `Made some changes`
+- **Good PR title:** `feat(cli): Add --json flag to 'config get' command`
+- **Bad PR title:** `Made some changes`
 
 In the PR description, explain the "why" behind your changes and link to the
 relevant issue (e.g., `Fixes #123`).
 
-## Forking
+### Forking
 
 If you are forking the repository you will be able to run the Build, Test and
 Integration test workflows. However in order to make the integration tests run
@@ -118,12 +143,12 @@ Additionally you will need to click on the `Actions` tab and enable workflows
 for your repository, you'll find it's the large blue button in the center of the
 screen.
 
-## Development Setup and Workflow
+### Development setup and workflow
 
 This section guides contributors on how to build, modify, and understand the
 development setup of this project.
 
-### Setting Up the Development Environment
+### Setting up the development environment
 
 **Prerequisites:**
 
@@ -135,7 +160,7 @@ development setup of this project.
       version of Node.js `>=20` is acceptable.
 2.  **Git**
 
-### Build Process
+### Build process
 
 To clone the repository:
 
@@ -160,7 +185,7 @@ This command typically compiles TypeScript to JavaScript, bundles assets, and
 prepares the packages for execution. Refer to `scripts/build.js` and
 `package.json` scripts for more details on what happens during the build.
 
-### Enabling Sandboxing
+### Enabling sandboxing
 
 [Sandboxing](#sandboxing) is highly recommended and requires, at a minimum,
 setting `GEMINI_SANDBOX=true` in your `~/.env` and ensuring a sandboxing
@@ -176,7 +201,7 @@ npm run build:all
 
 To skip building the sandbox container, you can use `npm run build` instead.
 
-### Running
+### Running the CLI
 
 To start the Gemini CLI from the source code (after building), run the following
 command from the root directory:
@@ -190,11 +215,11 @@ utilize `npm link path/to/gemini-cli/packages/cli` (see:
 [docs](https://docs.npmjs.com/cli/v9/commands/npm-link)) or
 `alias gemini="node path/to/gemini-cli/packages/cli"` to run with `gemini`
 
-### Running Tests
+### Running tests
 
 This project contains two types of tests: unit tests and integration tests.
 
-#### Unit Tests
+#### Unit tests
 
 To execute the unit test suite for the project:
 
@@ -206,7 +231,7 @@ This will run tests located in the `packages/core` and `packages/cli`
 directories. Ensure tests pass before submitting any changes. For a more
 comprehensive check, it is recommended to run `npm run preflight`.
 
-#### Integration Tests
+#### Integration tests
 
 The integration tests are designed to validate the end-to-end functionality of
 the Gemini CLI. They are not run as part of the default `npm run test` command.
@@ -220,7 +245,7 @@ npm run test:e2e
 For more detailed information on the integration testing framework, please see
 the [Integration Tests documentation](./docs/integration-tests.md).
 
-### Linting and Preflight Checks
+### Linting and preflight checks
 
 To ensure code quality and formatting consistency, run the preflight check:
 
@@ -267,7 +292,7 @@ root directory:
 npm run lint
 ```
 
-### Coding Conventions
+### Coding conventions
 
 - Please adhere to the coding style, patterns, and conventions used throughout
   the existing codebase.
@@ -279,7 +304,7 @@ npm run lint
 - **Imports:** Pay special attention to import paths. The project uses ESLint to
   enforce restrictions on relative imports between packages.
 
-### Project Structure
+### Project structure
 
 - `packages/`: Contains the individual sub-packages of the project.
   - `a2a-server`: A2A server implementation for the Gemini CLI. (Experimental)
@@ -294,9 +319,9 @@ npm run lint
 
 For more detailed architecture, see `docs/architecture.md`.
 
-## Debugging
+### Debugging
 
-### VS Code:
+#### VS Code
 
 0.  Run the CLI to interactively debug in VS Code with `F5`
 1.  Start the CLI in debug mode from the root directory:
@@ -354,9 +379,9 @@ used for the CLI's interface, is compatible with React DevTools version 4.x.
     Your running CLI application should then connect to React DevTools.
     ![](/docs/assets/connected_devtools.png)
 
-## Sandboxing
+### Sandboxing
 
-### macOS Seatbelt
+#### macOS Seatbelt
 
 On macOS, `gemini` uses Seatbelt (`sandbox-exec`) under a `permissive-open`
 profile (see `packages/cli/src/utils/sandbox-macos-permissive-open.sb`) that
@@ -372,7 +397,7 @@ Available built-in profiles are `{permissive,restrictive}-{open,closed,proxied}`
 `.gemini/sandbox-macos-<profile>.sb` under your project settings directory
 `.gemini`.
 
-### Container-based Sandboxing (All Platforms)
+#### Container-based sandboxing (all platforms)
 
 For stronger container-based sandboxing on macOS or other platforms, you can set
 `GEMINI_SANDBOX=true|docker|podman|<command>` in your environment or `.env`
@@ -395,7 +420,7 @@ for your projects by creating the files `.gemini/sandbox.Dockerfile` and/or
 running `gemini` with `BUILD_SANDBOX=1` to trigger building of your custom
 sandbox.
 
-#### Proxied Networking
+#### Proxied networking
 
 All sandboxing methods, including macOS Seatbelt using `*-proxied` profiles,
 support restricting outbound network traffic through a custom proxy server that
@@ -406,7 +431,7 @@ connections to `example.com:443` (e.g. `curl https://example.com`) and declines
 all other requests. The proxy is started and stopped automatically alongside the
 sandbox.
 
-## Manual Publish
+### Manual publish
 
 We publish an artifact for each commit to our internal registry. But if you need
 to manually cut a local build, then run the following commands:
@@ -418,3 +443,91 @@ npm run auth
 npm run prerelease:dev
 npm publish --workspaces
 ```
+
+## Documentation contribution process
+
+Our documentation must be kept up-to-date with our code contributions. We want
+our documentation to be clear, concise, and helpful to our users. We value:
+
+- **Clarity:** Use simple and direct language. Avoid jargon where possible.
+- **Accuracy:** Ensure all information is correct and up-to-date.
+- **Completeness:** Cover all aspects of a feature or topic.
+- **Examples:** Provide practical examples to help users understand how to use
+  Gemini CLI.
+
+### Getting started
+
+The process for contributing to the documentation is similar to contributing
+code.
+
+1. **Fork the repository** and create a new branch.
+2. **Make your changes** in the `/docs` directory.
+3. **Preview your changes locally** using a Markdown renderer.
+4. **Lint and format your changes.** Our preflight check includes linting and
+   formatting for documentation files.
+   ```bash
+   npm run preflight
+   ```
+5. **Open a pull request** with your changes.
+
+### Documentation structure
+
+Our documentation is organized using [sidebar.json](docs/sidebar.json) as the
+table of contents. When adding new documentation:
+
+1. Create your markdown file **in the appropriate directory** under `/docs`.
+2. Add an entry to `sidebar.json` in the relevant section.
+3. Ensure all internal links use relative paths and point to existing files.
+
+### Style guide
+
+We follow the
+[Google Developer Documentation Style Guide](https://developers.google.com/style).
+Please refer to it for guidance on writing style, tone, and formatting.
+
+#### Key style points
+
+- Use sentence case for headings.
+- Write in second person ("you") when addressing the reader.
+- Use present tense.
+- Keep paragraphs short and focused.
+- Use code blocks with appropriate language tags for syntax highlighting.
+- Include practical examples whenever possible.
+
+### Linting and formatting
+
+We use `prettier` to enforce a consistent style across our documentation. The
+`npm run preflight` command will check for any linting issues.
+
+You can also run the linter and formatter separately:
+
+- `npm run lint` - Check for linting issues
+- `npm run format` - Auto-format markdown files
+- `npm run lint:fix` - Auto-fix linting issues where possible
+
+Please make sure your contributions are free of linting errors before submitting
+a pull request.
+
+### Before you submit
+
+Before submitting your documentation pull request, please:
+
+1. Run `npm run preflight` to ensure all checks pass.
+2. Review your changes for clarity and accuracy.
+3. Check that all links work correctly.
+4. Ensure any code examples are tested and functional.
+5. Sign the
+   [Contributor License Agreement (CLA)](https://cla.developers.google.com/) if
+   you haven't already.
+
+### Need help?
+
+If you have questions about contributing documentation:
+
+- Check our [FAQ](docs/faq.md).
+- Review existing documentation for examples.
+- Open [an issue](https://github.com/google-gemini/gemini-cli/issues) to discuss
+  your proposed changes.
+- Reach out to the maintainers.
+
+We appreciate your contributions to making Gemini CLI documentation better!

From 9e8f7c074c03eb49d4f4f2e9b1260ad14fdfc737 Mon Sep 17 00:00:00 2001
From: cocosheng-g <cocosheng@google.com>
Date: Mon, 27 Oct 2025 16:05:11 -0400
Subject: [PATCH 39/73] Create BYOID auth client when detecting BYOID
 credentials (#11592)

---
 packages/core/src/code_assist/oauth2.test.ts | 49 +++++++++-
 packages/core/src/code_assist/oauth2.ts      | 97 ++++++++++++--------
 packages/core/src/code_assist/server.ts      |  4 +-
 packages/core/src/code_assist/setup.ts       |  4 +-
 4 files changed, 112 insertions(+), 42 deletions(-)

diff --git a/packages/core/src/code_assist/oauth2.test.ts b/packages/core/src/code_assist/oauth2.test.ts
index d089440e16..2210c695f9 100644
--- a/packages/core/src/code_assist/oauth2.test.ts
+++ b/packages/core/src/code_assist/oauth2.test.ts
@@ -14,7 +14,7 @@ import {
   clearOauthClientCache,
 } from './oauth2.js';
 import { UserAccountManager } from '../utils/userAccountManager.js';
-import { OAuth2Client, Compute } from 'google-auth-library';
+import { OAuth2Client, Compute, GoogleAuth } from 'google-auth-library';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import http from 'node:http';
@@ -420,6 +420,53 @@ describe('oauth2', () => {
         // Assert the correct credentials were used
         expect(mockClient.setCredentials).toHaveBeenCalledWith(envCreds);
       });
+
+      it('should use GoogleAuth for BYOID credentials from GOOGLE_APPLICATION_CREDENTIALS', async () => {
+        // Setup BYOID credentials via environment variable
+        const byoidCredentials = {
+          type: 'external_account_authorized_user',
+          client_id: 'mock-client-id',
+        };
+        const envCredsPath = path.join(tempHomeDir, 'byoid_creds.json');
+        await fs.promises.writeFile(
+          envCredsPath,
+          JSON.stringify(byoidCredentials),
+        );
+        vi.stubEnv('GOOGLE_APPLICATION_CREDENTIALS', envCredsPath);
+
+        // Mock GoogleAuth and its chain of calls
+        const mockExternalAccountClient = {
+          getAccessToken: vi.fn().mockResolvedValue({ token: 'byoid-token' }),
+        };
+        const mockFromJSON = vi
+          .fn()
+          .mockResolvedValue(mockExternalAccountClient);
+        const mockGoogleAuthInstance = {
+          fromJSON: mockFromJSON,
+        };
+        (GoogleAuth as unknown as Mock).mockImplementation(
+          () => mockGoogleAuthInstance,
+        );
+
+        const mockOAuth2Client = {
+          on: vi.fn(),
+        };
+        (OAuth2Client as unknown as Mock).mockImplementation(
+          () => mockOAuth2Client,
+        );
+
+        const client = await getOauthClient(
+          AuthType.LOGIN_WITH_GOOGLE,
+          mockConfig,
+        );
+
+        // Assert that GoogleAuth was used and the correct client was returned
+        expect(GoogleAuth).toHaveBeenCalledWith({
+          scopes: expect.any(Array),
+        });
+        expect(mockFromJSON).toHaveBeenCalledWith(byoidCredentials);
+        expect(client).toBe(mockExternalAccountClient);
+      });
     });
 
     describe('with GCP environment variables', () => {
diff --git a/packages/core/src/code_assist/oauth2.ts b/packages/core/src/code_assist/oauth2.ts
index fac45172e9..ef0be547f0 100644
--- a/packages/core/src/code_assist/oauth2.ts
+++ b/packages/core/src/code_assist/oauth2.ts
@@ -4,11 +4,12 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { Credentials } from 'google-auth-library';
+import type { Credentials, AuthClient, JWTInput } from 'google-auth-library';
 import {
   OAuth2Client,
   Compute,
   CodeChallengeMethod,
+  GoogleAuth,
 } from 'google-auth-library';
 import * as http from 'node:http';
 import url from 'node:url';
@@ -64,7 +65,7 @@ export interface OauthWebLogin {
   loginCompletePromise: Promise<void>;
 }
 
-const oauthClientPromises = new Map<AuthType, Promise<OAuth2Client>>();
+const oauthClientPromises = new Map<AuthType, Promise<AuthClient>>();
 
 function getUseEncryptedStorageFlag() {
   return process.env[FORCE_ENCRYPTED_FILE_ENV_VAR] === 'true';
@@ -73,7 +74,28 @@ function getUseEncryptedStorageFlag() {
 async function initOauthClient(
   authType: AuthType,
   config: Config,
-): Promise<OAuth2Client> {
+): Promise<AuthClient> {
+  const credentials = await fetchCachedCredentials();
+
+  if (
+    credentials &&
+    (credentials as { type?: string }).type ===
+      'external_account_authorized_user'
+  ) {
+    const auth = new GoogleAuth({
+      scopes: OAUTH_SCOPE,
+    });
+    const byoidClient = await auth.fromJSON({
+      ...credentials,
+      refresh_token: credentials.refresh_token ?? undefined,
+    });
+    const token = await byoidClient.getAccessToken();
+    if (token) {
+      debugLogger.debug('Created BYOID auth client.');
+      return byoidClient;
+    }
+  }
+
   const client = new OAuth2Client({
     clientId: OAUTH_CLIENT_ID,
     clientSecret: OAUTH_CLIENT_SECRET,
@@ -102,20 +124,35 @@ async function initOauthClient(
     }
   });
 
-  // If there are cached creds on disk, they always take precedence
-  if (await loadCachedCredentials(client)) {
-    // Found valid cached credentials.
-    // Check if we need to retrieve Google Account ID or Email
-    if (!userAccountManager.getCachedGoogleAccount()) {
-      try {
-        await fetchAndCacheUserInfo(client);
-      } catch (error) {
-        // Non-fatal, continue with existing auth.
-        debugLogger.warn('Failed to fetch user info:', getErrorMessage(error));
+  if (credentials) {
+    client.setCredentials(credentials as Credentials);
+    try {
+      // This will verify locally that the credentials look good.
+      const { token } = await client.getAccessToken();
+      if (token) {
+        // This will check with the server to see if it hasn't been revoked.
+        await client.getTokenInfo(token);
+
+        if (!userAccountManager.getCachedGoogleAccount()) {
+          try {
+            await fetchAndCacheUserInfo(client);
+          } catch (error) {
+            // Non-fatal, continue with existing auth.
+            debugLogger.warn(
+              'Failed to fetch user info:',
+              getErrorMessage(error),
+            );
+          }
+        }
+        debugLogger.log('Loaded cached credentials.');
+        return client;
       }
+    } catch (error) {
+      debugLogger.debug(
+        `Cached credentials are not valid:`,
+        getErrorMessage(error),
+      );
     }
-    debugLogger.log('Loaded cached credentials.');
-    return client;
   }
 
   // In Google Cloud Shell, we can use Application Default Credentials (ADC)
@@ -218,7 +255,7 @@ async function initOauthClient(
 export async function getOauthClient(
   authType: AuthType,
   config: Config,
-): Promise<OAuth2Client> {
+): Promise<AuthClient> {
   if (!oauthClientPromises.has(authType)) {
     oauthClientPromises.set(authType, initOauthClient(authType, config));
   }
@@ -432,15 +469,12 @@ export function getAvailablePort(): Promise<number> {
   });
 }
 
-async function loadCachedCredentials(client: OAuth2Client): Promise<boolean> {
+async function fetchCachedCredentials(): Promise<
+  Credentials | JWTInput | null
+> {
   const useEncryptedStorage = getUseEncryptedStorageFlag();
   if (useEncryptedStorage) {
-    const credentials = await OAuthCredentialStorage.loadCredentials();
-    if (credentials) {
-      client.setCredentials(credentials);
-      return true;
-    }
-    return false;
+    return await OAuthCredentialStorage.loadCredentials();
   }
 
   const pathsToTry = [
@@ -450,19 +484,8 @@ async function loadCachedCredentials(client: OAuth2Client): Promise<boolean> {
 
   for (const keyFile of pathsToTry) {
     try {
-      const creds = await fs.readFile(keyFile, 'utf-8');
-      client.setCredentials(JSON.parse(creds));
-
-      // This will verify locally that the credentials look good.
-      const { token } = await client.getAccessToken();
-      if (!token) {
-        continue;
-      }
-
-      // This will check with the server to see if it hasn't been revoked.
-      await client.getTokenInfo(token);
-
-      return true;
+      const keyFileString = await fs.readFile(keyFile, 'utf-8');
+      return JSON.parse(keyFileString);
     } catch (error) {
       // Log specific error for debugging, but continue trying other paths
       debugLogger.debug(
@@ -472,7 +495,7 @@ async function loadCachedCredentials(client: OAuth2Client): Promise<boolean> {
     }
   }
 
-  return false;
+  return null;
 }
 
 async function cacheCredentials(credentials: Credentials) {
diff --git a/packages/core/src/code_assist/server.ts b/packages/core/src/code_assist/server.ts
index 915d07c1df..8859d56083 100644
--- a/packages/core/src/code_assist/server.ts
+++ b/packages/core/src/code_assist/server.ts
@@ -4,7 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { OAuth2Client } from 'google-auth-library';
+import type { AuthClient } from 'google-auth-library';
 import type {
   CodeAssistGlobalUserSettingResponse,
   GoogleRpcResponse,
@@ -47,7 +47,7 @@ export const CODE_ASSIST_API_VERSION = 'v1internal';
 
 export class CodeAssistServer implements ContentGenerator {
   constructor(
-    readonly client: OAuth2Client,
+    readonly client: AuthClient,
     readonly projectId?: string,
     readonly httpOptions: HttpOptions = {},
     readonly sessionId?: string,
diff --git a/packages/core/src/code_assist/setup.ts b/packages/core/src/code_assist/setup.ts
index 055a0dbb57..d33c019d6c 100644
--- a/packages/core/src/code_assist/setup.ts
+++ b/packages/core/src/code_assist/setup.ts
@@ -12,7 +12,7 @@ import type {
 } from './types.js';
 import { UserTierId } from './types.js';
 import { CodeAssistServer } from './server.js';
-import type { OAuth2Client } from 'google-auth-library';
+import type { AuthClient } from 'google-auth-library';
 
 export class ProjectIdRequiredError extends Error {
   constructor() {
@@ -32,7 +32,7 @@ export interface UserData {
  * @param projectId the user's project id, if any
  * @returns the user's actual project id
  */
-export async function setupUser(client: OAuth2Client): Promise<UserData> {
+export async function setupUser(client: AuthClient): Promise<UserData> {
   const projectId =
     process.env['GOOGLE_CLOUD_PROJECT'] ||
     process.env['GOOGLE_CLOUD_PROJECT_ID'] ||

From abd22a753deee186f995ce2d98fd058e12965e35 Mon Sep 17 00:00:00 2001
From: Ruchika Goel <goelruchika1@gmail.com>
Date: Mon, 27 Oct 2025 13:34:38 -0700
Subject: [PATCH 40/73] =?UTF-8?q?feat(ID=20token=20support):=20Add=20ID=20?=
 =?UTF-8?q?token=20support=20for=20authenticating=20to=20MC=E2=80=A6=20(#1?=
 =?UTF-8?q?2031)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Adam Weidman <adamfweidman@google.com>
---
 docs/tools/mcp-server.md                      | 25 ++++++
 packages/core/src/config/config.ts            |  2 +
 .../core/src/mcp/google-auth-provider.test.ts | 88 ++++++++++++++++++-
 packages/core/src/mcp/google-auth-provider.ts | 57 ++++++++++--
 4 files changed, 164 insertions(+), 8 deletions(-)

diff --git a/docs/tools/mcp-server.md b/docs/tools/mcp-server.md
index 47f169ba38..685a637cf8 100644
--- a/docs/tools/mcp-server.md
+++ b/docs/tools/mcp-server.md
@@ -150,6 +150,11 @@ Each server configuration supports the following properties:
   server. Tools listed here will not be available to the model, even if they are
   exposed by the server. **Note:** `excludeTools` takes precedence over
   `includeTools` - if a tool is in both lists, it will be excluded.
+- **`allow_unscoped_id_tokens_cloud_run`** (boolean): When `true` and the MCP
+  server host is a Cloud Run service (`*.run.app`), the CLI will use Google
+  Application Default Credentials (ADC) to generate an unscoped ID token and
+  send it as `Authorization: Bearer <token>`. When using this flag, do not set
+  OAuth scopes; they are not needed.
 - **`targetAudience`** (string): The OAuth Client ID allowlisted on the
   IAP-protected application you are trying to access. Used with
   `authProviderType: 'service_account_impersonation'`.
@@ -281,6 +286,26 @@ property:
 }
 ```
 
+#### Google Credential with Cloud Run ID tokens
+
+When connecting to a Cloud Run service endpoint (`*.run.app`), you must opt into
+ID-token-based authentication using ADC. Note that the generated ID token is
+unscoped.
+
+```json
+{
+  "mcpServers": {
+    "googleCloudServer": {
+      "url": "https://my-gcp-service.run.app/sse",
+      "authProviderType": "google_credentials",
+      "allow_unscoped_id_tokens_cloud_run": true
+    }
+  }
+}
+```
+
+Note: Only `*.run.app` hosts are supported for this flag.
+
 #### Service Account Impersonation
 
 To authenticate with a server using Service Account Impersonation, you must set
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 78632d0480..5e3a337218 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -193,6 +193,8 @@ export class MCPServerConfig {
     // OAuth configuration
     readonly oauth?: MCPOAuthConfig,
     readonly authProviderType?: AuthProviderType,
+    // When true, use Google ADC to fetch ID tokens for Cloud Run
+    readonly allow_unscoped_id_tokens_cloud_run?: boolean,
     // Service Account Configuration
     /* targetAudience format: CLIENT_ID.apps.googleusercontent.com */
     readonly targetAudience?: string,
diff --git a/packages/core/src/mcp/google-auth-provider.test.ts b/packages/core/src/mcp/google-auth-provider.test.ts
index efe959ff3c..ce86d7a2ab 100644
--- a/packages/core/src/mcp/google-auth-provider.test.ts
+++ b/packages/core/src/mcp/google-auth-provider.test.ts
@@ -20,12 +20,16 @@ describe('GoogleCredentialProvider', () => {
     },
   } as MCPServerConfig;
 
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
   it('should throw an error if no scopes are provided', () => {
     const config = {
       url: 'https://test.googleapis.com',
     } as MCPServerConfig;
     expect(() => new GoogleCredentialProvider(config)).toThrow(
-      'Scopes must be provided in the oauth config for Google Credentials provider',
+      'Scopes must be provided in the oauth config for Google Credentials provider (or enable allow_unscoped_id_tokens_for_cloud_run to use ID tokens for Cloud Run endpoints)',
     );
   });
 
@@ -80,7 +84,19 @@ describe('GoogleCredentialProvider', () => {
     );
   });
 
-  describe('with provider instance', () => {
+  it('should not allow run.app host even when unscoped ID token flag is not present', () => {
+    const config = {
+      url: 'https://test.run.app',
+      oauth: {
+        scopes: ['scope1', 'scope2'],
+      },
+    } as MCPServerConfig;
+    expect(() => new GoogleCredentialProvider(config)).toThrow(
+      'To enable the Cloud Run MCP Server at https://test.run.app please set allow_unscoped_id_tokens_cloud_run:true in the MCP Server config.',
+    );
+  });
+
+  describe('with provider instance (Access Tokens)', () => {
     let provider: GoogleCredentialProvider;
     let mockGetAccessToken: Mock;
     let mockClient: {
@@ -154,4 +170,72 @@ describe('GoogleCredentialProvider', () => {
       vi.useRealTimers();
     });
   });
+
+  describe('ID token flow (allow_unscoped_id_tokens_cloud_run)', () => {
+    let mockFetchIdToken: Mock;
+    let mockIdClient: {
+      idTokenProvider: {
+        fetchIdToken: Mock;
+      };
+    };
+
+    beforeEach(() => {
+      mockFetchIdToken = vi.fn();
+      mockIdClient = {
+        idTokenProvider: {
+          fetchIdToken: mockFetchIdToken,
+        },
+      };
+      (GoogleAuth.prototype.getIdTokenClient as Mock).mockResolvedValue(
+        mockIdClient,
+      );
+    });
+
+    it('should return ID token when flag is enabled and derive audience from hostname', async () => {
+      const config = {
+        url: 'https://test.run.app/path',
+        allow_unscoped_id_tokens_cloud_run: true,
+      } as MCPServerConfig;
+      const payload = { exp: Math.floor(Date.now() / 1000) + 3600 };
+      const validToken = `header.${Buffer.from(JSON.stringify(payload)).toString('base64')}.signature`;
+      mockFetchIdToken.mockResolvedValue(validToken);
+
+      const provider = new GoogleCredentialProvider(config);
+      const tokens = await provider.tokens();
+      expect(tokens?.access_token).toBe(validToken);
+      expect(GoogleAuth.prototype.getIdTokenClient).toHaveBeenCalledWith(
+        'test.run.app',
+      );
+      expect(mockFetchIdToken).toHaveBeenCalledWith('test.run.app');
+    });
+
+    it('should return undefined and log error when fetching ID token fails', async () => {
+      const config = {
+        url: 'https://test.run.app/path',
+        allow_unscoped_id_tokens_cloud_run: true,
+      } as MCPServerConfig;
+      const consoleErrorSpy = vi
+        .spyOn(console, 'error')
+        .mockImplementation(() => {});
+      mockFetchIdToken.mockRejectedValue(new Error('Fetch failed'));
+
+      const provider = new GoogleCredentialProvider(config);
+      const tokens = await provider.tokens();
+      expect(tokens).toBeUndefined();
+      expect(consoleErrorSpy).toHaveBeenCalledWith(
+        'Failed to get ID token from Google ADC',
+        expect.any(Error),
+      );
+      consoleErrorSpy.mockRestore();
+    });
+
+    it('should not require scopes when flag allow_unscoped_id_tokens_cloud_run is true', () => {
+      const config = {
+        url: 'https://test.run.app',
+        allow_unscoped_id_tokens_cloud_run: true,
+      } as MCPServerConfig;
+
+      expect(() => new GoogleCredentialProvider(config)).not.toThrow();
+    });
+  });
 });
diff --git a/packages/core/src/mcp/google-auth-provider.ts b/packages/core/src/mcp/google-auth-provider.ts
index d152b4d256..3159798095 100644
--- a/packages/core/src/mcp/google-auth-provider.ts
+++ b/packages/core/src/mcp/google-auth-provider.ts
@@ -13,12 +13,17 @@ import type {
 } from '@modelcontextprotocol/sdk/shared/auth.js';
 import { GoogleAuth } from 'google-auth-library';
 import type { MCPServerConfig } from '../config/config.js';
-import { FIVE_MIN_BUFFER_MS } from './oauth-utils.js';
+import { OAuthUtils, FIVE_MIN_BUFFER_MS } from './oauth-utils.js';
 
+const CLOUD_RUN_HOST_REGEX = /^(.*\.)?run\.app$/;
+
+// Hostname patterns allowed to use the Google Credential provider with scoped access tokens.
 const ALLOWED_HOSTS = [/^.+\.googleapis\.com$/, /^(.*\.)?luci\.app$/];
 
 export class GoogleCredentialProvider implements OAuthClientProvider {
   private readonly auth: GoogleAuth;
+  private readonly useIdToken: boolean = false;
+  private readonly audience?: string;
   private cachedToken?: OAuthTokens;
   private tokenExpiryTime?: number;
 
@@ -42,20 +47,35 @@ export class GoogleCredentialProvider implements OAuthClientProvider {
     }
 
     const hostname = new URL(url).hostname;
-    if (!ALLOWED_HOSTS.some((pattern) => pattern.test(hostname))) {
+    const isRunAppHost = CLOUD_RUN_HOST_REGEX.test(hostname);
+    if (!this.config?.allow_unscoped_id_tokens_cloud_run && isRunAppHost) {
+      throw new Error(
+        `To enable the Cloud Run MCP Server at ${url} please set allow_unscoped_id_tokens_cloud_run:true in the MCP Server config.`,
+      );
+    }
+    if (this.config?.allow_unscoped_id_tokens_cloud_run && isRunAppHost) {
+      this.useIdToken = true;
+    }
+    this.audience = hostname;
+
+    if (
+      !this.useIdToken &&
+      !ALLOWED_HOSTS.some((pattern) => pattern.test(hostname))
+    ) {
       throw new Error(
         `Host "${hostname}" is not an allowed host for Google Credential provider.`,
       );
     }
 
-    const scopes = this.config?.oauth?.scopes;
-    if (!scopes || scopes.length === 0) {
+    // The access token flow MUST have scopes; an empty list is rejected too.
+    if (!this.useIdToken && !this.config?.oauth?.scopes?.length) {
       throw new Error(
-        'Scopes must be provided in the oauth config for Google Credentials provider',
+        'Scopes must be provided in the oauth config for Google Credentials provider (or enable allow_unscoped_id_tokens_cloud_run to use ID tokens for Cloud Run endpoints)',
       );
     }
+
     this.auth = new GoogleAuth({
-      scopes,
+      scopes: this.config?.oauth?.scopes,
     });
   }
 
@@ -81,6 +101,31 @@ export class GoogleCredentialProvider implements OAuthClientProvider {
     this.cachedToken = undefined;
     this.tokenExpiryTime = undefined;
 
+    // If allow_unscoped_id_tokens_cloud_run is enabled for a Cloud Run host, use ID tokens.
+    if (this.useIdToken) {
+      try {
+        const idClient = await this.auth.getIdTokenClient(this.audience!);
+        const idToken = await idClient.idTokenProvider.fetchIdToken(
+          this.audience!,
+        );
+
+        const newToken: OAuthTokens = {
+          access_token: idToken,
+          token_type: 'Bearer',
+        };
+
+        const expiryTime = OAuthUtils.parseTokenExpiry(idToken);
+        if (expiryTime) {
+          this.tokenExpiryTime = expiryTime;
+          this.cachedToken = newToken;
+        }
+        return newToken;
+      } catch (e) {
+        console.error('Failed to get ID token from Google ADC', e);
+        return undefined;
+      }
+    }
+
     const client = await this.auth.getClient();
     const accessTokenResponse = await client.getAccessToken();
 

From 6db64aab2bf761c1943a42b1d04871cc224fc96b Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Mon, 27 Oct 2025 13:40:03 -0700
Subject: [PATCH 41/73] fix(telemetry): Prevent duplicate StartSessionEvent
 logging (#12090)

---
 packages/cli/src/core/initializer.ts    |  7 +++++++
 packages/cli/src/gemini.test.tsx        | 24 ++++++++++++++++++++++++
 packages/core/src/config/config.test.ts | 18 ------------------
 packages/core/src/config/config.ts      |  9 +--------
 4 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/packages/cli/src/core/initializer.ts b/packages/cli/src/core/initializer.ts
index 089e0fb505..b7b2c6be16 100644
--- a/packages/cli/src/core/initializer.ts
+++ b/packages/cli/src/core/initializer.ts
@@ -10,6 +10,8 @@ import {
   IdeConnectionType,
   logIdeConnection,
   type Config,
+  StartSessionEvent,
+  logCliConfiguration,
 } from '@google/gemini-cli-core';
 import { type LoadedSettings } from '../config/settings.js';
 import { performInitialAuth } from './auth.js';
@@ -42,6 +44,11 @@ export async function initializeApp(
   const shouldOpenAuthDialog =
     settings.merged.security?.auth?.selectedType === undefined || !!authError;
 
+  logCliConfiguration(
+    config,
+    new StartSessionEvent(config, config.getToolRegistry()),
+  );
+
   if (config.getIdeMode()) {
     const ideClient = await IdeClient.getInstance();
     await ideClient.connect();
diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx
index 8be78561b9..645928cfb1 100644
--- a/packages/cli/src/gemini.test.tsx
+++ b/packages/cli/src/gemini.test.tsx
@@ -174,6 +174,18 @@ describe('gemini.tsx main function', () => {
         getMessageBus: () => ({
           subscribe: vi.fn(),
         }),
+        getToolRegistry: vi.fn(),
+        getContentGeneratorConfig: vi.fn(),
+        getModel: () => 'gemini-pro',
+        getEmbeddingModel: () => 'embedding-001',
+        getApprovalMode: () => 'default',
+        getCoreTools: () => [],
+        getTelemetryEnabled: () => false,
+        getTelemetryLogPromptsEnabled: () => false,
+        getFileFilteringRespectGitIgnore: () => true,
+        getOutputFormat: () => 'text',
+        getExtensions: () => [],
+        getUsageStatisticsEnabled: () => false,
       } as unknown as Config;
     });
     vi.mocked(loadSettings).mockReturnValue({
@@ -309,6 +321,18 @@ describe('gemini.tsx main function kitty protocol', () => {
       getMessageBus: () => ({
         subscribe: vi.fn(),
       }),
+      getToolRegistry: vi.fn(),
+      getContentGeneratorConfig: vi.fn(),
+      getModel: () => 'gemini-pro',
+      getEmbeddingModel: () => 'embedding-001',
+      getApprovalMode: () => 'default',
+      getCoreTools: () => [],
+      getTelemetryEnabled: () => false,
+      getTelemetryLogPromptsEnabled: () => false,
+      getFileFilteringRespectGitIgnore: () => true,
+      getOutputFormat: () => 'text',
+      getExtensions: () => [],
+      getUsageStatisticsEnabled: () => false,
     } as unknown as Config);
     vi.mocked(loadSettings).mockReturnValue({
       errors: [],
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index d1549c5355..5c49e50ec1 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -25,8 +25,6 @@ import {
 } from '../core/contentGenerator.js';
 import { GeminiClient } from '../core/client.js';
 import { GitService } from '../services/gitService.js';
-import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';
-
 import { ShellTool } from '../tools/shell.js';
 import { ReadFileTool } from '../tools/read-file.js';
 import { GrepTool } from '../tools/grep.js';
@@ -180,10 +178,6 @@ describe('Server Config (config.ts)', () => {
   beforeEach(() => {
     // Reset mocks if necessary
     vi.clearAllMocks();
-    vi.spyOn(
-      ClearcutLogger.prototype,
-      'logStartSessionEvent',
-    ).mockImplementation(() => undefined);
   });
 
   describe('initialize', () => {
@@ -432,18 +426,6 @@ describe('Server Config (config.ts)', () => {
         expect(config.getUsageStatisticsEnabled()).toBe(enabled);
       },
     );
-
-    it('logs the session start event', async () => {
-      const config = new Config({
-        ...baseParams,
-        usageStatisticsEnabled: true,
-      });
-      await config.refreshAuth(AuthType.USE_GEMINI);
-
-      expect(
-        ClearcutLogger.prototype.logStartSessionEvent,
-      ).toHaveBeenCalledOnce();
-    });
   });
 
   describe('Telemetry Settings', () => {
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 5e3a337218..860a166f21 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -42,7 +42,6 @@ import {
   uiTelemetryService,
 } from '../telemetry/index.js';
 import { tokenLimit } from '../core/tokenLimits.js';
-import { StartSessionEvent } from '../telemetry/index.js';
 import {
   DEFAULT_GEMINI_EMBEDDING_MODEL,
   DEFAULT_GEMINI_FLASH_MODEL,
@@ -55,10 +54,7 @@ import { ideContextStore } from '../ide/ideContext.js';
 import { WriteTodosTool } from '../tools/write-todos.js';
 import type { FileSystemService } from '../services/fileSystemService.js';
 import { StandardFileSystemService } from '../services/fileSystemService.js';
-import {
-  logCliConfiguration,
-  logRipgrepFallback,
-} from '../telemetry/loggers.js';
+import { logRipgrepFallback } from '../telemetry/loggers.js';
 import { RipgrepFallbackEvent } from '../telemetry/types.js';
 import type { FallbackModelHandler } from '../fallback/types.js';
 import { ModelRouterService } from '../routing/modelRouterService.js';
@@ -576,9 +572,6 @@ export class Config {
 
     // Reset the session flag since we're explicitly changing auth and using default model
     this.inFallbackMode = false;
-
-    // Logging the cli configuration here as the auth related configuration params would have been loaded by this point
-    logCliConfiguration(this, new StartSessionEvent(this, this.toolRegistry));
   }
 
   getUserTier(): UserTierId | undefined {

From 2a87d663d293ea7211b78ad7348285502819e7d0 Mon Sep 17 00:00:00 2001
From: Sandy Tao <sandytao520@icloud.com>
Date: Mon, 27 Oct 2025 14:29:39 -0700
Subject: [PATCH 42/73] refactor(core): extract ChatCompressionService from
 GeminiClient (#12001)

---
 packages/core/src/core/client.test.ts         | 586 +++---------------
 packages/core/src/core/client.ts              | 230 +------
 .../services/chatCompressionService.test.ts   | 296 +++++++++
 .../src/services/chatCompressionService.ts    | 220 +++++++
 packages/core/src/utils/environmentContext.ts |  26 +-
 5 files changed, 656 insertions(+), 702 deletions(-)
 create mode 100644 packages/core/src/services/chatCompressionService.test.ts
 create mode 100644 packages/core/src/services/chatCompressionService.ts

diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index c273ff00d7..da0479ecae 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -16,7 +16,6 @@ import {
 
 import type { Content, GenerateContentResponse, Part } from '@google/genai';
 import {
-  findCompressSplitPoint,
   isThinkingDefault,
   isThinkingSupported,
   GeminiClient,
@@ -40,9 +39,11 @@ import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
 import { tokenLimit } from './tokenLimits.js';
 import { ideContextStore } from '../ide/ideContext.js';
-import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';
 import type { ModelRouterService } from '../routing/modelRouterService.js';
 import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
+import { ChatCompressionService } from '../services/chatCompressionService.js';
+
+vi.mock('../services/chatCompressionService.js');
 
 // Mock fs module to prevent actual file system operations during tests
 const mockFileSystem = new Map<string, string>();
@@ -132,83 +133,6 @@ async function fromAsync<T>(promise: AsyncGenerator<T>): Promise<readonly T[]> {
   return results;
 }
 
-describe('findCompressSplitPoint', () => {
-  it('should throw an error for non-positive numbers', () => {
-    expect(() => findCompressSplitPoint([], 0)).toThrow(
-      'Fraction must be between 0 and 1',
-    );
-  });
-
-  it('should throw an error for a fraction greater than or equal to 1', () => {
-    expect(() => findCompressSplitPoint([], 1)).toThrow(
-      'Fraction must be between 0 and 1',
-    );
-  });
-
-  it('should handle an empty history', () => {
-    expect(findCompressSplitPoint([], 0.5)).toBe(0);
-  });
-
-  it('should handle a fraction in the middle', () => {
-    const history: Content[] = [
-      { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 66 (19%)
-      { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 68 (40%)
-      { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 66 (60%)
-      { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 68 (80%)
-      { role: 'user', parts: [{ text: 'This is the fifth message.' }] }, // JSON length: 65 (100%)
-    ];
-    expect(findCompressSplitPoint(history, 0.5)).toBe(4);
-  });
-
-  it('should handle a fraction of last index', () => {
-    const history: Content[] = [
-      { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 66 (19%)
-      { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 68 (40%)
-      { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 66 (60%)
-      { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 68 (80%)
-      { role: 'user', parts: [{ text: 'This is the fifth message.' }] }, // JSON length: 65 (100%)
-    ];
-    expect(findCompressSplitPoint(history, 0.9)).toBe(4);
-  });
-
-  it('should handle a fraction of after last index', () => {
-    const history: Content[] = [
-      { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 66 (24%%)
-      { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 68 (50%)
-      { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 66 (74%)
-      { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 68 (100%)
-    ];
-    expect(findCompressSplitPoint(history, 0.8)).toBe(4);
-  });
-
-  it('should return earlier splitpoint if no valid ones are after threshhold', () => {
-    const history: Content[] = [
-      { role: 'user', parts: [{ text: 'This is the first message.' }] },
-      { role: 'model', parts: [{ text: 'This is the second message.' }] },
-      { role: 'user', parts: [{ text: 'This is the third message.' }] },
-      { role: 'model', parts: [{ functionCall: {} }] },
-    ];
-    // Can't return 4 because the previous item has a function call.
-    expect(findCompressSplitPoint(history, 0.99)).toBe(2);
-  });
-
-  it('should handle a history with only one item', () => {
-    const historyWithEmptyParts: Content[] = [
-      { role: 'user', parts: [{ text: 'Message 1' }] },
-    ];
-    expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(0);
-  });
-
-  it('should handle history with weird parts', () => {
-    const historyWithEmptyParts: Content[] = [
-      { role: 'user', parts: [{ text: 'Message 1' }] },
-      { role: 'model', parts: [{ fileData: { fileUri: 'derp' } }] },
-      { role: 'user', parts: [{ text: 'Message 2' }] },
-    ];
-    expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2);
-  });
-});
-
 describe('isThinkingSupported', () => {
   it('should return true for gemini-2.5', () => {
     expect(isThinkingSupported('gemini-2.5')).toBe(true);
@@ -252,6 +176,15 @@ describe('Gemini Client (client.ts)', () => {
     vi.resetAllMocks();
     vi.mocked(uiTelemetryService.setLastPromptTokenCount).mockClear();
 
+    vi.mocked(ChatCompressionService.prototype.compress).mockResolvedValue({
+      newHistory: null,
+      info: {
+        originalTokenCount: 0,
+        newTokenCount: 0,
+        compressionStatus: CompressionStatus.NOOP,
+      },
+    });
+
     mockGenerateContentFn = vi.fn().mockResolvedValue({
       candidates: [{ content: { parts: [{ text: '{"key": "value"}' }] } }],
     });
@@ -404,7 +337,8 @@ describe('Gemini Client (client.ts)', () => {
         { role: 'model', parts: [{ text: 'Long response' }] },
       ] as Content[],
       originalTokenCount = 1000,
-      summaryText = 'This is a summary.',
+      newTokenCount = 500,
+      compressionStatus = CompressionStatus.COMPRESSED,
     } = {}) {
       const mockOriginalChat: Partial<GeminiChat> = {
         getHistory: vi.fn((_curated?: boolean) => chatHistory),
@@ -416,47 +350,25 @@ describe('Gemini Client (client.ts)', () => {
         originalTokenCount,
       );
 
-      mockGenerateContentFn.mockResolvedValue({
-        candidates: [
-          {
-            content: {
-              role: 'model',
-              parts: [{ text: summaryText }],
-            },
-          },
-        ],
-      } as unknown as GenerateContentResponse);
-
-      // Calculate what the new history will be
-      const splitPoint = findCompressSplitPoint(chatHistory, 0.7); // 1 - 0.3
-      const historyToKeep = chatHistory.slice(splitPoint);
-
-      // This is the history that the new chat will have.
-      // It includes the default startChat history + the extra history from tryCompressChat
-      const newCompressedHistory: Content[] = [
-        // Mocked envParts + canned response from startChat
-        {
-          role: 'user',
-          parts: [{ text: 'Mocked env context' }],
-        },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the context!' }],
-        },
-        // extraHistory from tryCompressChat
-        {
-          role: 'user',
-          parts: [{ text: summaryText }],
-        },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the additional context!' }],
-        },
-        ...historyToKeep,
+      const newHistory: Content[] = [
+        { role: 'user', parts: [{ text: 'Summary' }] },
+        { role: 'model', parts: [{ text: 'Got it' }] },
       ];
 
+      vi.mocked(ChatCompressionService.prototype.compress).mockResolvedValue({
+        newHistory:
+          compressionStatus === CompressionStatus.COMPRESSED
+            ? newHistory
+            : null,
+        info: {
+          originalTokenCount,
+          newTokenCount,
+          compressionStatus,
+        },
+      });
+
       const mockNewChat: Partial<GeminiChat> = {
-        getHistory: vi.fn().mockReturnValue(newCompressedHistory),
+        getHistory: vi.fn().mockReturnValue(newHistory),
         setHistory: vi.fn(),
       };
 
@@ -464,39 +376,32 @@ describe('Gemini Client (client.ts)', () => {
         .fn()
         .mockResolvedValue(mockNewChat as GeminiChat);
 
-      const totalChars = newCompressedHistory.reduce(
-        (total, content) => total + JSON.stringify(content).length,
-        0,
-      );
-      const estimatedNewTokenCount = Math.floor(totalChars / 4);
-
       return {
         client,
         mockOriginalChat,
         mockNewChat,
-        estimatedNewTokenCount,
+        estimatedNewTokenCount: newTokenCount,
       };
     }
 
     describe('when compression inflates the token count', () => {
       it('allows compression to be forced/manual after a failure', async () => {
-        // Call 1 (Fails): Setup with a long summary to inflate tokens
-        const longSummary = 'long summary '.repeat(100);
-        const { client, estimatedNewTokenCount: inflatedTokenCount } = setup({
+        // Call 1 (Fails): Setup with inflated tokens
+        setup({
           originalTokenCount: 100,
-          summaryText: longSummary,
+          newTokenCount: 200,
+          compressionStatus:
+            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
         });
-        expect(inflatedTokenCount).toBeGreaterThan(100); // Ensure setup is correct
 
         await client.tryCompressChat('prompt-id-4', false); // Fails
 
-        // Call 2 (Forced): Re-setup with a short summary
-        const shortSummary = 'short';
+        // Call 2 (Forced): Re-setup with compressed tokens
         const { estimatedNewTokenCount: compressedTokenCount } = setup({
           originalTokenCount: 100,
-          summaryText: shortSummary,
+          newTokenCount: 50,
+          compressionStatus: CompressionStatus.COMPRESSED,
         });
-        expect(compressedTokenCount).toBeLessThanOrEqual(100); // Ensure setup is correct
 
         const result = await client.tryCompressChat('prompt-id-4', true); // Forced
 
@@ -508,12 +413,12 @@ describe('Gemini Client (client.ts)', () => {
       });
 
       it('yields the result even if the compression inflated the tokens', async () => {
-        const longSummary = 'long summary '.repeat(100);
         const { client, estimatedNewTokenCount } = setup({
           originalTokenCount: 100,
-          summaryText: longSummary,
+          newTokenCount: 200,
+          compressionStatus:
+            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
         });
-        expect(estimatedNewTokenCount).toBeGreaterThan(100); // Ensure setup is correct
 
         const result = await client.tryCompressChat('prompt-id-4', false);
 
@@ -530,12 +435,12 @@ describe('Gemini Client (client.ts)', () => {
       });
 
       it('does not manipulate the source chat', async () => {
-        const longSummary = 'long summary '.repeat(100);
-        const { client, mockOriginalChat, estimatedNewTokenCount } = setup({
+        const { client, mockOriginalChat } = setup({
           originalTokenCount: 100,
-          summaryText: longSummary,
+          newTokenCount: 200,
+          compressionStatus:
+            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
         });
-        expect(estimatedNewTokenCount).toBeGreaterThan(100); // Ensure setup is correct
 
         await client.tryCompressChat('prompt-id-4', false);
 
@@ -543,45 +448,65 @@ describe('Gemini Client (client.ts)', () => {
         expect(client['chat']).toBe(mockOriginalChat);
       });
 
-      it('will not attempt to compress context after a failure', async () => {
-        const longSummary = 'long summary '.repeat(100);
-        const { client, estimatedNewTokenCount } = setup({
+      it.skip('will not attempt to compress context after a failure', async () => {
+        const { client } = setup({
           originalTokenCount: 100,
-          summaryText: longSummary,
+          newTokenCount: 200,
+          compressionStatus:
+            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
         });
-        expect(estimatedNewTokenCount).toBeGreaterThan(100); // Ensure setup is correct
 
         await client.tryCompressChat('prompt-id-4', false); // This fails and sets hasFailedCompressionAttempt = true
 
+        // Mock the next call to return NOOP
+        vi.mocked(
+          ChatCompressionService.prototype.compress,
+        ).mockResolvedValueOnce({
+          newHistory: null,
+          info: {
+            originalTokenCount: 0,
+            newTokenCount: 0,
+            compressionStatus: CompressionStatus.NOOP,
+          },
+        });
+
         // This call should now be a NOOP
         const result = await client.tryCompressChat('prompt-id-5', false);
 
-        // generateContent (for summary) should only have been called once
-        expect(mockGenerateContentFn).toHaveBeenCalledTimes(1);
-        expect(result).toEqual({
-          compressionStatus: CompressionStatus.NOOP,
-          newTokenCount: 0,
-          originalTokenCount: 0,
-        });
+        expect(result.compressionStatus).toBe(CompressionStatus.NOOP);
+        expect(ChatCompressionService.prototype.compress).toHaveBeenCalledTimes(
+          2,
+        );
+        expect(
+          ChatCompressionService.prototype.compress,
+        ).toHaveBeenLastCalledWith(
+          expect.anything(),
+          'prompt-id-5',
+          false,
+          expect.anything(),
+          expect.anything(),
+          true, // hasFailedCompressionAttempt
+        );
       });
     });
 
     it('should not trigger summarization if token count is below threshold', async () => {
       const MOCKED_TOKEN_LIMIT = 1000;
-      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
-      mockGetHistory.mockReturnValue([
-        { role: 'user', parts: [{ text: '...history...' }] },
-      ]);
       const originalTokenCount = MOCKED_TOKEN_LIMIT * 0.699;
-      vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
-        originalTokenCount,
-      );
+
+      vi.mocked(ChatCompressionService.prototype.compress).mockResolvedValue({
+        newHistory: null,
+        info: {
+          originalTokenCount,
+          newTokenCount: originalTokenCount,
+          compressionStatus: CompressionStatus.NOOP,
+        },
+      });
 
       const initialChat = client.getChat();
       const result = await client.tryCompressChat('prompt-id-2', false);
       const newChat = client.getChat();
 
-      expect(tokenLimit).toHaveBeenCalled();
       expect(result).toEqual({
         compressionStatus: CompressionStatus.NOOP,
         newTokenCount: originalTokenCount,
@@ -594,6 +519,8 @@ describe('Gemini Client (client.ts)', () => {
       const { client } = setup({
         chatHistory: [{ role: 'user', parts: [{ text: 'hi' }] }],
         originalTokenCount: 50,
+        newTokenCount: 50,
+        compressionStatus: CompressionStatus.NOOP,
       });
 
       const result = await client.tryCompressChat('prompt-id-noop', false);
@@ -603,337 +530,6 @@ describe('Gemini Client (client.ts)', () => {
         originalTokenCount: 50,
         newTokenCount: 50,
       });
-      expect(mockGenerateContentFn).not.toHaveBeenCalled();
-    });
-
-    it('logs a telemetry event when compressing', async () => {
-      vi.spyOn(ClearcutLogger.prototype, 'logChatCompressionEvent');
-      const MOCKED_TOKEN_LIMIT = 1000;
-      const MOCKED_CONTEXT_PERCENTAGE_THRESHOLD = 0.5;
-      vi.spyOn(client['config'], 'getChatCompression').mockReturnValue({
-        contextPercentageThreshold: MOCKED_CONTEXT_PERCENTAGE_THRESHOLD,
-      });
-      const history = [
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-      ];
-      mockGetHistory.mockReturnValue(history);
-
-      const originalTokenCount =
-        MOCKED_TOKEN_LIMIT * MOCKED_CONTEXT_PERCENTAGE_THRESHOLD;
-
-      vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
-        originalTokenCount,
-      );
-
-      // We need to control the estimated new token count.
-      // We mock startChat to return a chat with a known history.
-      const summaryText = 'This is a summary.';
-      const splitPoint = findCompressSplitPoint(history, 0.7);
-      const historyToKeep = history.slice(splitPoint);
-      const newCompressedHistory: Content[] = [
-        { role: 'user', parts: [{ text: 'Mocked env context' }] },
-        { role: 'model', parts: [{ text: 'Got it. Thanks for the context!' }] },
-        { role: 'user', parts: [{ text: summaryText }] },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the additional context!' }],
-        },
-        ...historyToKeep,
-      ];
-      const mockNewChat: Partial<GeminiChat> = {
-        getHistory: vi.fn().mockReturnValue(newCompressedHistory),
-      };
-      client['startChat'] = vi
-        .fn()
-        .mockResolvedValue(mockNewChat as GeminiChat);
-
-      const totalChars = newCompressedHistory.reduce(
-        (total, content) => total + JSON.stringify(content).length,
-        0,
-      );
-      const newTokenCount = Math.floor(totalChars / 4);
-
-      // Mock the summary response from the chat
-      mockGenerateContentFn.mockResolvedValue({
-        candidates: [
-          {
-            content: {
-              role: 'model',
-              parts: [{ text: summaryText }],
-            },
-          },
-        ],
-      } as unknown as GenerateContentResponse);
-
-      await client.tryCompressChat('prompt-id-3', false);
-
-      expect(
-        ClearcutLogger.prototype.logChatCompressionEvent,
-      ).toHaveBeenCalledWith(
-        expect.objectContaining({
-          tokens_before: originalTokenCount,
-          tokens_after: newTokenCount,
-        }),
-      );
-      expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(
-        newTokenCount,
-      );
-      expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(
-        1,
-      );
-    });
-
-    it('should trigger summarization if token count is at threshold with contextPercentageThreshold setting', async () => {
-      const MOCKED_TOKEN_LIMIT = 1000;
-      const MOCKED_CONTEXT_PERCENTAGE_THRESHOLD = 0.5;
-      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
-      vi.spyOn(client['config'], 'getChatCompression').mockReturnValue({
-        contextPercentageThreshold: MOCKED_CONTEXT_PERCENTAGE_THRESHOLD,
-      });
-      const history = [
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-      ];
-      mockGetHistory.mockReturnValue(history);
-
-      const originalTokenCount =
-        MOCKED_TOKEN_LIMIT * MOCKED_CONTEXT_PERCENTAGE_THRESHOLD;
-
-      vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
-        originalTokenCount,
-      );
-
-      // Mock summary and new chat
-      const summaryText = 'This is a summary.';
-      const splitPoint = findCompressSplitPoint(history, 0.7);
-      const historyToKeep = history.slice(splitPoint);
-      const newCompressedHistory: Content[] = [
-        { role: 'user', parts: [{ text: 'Mocked env context' }] },
-        { role: 'model', parts: [{ text: 'Got it. Thanks for the context!' }] },
-        { role: 'user', parts: [{ text: summaryText }] },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the additional context!' }],
-        },
-        ...historyToKeep,
-      ];
-      const mockNewChat: Partial<GeminiChat> = {
-        getHistory: vi.fn().mockReturnValue(newCompressedHistory),
-      };
-      client['startChat'] = vi
-        .fn()
-        .mockResolvedValue(mockNewChat as GeminiChat);
-
-      const totalChars = newCompressedHistory.reduce(
-        (total, content) => total + JSON.stringify(content).length,
-        0,
-      );
-      const newTokenCount = Math.floor(totalChars / 4);
-
-      // Mock the summary response from the chat
-      mockGenerateContentFn.mockResolvedValue({
-        candidates: [
-          {
-            content: {
-              role: 'model',
-              parts: [{ text: summaryText }],
-            },
-          },
-        ],
-      } as unknown as GenerateContentResponse);
-
-      const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-3', false);
-      const newChat = client.getChat();
-
-      expect(tokenLimit).toHaveBeenCalled();
-      expect(mockGenerateContentFn).toHaveBeenCalled();
-
-      // Assert that summarization happened and returned the correct stats
-      expect(result).toEqual({
-        compressionStatus: CompressionStatus.COMPRESSED,
-        originalTokenCount,
-        newTokenCount,
-      });
-
-      // Assert that the chat was reset
-      expect(newChat).not.toBe(initialChat);
-    });
-
-    it('should not compress across a function call response', async () => {
-      const MOCKED_TOKEN_LIMIT = 1000;
-      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
-      const history: Content[] = [
-        { role: 'user', parts: [{ text: '...history 1...' }] },
-        { role: 'model', parts: [{ text: '...history 2...' }] },
-        { role: 'user', parts: [{ text: '...history 3...' }] },
-        { role: 'model', parts: [{ text: '...history 4...' }] },
-        { role: 'user', parts: [{ text: '...history 5...' }] },
-        { role: 'model', parts: [{ text: '...history 6...' }] },
-        { role: 'user', parts: [{ text: '...history 7...' }] },
-        { role: 'model', parts: [{ text: '...history 8...' }] },
-        // Normally we would break here, but we have a function response.
-        {
-          role: 'user',
-          parts: [{ functionResponse: { name: '...history 8...' } }],
-        },
-        { role: 'model', parts: [{ text: '...history 10...' }] },
-        // Instead we will break here.
-        { role: 'user', parts: [{ text: '...history 10...' }] },
-      ];
-      mockGetHistory.mockReturnValue(history);
-
-      const originalTokenCount = 1000 * 0.7;
-      vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
-        originalTokenCount,
-      );
-
-      // Mock summary and new chat
-      const summaryText = 'This is a summary.';
-      const splitPoint = findCompressSplitPoint(history, 0.7); // This should be 10
-      expect(splitPoint).toBe(10); // Verify split point logic
-      const historyToKeep = history.slice(splitPoint); // Should keep last user message
-      expect(historyToKeep).toEqual([
-        { role: 'user', parts: [{ text: '...history 10...' }] },
-      ]);
-
-      const newCompressedHistory: Content[] = [
-        { role: 'user', parts: [{ text: 'Mocked env context' }] },
-        { role: 'model', parts: [{ text: 'Got it. Thanks for the context!' }] },
-        { role: 'user', parts: [{ text: summaryText }] },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the additional context!' }],
-        },
-        ...historyToKeep,
-      ];
-      const mockNewChat: Partial<GeminiChat> = {
-        getHistory: vi.fn().mockReturnValue(newCompressedHistory),
-      };
-      client['startChat'] = vi
-        .fn()
-        .mockResolvedValue(mockNewChat as GeminiChat);
-
-      const totalChars = newCompressedHistory.reduce(
-        (total, content) => total + JSON.stringify(content).length,
-        0,
-      );
-      const newTokenCount = Math.floor(totalChars / 4);
-
-      // Mock the summary response from the chat
-      mockGenerateContentFn.mockResolvedValue({
-        candidates: [
-          {
-            content: {
-              role: 'model',
-              parts: [{ text: summaryText }],
-            },
-          },
-        ],
-      } as unknown as GenerateContentResponse);
-
-      const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-3', false);
-      const newChat = client.getChat();
-
-      expect(tokenLimit).toHaveBeenCalled();
-      expect(mockGenerateContentFn).toHaveBeenCalled();
-
-      // Assert that summarization happened and returned the correct stats
-      expect(result).toEqual({
-        compressionStatus: CompressionStatus.COMPRESSED,
-        originalTokenCount,
-        newTokenCount,
-      });
-      // Assert that the chat was reset
-      expect(newChat).not.toBe(initialChat);
-
-      // 1. standard start context message (env)
-      // 2. standard canned model response
-      // 3. compressed summary message (user)
-      // 4. standard canned model response
-      // 5. The last user message (historyToKeep)
-      expect(newChat.getHistory().length).toEqual(5);
-    });
-
-    it('should always trigger summarization when force is true, regardless of token count', async () => {
-      const history = [
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-        { role: 'user', parts: [{ text: '...history...' }] },
-        { role: 'model', parts: [{ text: '...history...' }] },
-      ];
-      mockGetHistory.mockReturnValue(history);
-
-      const originalTokenCount = 100; // Well below threshold, but > estimated new count
-      vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
-        originalTokenCount,
-      );
-
-      // Mock summary and new chat
-      const summaryText = 'This is a summary.';
-      const splitPoint = findCompressSplitPoint(history, 0.7);
-      const historyToKeep = history.slice(splitPoint);
-      const newCompressedHistory: Content[] = [
-        { role: 'user', parts: [{ text: 'Mocked env context' }] },
-        { role: 'model', parts: [{ text: 'Got it. Thanks for the context!' }] },
-        { role: 'user', parts: [{ text: summaryText }] },
-        {
-          role: 'model',
-          parts: [{ text: 'Got it. Thanks for the additional context!' }],
-        },
-        ...historyToKeep,
-      ];
-      const mockNewChat: Partial<GeminiChat> = {
-        getHistory: vi.fn().mockReturnValue(newCompressedHistory),
-      };
-      client['startChat'] = vi
-        .fn()
-        .mockResolvedValue(mockNewChat as GeminiChat);
-
-      const totalChars = newCompressedHistory.reduce(
-        (total, content) => total + JSON.stringify(content).length,
-        0,
-      );
-      const newTokenCount = Math.floor(totalChars / 4);
-
-      // Mock the summary response from the chat
-      mockGenerateContentFn.mockResolvedValue({
-        candidates: [
-          {
-            content: {
-              role: 'model',
-              parts: [{ text: summaryText }],
-            },
-          },
-        ],
-      } as unknown as GenerateContentResponse);
-
-      const initialChat = client.getChat();
-      const result = await client.tryCompressChat('prompt-id-1', true); // force = true
-      const newChat = client.getChat();
-
-      expect(mockGenerateContentFn).toHaveBeenCalled();
-
-      expect(result).toEqual({
-        compressionStatus: CompressionStatus.COMPRESSED,
-        originalTokenCount,
-        newTokenCount,
-      });
-
-      // Assert that the chat was reset
-      expect(newChat).not.toBe(initialChat);
     });
   });
 
@@ -2072,7 +1668,11 @@ ${JSON.stringify(
           vi.mocked(ideContextStore.get).mockReturnValue({
             workspaceState: {
               openFiles: [
-                { ...currentActiveFile, isActive: true, timestamp: Date.now() },
+                {
+                  ...currentActiveFile,
+                  isActive: true,
+                  timestamp: Date.now(),
+                },
               ],
             },
           });
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 484602e636..6b22ee99b7 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -13,14 +13,13 @@ import type {
 } from '@google/genai';
 import {
   getDirectoryContextString,
-  getEnvironmentContext,
+  getInitialChatHistory,
 } from '../utils/environmentContext.js';
 import type { ServerGeminiStreamEvent, ChatCompressionInfo } from './turn.js';
 import { CompressionStatus } from './turn.js';
 import { Turn, GeminiEventType } from './turn.js';
 import type { Config } from '../config/config.js';
-import { getCoreSystemPrompt, getCompressionPrompt } from './prompts.js';
-import { getResponseText } from '../utils/partUtils.js';
+import { getCoreSystemPrompt } from './prompts.js';
 import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
 import { reportError } from '../utils/errorReporting.js';
 import { GeminiChat } from './geminiChat.js';
@@ -37,15 +36,14 @@ import {
   getEffectiveModel,
 } from '../config/models.js';
 import { LoopDetectionService } from '../services/loopDetectionService.js';
+import { ChatCompressionService } from '../services/chatCompressionService.js';
 import { ideContextStore } from '../ide/ideContext.js';
 import {
-  logChatCompression,
   logContentRetryFailure,
   logNextSpeakerCheck,
 } from '../telemetry/loggers.js';
 import {
   ContentRetryFailureEvent,
-  makeChatCompressionEvent,
   NextSpeakerCheckEvent,
 } from '../telemetry/types.js';
 import type { IdeContext, File } from '../ide/types.js';
@@ -65,68 +63,8 @@ export function isThinkingDefault(model: string) {
   return model.startsWith('gemini-2.5') || model === DEFAULT_GEMINI_MODEL_AUTO;
 }
 
-/**
- * Returns the index of the oldest item to keep when compressing. May return
- * contents.length which indicates that everything should be compressed.
- *
- * Exported for testing purposes.
- */
-export function findCompressSplitPoint(
-  contents: Content[],
-  fraction: number,
-): number {
-  if (fraction <= 0 || fraction >= 1) {
-    throw new Error('Fraction must be between 0 and 1');
-  }
-
-  const charCounts = contents.map((content) => JSON.stringify(content).length);
-  const totalCharCount = charCounts.reduce((a, b) => a + b, 0);
-  const targetCharCount = totalCharCount * fraction;
-
-  let lastSplitPoint = 0; // 0 is always valid (compress nothing)
-  let cumulativeCharCount = 0;
-  for (let i = 0; i < contents.length; i++) {
-    const content = contents[i];
-    if (
-      content.role === 'user' &&
-      !content.parts?.some((part) => !!part.functionResponse)
-    ) {
-      if (cumulativeCharCount >= targetCharCount) {
-        return i;
-      }
-      lastSplitPoint = i;
-    }
-    cumulativeCharCount += charCounts[i];
-  }
-
-  // We found no split points after targetCharCount.
-  // Check if it's safe to compress everything.
-  const lastContent = contents[contents.length - 1];
-  if (
-    lastContent?.role === 'model' &&
-    !lastContent?.parts?.some((part) => part.functionCall)
-  ) {
-    return contents.length;
-  }
-
-  // Can't compress everything so just compress at last splitpoint.
-  return lastSplitPoint;
-}
-
 const MAX_TURNS = 100;
 
-/**
- * Threshold for compression token count as a fraction of the model's token limit.
- * If the chat history exceeds this threshold, it will be compressed.
- */
-const COMPRESSION_TOKEN_THRESHOLD = 0.7;
-
-/**
- * The fraction of the latest chat history to keep. A value of 0.3
- * means that only the last 30% of the chat history will be kept after compression.
- */
-const COMPRESSION_PRESERVE_THRESHOLD = 0.3;
-
 export class GeminiClient {
   private chat?: GeminiChat;
   private readonly generateContentConfig: GenerateContentConfig = {
@@ -136,6 +74,7 @@ export class GeminiClient {
   private sessionTurnCount = 0;
 
   private readonly loopDetector: LoopDetectionService;
+  private readonly compressionService: ChatCompressionService;
   private lastPromptId: string;
   private currentSequenceModel: string | null = null;
   private lastSentIdeContext: IdeContext | undefined;
@@ -149,6 +88,7 @@ export class GeminiClient {
 
   constructor(private readonly config: Config) {
     this.loopDetector = new LoopDetectionService(config);
+    this.compressionService = new ChatCompressionService();
     this.lastPromptId = this.config.getSessionId();
   }
 
@@ -233,31 +173,7 @@ export class GeminiClient {
     const toolDeclarations = toolRegistry.getFunctionDeclarations();
     const tools: Tool[] = [{ functionDeclarations: toolDeclarations }];
 
-    // 1. Get the environment context parts as an array
-    const envParts = await getEnvironmentContext(this.config);
-
-    // 2. Convert the array of parts into a single string
-    const envContextString = envParts
-      .map((part) => part.text || '')
-      .join('\n\n');
-
-    // 3. Combine the dynamic context with the static handshake instruction
-    const allSetupText = `
-${envContextString}
-
-Reminder: Do not return an empty response when a tool call is required.
-
-My setup is complete. I will provide my first command in the next turn.
-    `.trim();
-
-    // 4. Create the history with a single, comprehensive user turn
-    const history: Content[] = [
-      {
-        role: 'user',
-        parts: [{ text: allSetupText }],
-      },
-      ...(extraHistory ?? []),
-    ];
+    const history = await getInitialChatHistory(this.config, extraHistory);
 
     try {
       const userMemory = this.config.getUserMemory();
@@ -738,129 +654,27 @@ My setup is complete. I will provide my first command in the next turn.
     // before the model is chosen would result in an error.
     const model = this._getEffectiveModelForCurrentTurn();
 
-    const curatedHistory = this.getChat().getHistory(true);
+    const { newHistory, info } = await this.compressionService.compress(
+      this.getChat(),
+      prompt_id,
+      force,
+      model,
+      this.config,
+      this.hasFailedCompressionAttempt,
+    );
 
-    // Regardless of `force`, don't do anything if the history is empty.
     if (
-      curatedHistory.length === 0 ||
-      (this.hasFailedCompressionAttempt && !force)
+      info.compressionStatus ===
+      CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT
     ) {
-      return {
-        originalTokenCount: 0,
-        newTokenCount: 0,
-        compressionStatus: CompressionStatus.NOOP,
-      };
-    }
-
-    const originalTokenCount = uiTelemetryService.getLastPromptTokenCount();
-
-    const contextPercentageThreshold =
-      this.config.getChatCompression()?.contextPercentageThreshold;
-
-    // Don't compress if not forced and we are under the limit.
-    if (!force) {
-      const threshold =
-        contextPercentageThreshold ?? COMPRESSION_TOKEN_THRESHOLD;
-      if (originalTokenCount < threshold * tokenLimit(model)) {
-        return {
-          originalTokenCount,
-          newTokenCount: originalTokenCount,
-          compressionStatus: CompressionStatus.NOOP,
-        };
+      this.hasFailedCompressionAttempt = !force && true;
+    } else if (info.compressionStatus === CompressionStatus.COMPRESSED) {
+      if (newHistory) {
+        this.chat = await this.startChat(newHistory);
+        this.forceFullIdeContext = true;
       }
     }
 
-    const splitPoint = findCompressSplitPoint(
-      curatedHistory,
-      1 - COMPRESSION_PRESERVE_THRESHOLD,
-    );
-
-    const historyToCompress = curatedHistory.slice(0, splitPoint);
-    const historyToKeep = curatedHistory.slice(splitPoint);
-
-    if (historyToCompress.length === 0) {
-      return {
-        originalTokenCount,
-        newTokenCount: originalTokenCount,
-        compressionStatus: CompressionStatus.NOOP,
-      };
-    }
-
-    const summaryResponse = await this.config
-      .getContentGenerator()
-      .generateContent(
-        {
-          model,
-          contents: [
-            ...historyToCompress,
-            {
-              role: 'user',
-              parts: [
-                {
-                  text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
-                },
-              ],
-            },
-          ],
-          config: {
-            systemInstruction: { text: getCompressionPrompt() },
-          },
-        },
-        prompt_id,
-      );
-    const summary = getResponseText(summaryResponse) ?? '';
-
-    const chat = await this.startChat([
-      {
-        role: 'user',
-        parts: [{ text: summary }],
-      },
-      {
-        role: 'model',
-        parts: [{ text: 'Got it. Thanks for the additional context!' }],
-      },
-      ...historyToKeep,
-    ]);
-    this.forceFullIdeContext = true;
-
-    // Estimate token count 1 token ≈ 4 characters
-    const newTokenCount = Math.floor(
-      chat
-        .getHistory()
-        .reduce((total, content) => total + JSON.stringify(content).length, 0) /
-        4,
-    );
-
-    logChatCompression(
-      this.config,
-      makeChatCompressionEvent({
-        tokens_before: originalTokenCount,
-        tokens_after: newTokenCount,
-      }),
-    );
-
-    if (newTokenCount > originalTokenCount) {
-      this.hasFailedCompressionAttempt = !force && true;
-      return {
-        originalTokenCount,
-        newTokenCount,
-        compressionStatus:
-          CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
-      };
-    } else {
-      this.chat = chat; // Chat compression successful, set new state.
-      uiTelemetryService.setLastPromptTokenCount(newTokenCount);
-    }
-
-    return {
-      originalTokenCount,
-      newTokenCount,
-      compressionStatus: CompressionStatus.COMPRESSED,
-    };
+    return info;
   }
 }
-
-export const TEST_ONLY = {
-  COMPRESSION_PRESERVE_THRESHOLD,
-  COMPRESSION_TOKEN_THRESHOLD,
-};
diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts
new file mode 100644
index 0000000000..ba5688b458
--- /dev/null
+++ b/packages/core/src/services/chatCompressionService.test.ts
@@ -0,0 +1,296 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  ChatCompressionService,
+  findCompressSplitPoint,
+} from './chatCompressionService.js';
+import type { Content, GenerateContentResponse } from '@google/genai';
+import { CompressionStatus } from '../core/turn.js';
+import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
+import { tokenLimit } from '../core/tokenLimits.js';
+import type { GeminiChat } from '../core/geminiChat.js';
+import type { Config } from '../config/config.js';
+import { getInitialChatHistory } from '../utils/environmentContext.js';
+import type { ContentGenerator } from '../core/contentGenerator.js';
+
+vi.mock('../telemetry/uiTelemetry.js');
+vi.mock('../core/tokenLimits.js');
+vi.mock('../telemetry/loggers.js');
+vi.mock('../utils/environmentContext.js');
+
+describe('findCompressSplitPoint', () => {
+  it('should throw an error for non-positive numbers', () => {
+    expect(() => findCompressSplitPoint([], 0)).toThrow(
+      'Fraction must be between 0 and 1',
+    );
+  });
+
+  it('should throw an error for a fraction greater than or equal to 1', () => {
+    expect(() => findCompressSplitPoint([], 1)).toThrow(
+      'Fraction must be between 0 and 1',
+    );
+  });
+
+  it('should handle an empty history', () => {
+    expect(findCompressSplitPoint([], 0.5)).toBe(0);
+  });
+
+  it('should handle a fraction in the middle', () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 63 (cumulative 20%)
+      { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 65 (cumulative 40%)
+      { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 63 (cumulative 60%)
+      { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 65 (cumulative 80%)
+      { role: 'user', parts: [{ text: 'This is the fifth message.' }] }, // JSON length: 63 (cumulative 100%)
+    ];
+    expect(findCompressSplitPoint(history, 0.5)).toBe(4);
+  });
+
+  it('should handle a fraction of last index', () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 63 (cumulative 20%)
+      { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 65 (cumulative 40%)
+      { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 63 (cumulative 60%)
+      { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 65 (cumulative 80%)
+      { role: 'user', parts: [{ text: 'This is the fifth message.' }] }, // JSON length: 63 (cumulative 100%)
+    ];
+    expect(findCompressSplitPoint(history, 0.9)).toBe(4);
+  });
+
+  it('should handle a fraction of after last index', () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 63 (cumulative 25%)
+      { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 65 (cumulative 50%)
+      { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 63 (cumulative 75%)
+      { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 65 (cumulative 100%)
+    ];
+    expect(findCompressSplitPoint(history, 0.8)).toBe(4);
+  });
+
+  it('should return earlier splitpoint if no valid ones are after threshhold', () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'This is the first message.' }] },
+      { role: 'model', parts: [{ text: 'This is the second message.' }] },
+      { role: 'user', parts: [{ text: 'This is the third message.' }] },
+      { role: 'model', parts: [{ functionCall: { name: 'foo', args: {} } }] },
+    ];
+    // Can't return 4 (compress everything) because the last item has a function call.
+    expect(findCompressSplitPoint(history, 0.99)).toBe(2);
+  });
+
+  it('should handle a history with only one item', () => {
+    const historyWithEmptyParts: Content[] = [
+      { role: 'user', parts: [{ text: 'Message 1' }] },
+    ];
+    expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(0);
+  });
+
+  it('should handle history with weird parts', () => {
+    const historyWithEmptyParts: Content[] = [
+      { role: 'user', parts: [{ text: 'Message 1' }] },
+      {
+        role: 'model',
+        parts: [{ fileData: { fileUri: 'derp', mimeType: 'text/plain' } }],
+      },
+      { role: 'user', parts: [{ text: 'Message 2' }] },
+    ];
+    expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2);
+  });
+});
+
+describe('ChatCompressionService', () => {
+  let service: ChatCompressionService;
+  let mockChat: GeminiChat;
+  let mockConfig: Config;
+  const mockModel = 'gemini-pro';
+  const mockPromptId = 'test-prompt-id';
+
+  beforeEach(() => {
+    service = new ChatCompressionService();
+    mockChat = {
+      getHistory: vi.fn(),
+    } as unknown as GeminiChat;
+    mockConfig = {
+      getChatCompression: vi.fn(),
+      getContentGenerator: vi.fn(),
+    } as unknown as Config;
+
+    vi.mocked(tokenLimit).mockReturnValue(1000);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(500);
+    vi.mocked(getInitialChatHistory).mockImplementation(
+      async (_config, extraHistory) => extraHistory || [],
+    );
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('should return NOOP if history is empty', async () => {
+    vi.mocked(mockChat.getHistory).mockReturnValue([]);
+    const result = await service.compress(
+      mockChat,
+      mockPromptId,
+      false,
+      mockModel,
+      mockConfig,
+      false,
+    );
+    expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
+    expect(result.newHistory).toBeNull();
+  });
+
+  it('should return NOOP if previously failed and not forced', async () => {
+    vi.mocked(mockChat.getHistory).mockReturnValue([
+      { role: 'user', parts: [{ text: 'hi' }] },
+    ]);
+    const result = await service.compress(
+      mockChat,
+      mockPromptId,
+      false,
+      mockModel,
+      mockConfig,
+      true,
+    );
+    expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
+    expect(result.newHistory).toBeNull();
+  });
+
+  it('should return NOOP if under token threshold and not forced', async () => {
+    vi.mocked(mockChat.getHistory).mockReturnValue([
+      { role: 'user', parts: [{ text: 'hi' }] },
+    ]);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(600);
+    vi.mocked(tokenLimit).mockReturnValue(1000);
+    // Threshold is 0.7 * 1000 = 700. 600 < 700, so NOOP.
+
+    const result = await service.compress(
+      mockChat,
+      mockPromptId,
+      false,
+      mockModel,
+      mockConfig,
+      false,
+    );
+    expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
+    expect(result.newHistory).toBeNull();
+  });
+
+  it('should compress if over token threshold', async () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'msg1' }] },
+      { role: 'model', parts: [{ text: 'msg2' }] },
+      { role: 'user', parts: [{ text: 'msg3' }] },
+      { role: 'model', parts: [{ text: 'msg4' }] },
+    ];
+    vi.mocked(mockChat.getHistory).mockReturnValue(history);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(800);
+    vi.mocked(tokenLimit).mockReturnValue(1000);
+    const mockGenerateContent = vi.fn().mockResolvedValue({
+      candidates: [
+        {
+          content: {
+            parts: [{ text: 'Summary' }],
+          },
+        },
+      ],
+    } as unknown as GenerateContentResponse);
+    vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
+      generateContent: mockGenerateContent,
+    } as unknown as ContentGenerator);
+
+    const result = await service.compress(
+      mockChat,
+      mockPromptId,
+      false,
+      mockModel,
+      mockConfig,
+      false,
+    );
+
+    expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
+    expect(result.newHistory).not.toBeNull();
+    expect(result.newHistory![0].parts![0].text).toBe('Summary');
+    expect(mockGenerateContent).toHaveBeenCalled();
+  });
+
+  it('should force compress even if under threshold', async () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'msg1' }] },
+      { role: 'model', parts: [{ text: 'msg2' }] },
+      { role: 'user', parts: [{ text: 'msg3' }] },
+      { role: 'model', parts: [{ text: 'msg4' }] },
+    ];
+    vi.mocked(mockChat.getHistory).mockReturnValue(history);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100);
+    vi.mocked(tokenLimit).mockReturnValue(1000);
+
+    const mockGenerateContent = vi.fn().mockResolvedValue({
+      candidates: [
+        {
+          content: {
+            parts: [{ text: 'Summary' }],
+          },
+        },
+      ],
+    } as unknown as GenerateContentResponse);
+    vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
+      generateContent: mockGenerateContent,
+    } as unknown as ContentGenerator);
+
+    const result = await service.compress(
+      mockChat,
+      mockPromptId,
+      true, // forced
+      mockModel,
+      mockConfig,
+      false,
+    );
+
+    expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
+    expect(result.newHistory).not.toBeNull();
+  });
+
+  it('should return FAILED if new token count is inflated', async () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'msg1' }] },
+      { role: 'model', parts: [{ text: 'msg2' }] },
+    ];
+    vi.mocked(mockChat.getHistory).mockReturnValue(history);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(10);
+    vi.mocked(tokenLimit).mockReturnValue(1000);
+
+    const longSummary = 'a'.repeat(1000); // Long summary to inflate token count
+    const mockGenerateContent = vi.fn().mockResolvedValue({
+      candidates: [
+        {
+          content: {
+            parts: [{ text: longSummary }],
+          },
+        },
+      ],
+    } as unknown as GenerateContentResponse);
+    vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
+      generateContent: mockGenerateContent,
+    } as unknown as ContentGenerator);
+
+    const result = await service.compress(
+      mockChat,
+      mockPromptId,
+      true,
+      mockModel,
+      mockConfig,
+      false,
+    );
+
+    expect(result.info.compressionStatus).toBe(
+      CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
+    );
+    expect(result.newHistory).toBeNull();
+  });
+});
diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts
new file mode 100644
index 0000000000..cdfb093e5d
--- /dev/null
+++ b/packages/core/src/services/chatCompressionService.ts
@@ -0,0 +1,220 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Content } from '@google/genai';
+import type { Config } from '../config/config.js';
+import type { GeminiChat } from '../core/geminiChat.js';
+import { type ChatCompressionInfo, CompressionStatus } from '../core/turn.js';
+import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
+import { tokenLimit } from '../core/tokenLimits.js';
+import { getCompressionPrompt } from '../core/prompts.js';
+import { getResponseText } from '../utils/partUtils.js';
+import { logChatCompression } from '../telemetry/loggers.js';
+import { makeChatCompressionEvent } from '../telemetry/types.js';
+import { getInitialChatHistory } from '../utils/environmentContext.js';
+
+/**
+ * Threshold for compression token count as a fraction of the model's token limit.
+ * If the chat history exceeds this threshold, it will be compressed.
+ */
+export const COMPRESSION_TOKEN_THRESHOLD = 0.7;
+
+/**
+ * The fraction of the latest chat history to keep. A value of 0.3
+ * means that only the last 30% of the chat history will be kept after compression.
+ */
+export const COMPRESSION_PRESERVE_THRESHOLD = 0.3;
+
+/**
+ * Returns the index of the oldest item to keep when compressing, normally the
+ * first plain user turn (no functionResponse part) preceded by at least
+ * `fraction` of the serialized characters. May return contents.length
+ * (compress everything). Throws unless 0 < fraction < 1. Exported for tests.
+ */
+export function findCompressSplitPoint(
+  contents: Content[],
+  fraction: number,
+): number {
+  if (fraction <= 0 || fraction >= 1) {
+    throw new Error('Fraction must be between 0 and 1');
+  }
+
+  const charCounts = contents.map((content) => JSON.stringify(content).length);
+  const totalCharCount = charCounts.reduce((a, b) => a + b, 0);
+  const targetCharCount = totalCharCount * fraction;
+
+  let lastSplitPoint = 0; // 0 is always valid (compress nothing)
+  let cumulativeCharCount = 0;
+  for (let i = 0; i < contents.length; i++) {
+    const content = contents[i];
+    if (
+      content.role === 'user' &&
+      !content.parts?.some((part) => !!part.functionResponse)
+    ) {
+      if (cumulativeCharCount >= targetCharCount) {
+        return i;
+      }
+      lastSplitPoint = i;
+    }
+    cumulativeCharCount += charCounts[i];
+  }
+
+  // No valid split point at or past targetCharCount. Compressing everything
+  // is safe only if the history ends on a model turn with no function call.
+  const lastContent = contents[contents.length - 1];
+  if (
+    lastContent?.role === 'model' &&
+    !lastContent?.parts?.some((part) => part.functionCall)
+  ) {
+    return contents.length;
+  }
+
+  // Otherwise fall back to the last valid split point seen (possibly 0).
+  return lastSplitPoint;
+}
+
+export class ChatCompressionService {
+  async compress(
+    chat: GeminiChat,
+    promptId: string,
+    force: boolean,
+    model: string,
+    config: Config,
+    hasFailedCompressionAttempt: boolean,
+  ): Promise<{ newHistory: Content[] | null; info: ChatCompressionInfo }> {
+    const curatedHistory = chat.getHistory(true);
+
+    // No-op if history is empty, or if a prior attempt failed and not forced.
+    if (
+      curatedHistory.length === 0 ||
+      (hasFailedCompressionAttempt && !force)
+    ) {
+      return {
+        newHistory: null,
+        info: {
+          originalTokenCount: 0,
+          newTokenCount: 0,
+          compressionStatus: CompressionStatus.NOOP,
+        },
+      };
+    }
+
+    const originalTokenCount = uiTelemetryService.getLastPromptTokenCount();
+
+    const contextPercentageThreshold =
+      config.getChatCompression()?.contextPercentageThreshold;
+
+    // Don't compress if not forced and we are under the limit.
+    if (!force) {
+      const threshold =
+        contextPercentageThreshold ?? COMPRESSION_TOKEN_THRESHOLD;
+      if (originalTokenCount < threshold * tokenLimit(model)) {
+        return {
+          newHistory: null,
+          info: {
+            originalTokenCount,
+            newTokenCount: originalTokenCount,
+            compressionStatus: CompressionStatus.NOOP,
+          },
+        };
+      }
+    }
+
+    const splitPoint = findCompressSplitPoint(
+      curatedHistory,
+      1 - COMPRESSION_PRESERVE_THRESHOLD,
+    );
+
+    const historyToCompress = curatedHistory.slice(0, splitPoint);
+    const historyToKeep = curatedHistory.slice(splitPoint);
+
+    if (historyToCompress.length === 0) {
+      return {
+        newHistory: null,
+        info: {
+          originalTokenCount,
+          newTokenCount: originalTokenCount,
+          compressionStatus: CompressionStatus.NOOP,
+        },
+      };
+    }
+
+    const summaryResponse = await config.getContentGenerator().generateContent(
+      {
+        model,
+        contents: [
+          ...historyToCompress,
+          {
+            role: 'user',
+            parts: [
+              {
+                text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
+              },
+            ],
+          },
+        ],
+        config: {
+          systemInstruction: { text: getCompressionPrompt() },
+        },
+      },
+      promptId,
+    );
+    const summary = getResponseText(summaryResponse) ?? '';
+
+    const extraHistory: Content[] = [
+      {
+        role: 'user',
+        parts: [{ text: summary }],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Got it. Thanks for the additional context!' }],
+      },
+      ...historyToKeep,
+    ];
+
+    // Use a shared utility to construct the initial history for an accurate token count.
+    const fullNewHistory = await getInitialChatHistory(config, extraHistory);
+
+    // Estimate the token count, assuming 1 token ≈ 4 characters.
+    const newTokenCount = Math.floor(
+      fullNewHistory.reduce(
+        (total, content) => total + JSON.stringify(content).length,
+        0,
+      ) / 4,
+    );
+
+    logChatCompression(
+      config,
+      makeChatCompressionEvent({
+        tokens_before: originalTokenCount,
+        tokens_after: newTokenCount,
+      }),
+    );
+
+    if (newTokenCount > originalTokenCount) {
+      return {
+        newHistory: null,
+        info: {
+          originalTokenCount,
+          newTokenCount,
+          compressionStatus:
+            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
+        },
+      };
+    } else {
+      uiTelemetryService.setLastPromptTokenCount(newTokenCount);
+      return {
+        newHistory: extraHistory, // excludes the initial setup turn
+        info: {
+          originalTokenCount,
+          newTokenCount,
+          compressionStatus: CompressionStatus.COMPRESSED,
+        },
+      };
+    }
+  }
+}
diff --git a/packages/core/src/utils/environmentContext.ts b/packages/core/src/utils/environmentContext.ts
index 1565a86862..59d7686386 100644
--- a/packages/core/src/utils/environmentContext.ts
+++ b/packages/core/src/utils/environmentContext.ts
@@ -4,7 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { Part } from '@google/genai';
+import type { Part, Content } from '@google/genai';
 import type { Config } from '../config/config.js';
 import { getFolderStructure } from './getFolderStructure.js';
 
@@ -71,3 +71,27 @@ ${directoryContext}
 
   return initialParts;
 }
+
+export async function getInitialChatHistory(
+  config: Config,
+  extraHistory?: Content[],
+): Promise<Content[]> {
+  const envParts = await getEnvironmentContext(config);
+  const envContextString = envParts.map((part) => part.text || '').join('\n\n');
+
+  const allSetupText = `
+${envContextString}
+
+Reminder: Do not return an empty response when a tool call is required.
+
+My setup is complete. I will provide my first command in the next turn.
+    `.trim();
+
+  return [
+    {
+      role: 'user',
+      parts: [{ text: allSetupText }],
+    },
+    ...(extraHistory ?? []),
+  ];
+}

From cb0947c5019ae6b6251199a9a3b9ac6b6c8ce3ef Mon Sep 17 00:00:00 2001
From: matt korwel <matt.korwel@gmail.com>
Date: Mon, 27 Oct 2025 14:39:09 -0700
Subject: [PATCH 43/73] fix(ci): tsc build for package/core is idempotent
 (#12112)

---
 packages/core/tsconfig.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/core/tsconfig.json b/packages/core/tsconfig.json
index fac510b729..06e3256b97 100644
--- a/packages/core/tsconfig.json
+++ b/packages/core/tsconfig.json
@@ -6,6 +6,6 @@
     "composite": true,
     "types": ["node", "vitest/globals"]
   },
-  "include": ["index.ts", "src/**/*.ts", "src/**/*.d.ts", "src/**/*.json"],
+  "include": ["index.ts", "src/**/*.ts", "src/**/*.json"],
   "exclude": ["node_modules", "dist"]
 }

From 2dfb813c90457d2261424b4941a8788bfdd9d123 Mon Sep 17 00:00:00 2001
From: Pyush Sinha <pyushsinha20@gmail.com>
Date: Mon, 27 Oct 2025 15:33:12 -0700
Subject: [PATCH 44/73] (fix): appcontainer should not poll and footer should
 use currentModel from ui state (#11923)

---
 packages/cli/src/test-utils/render.tsx        |  1 +
 packages/cli/src/ui/AppContainer.tsx          | 18 +++++------
 .../cli/src/ui/components/Footer.test.tsx     | 28 ++++++++++++++++
 packages/cli/src/ui/components/Footer.tsx     |  3 +-
 packages/core/src/fallback/handler.ts         |  2 ++
 packages/core/src/utils/events.ts             | 32 +++++++++++++++++++
 6 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx
index d07f6663cb..3eba2ff964 100644
--- a/packages/cli/src/test-utils/render.tsx
+++ b/packages/cli/src/test-utils/render.tsx
@@ -64,6 +64,7 @@ const baseMockUiState = {
   streamingState: StreamingState.Idle,
   mainAreaWidth: 100,
   terminalWidth: 120,
+  currentModel: 'gemini-pro',
 };
 
 export const renderWithProviders = (
diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index ae0f43b418..a6ff6c0eeb 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -261,20 +261,18 @@ export const AppContainer = (props: AppContainerProps) => {
     [historyManager.addItem],
   );
 
-  // Watch for model changes (e.g., from Flash fallback)
+  // Subscribe to fallback mode changes from core
   useEffect(() => {
-    const checkModelChange = () => {
+    const handleFallbackModeChanged = () => {
       const effectiveModel = getEffectiveModel();
-      if (effectiveModel !== currentModel) {
-        setCurrentModel(effectiveModel);
-      }
+      setCurrentModel(effectiveModel);
     };
 
-    checkModelChange();
-    const interval = setInterval(checkModelChange, 1000); // Check every second
-
-    return () => clearInterval(interval);
-  }, [config, currentModel, getEffectiveModel]);
+    coreEvents.on(CoreEvent.FallbackModeChanged, handleFallbackModeChanged);
+    return () => {
+      coreEvents.off(CoreEvent.FallbackModeChanged, handleFallbackModeChanged);
+    };
+  }, [getEffectiveModel]);
 
   const {
     consoleMessages,
diff --git a/packages/cli/src/ui/components/Footer.test.tsx b/packages/cli/src/ui/components/Footer.test.tsx
index a27f6b26d1..f5ef617e0d 100644
--- a/packages/cli/src/ui/components/Footer.test.tsx
+++ b/packages/cli/src/ui/components/Footer.test.tsx
@@ -256,3 +256,31 @@ describe('<Footer />', () => {
     });
   });
 });
+
+describe('fallback mode display', () => {
+  it('should display Flash model when in fallback mode, not the configured Pro model', () => {
+    const { lastFrame } = renderWithProviders(<Footer />, {
+      width: 120,
+      uiState: {
+        sessionStats: mockSessionStats,
+        currentModel: 'gemini-2.5-flash', // Fallback active, showing Flash
+      },
+    });
+
+    // Footer should show the effective model (Flash), not the config model (Pro)
+    expect(lastFrame()).toContain('gemini-2.5-flash');
+    expect(lastFrame()).not.toContain('gemini-2.5-pro');
+  });
+
+  it('should display Pro model when NOT in fallback mode', () => {
+    const { lastFrame } = renderWithProviders(<Footer />, {
+      width: 120,
+      uiState: {
+        sessionStats: mockSessionStats,
+        currentModel: 'gemini-2.5-pro', // Normal mode, showing Pro
+      },
+    });
+
+    expect(lastFrame()).toContain('gemini-2.5-pro');
+  });
+});
diff --git a/packages/cli/src/ui/components/Footer.tsx b/packages/cli/src/ui/components/Footer.tsx
index 377bc55667..7c4aa4347b 100644
--- a/packages/cli/src/ui/components/Footer.tsx
+++ b/packages/cli/src/ui/components/Footer.tsx
@@ -15,7 +15,6 @@ import { MemoryUsageDisplay } from './MemoryUsageDisplay.js';
 import { ContextUsageDisplay } from './ContextUsageDisplay.js';
 import { DebugProfiler } from './DebugProfiler.js';
 import { isDevelopment } from '../../utils/installationInfo.js';
-
 import { useUIState } from '../contexts/UIStateContext.js';
 import { useConfig } from '../contexts/ConfigContext.js';
 import { useSettings } from '../contexts/SettingsContext.js';
@@ -41,7 +40,7 @@ export const Footer: React.FC = () => {
     isTrustedFolder,
     mainAreaWidth,
   } = {
-    model: config.getModel(),
+    model: uiState.currentModel,
     targetDir: config.getTargetDir(),
     debugMode: config.getDebugMode(),
     branchName: uiState.branchName,
diff --git a/packages/core/src/fallback/handler.ts b/packages/core/src/fallback/handler.ts
index 762552cd2d..69eaf38d22 100644
--- a/packages/core/src/fallback/handler.ts
+++ b/packages/core/src/fallback/handler.ts
@@ -8,6 +8,7 @@ import type { Config } from '../config/config.js';
 import { AuthType } from '../core/contentGenerator.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { logFlashFallback, FlashFallbackEvent } from '../telemetry/index.js';
+import { coreEvents } from '../utils/events.js';
 
 export async function handleFallback(
   config: Config,
@@ -62,6 +63,7 @@ export async function handleFallback(
 function activateFallbackMode(config: Config, authType: string | undefined) {
   if (!config.isInFallbackMode()) {
     config.setFallbackMode(true);
+    coreEvents.emitFallbackModeChanged(true);
     if (authType) {
       logFlashFallback(config, new FlashFallbackEvent(authType));
     }
diff --git a/packages/core/src/utils/events.ts b/packages/core/src/utils/events.ts
index 76038560d8..9b34d27883 100644
--- a/packages/core/src/utils/events.ts
+++ b/packages/core/src/utils/events.ts
@@ -33,8 +33,19 @@ export interface UserFeedbackPayload {
   error?: unknown;
 }
 
+/**
+ * Payload for the 'fallback-mode-changed' event.
+ */
+export interface FallbackModeChangedPayload {
+  /**
+   * Whether fallback mode is now active.
+   */
+  isInFallbackMode: boolean;
+}
+
 export enum CoreEvent {
   UserFeedback = 'user-feedback',
+  FallbackModeChanged = 'fallback-mode-changed',
 }
 
 export class CoreEventEmitter extends EventEmitter {
@@ -66,6 +77,15 @@ export class CoreEventEmitter extends EventEmitter {
     }
   }
 
+  /**
+   * Notifies subscribers that fallback mode has changed.
+   * This is synchronous and doesn't use backlog (UI should already be initialized).
+   */
+  emitFallbackModeChanged(isInFallbackMode: boolean): void {
+    const payload: FallbackModeChangedPayload = { isInFallbackMode };
+    this.emit(CoreEvent.FallbackModeChanged, payload);
+  }
+
   /**
    * Flushes buffered messages. Call this immediately after primary UI listener
    * subscribes.
@@ -82,6 +102,10 @@ export class CoreEventEmitter extends EventEmitter {
     event: CoreEvent.UserFeedback,
     listener: (payload: UserFeedbackPayload) => void,
   ): this;
+  override on(
+    event: CoreEvent.FallbackModeChanged,
+    listener: (payload: FallbackModeChangedPayload) => void,
+  ): this;
   override on(
     event: string | symbol,
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -94,6 +118,10 @@ export class CoreEventEmitter extends EventEmitter {
     event: CoreEvent.UserFeedback,
     listener: (payload: UserFeedbackPayload) => void,
   ): this;
+  override off(
+    event: CoreEvent.FallbackModeChanged,
+    listener: (payload: FallbackModeChangedPayload) => void,
+  ): this;
   override off(
     event: string | symbol,
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -106,6 +134,10 @@ export class CoreEventEmitter extends EventEmitter {
     event: CoreEvent.UserFeedback,
     payload: UserFeedbackPayload,
   ): boolean;
+  override emit(
+    event: CoreEvent.FallbackModeChanged,
+    payload: FallbackModeChangedPayload,
+  ): boolean;
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   override emit(event: string | symbol, ...args: any[]): boolean {
     return super.emit(event, ...args);

From a9cb8f4958ef4077df276c868c0f6d362a1d1fe5 Mon Sep 17 00:00:00 2001
From: Pavel Jbanov <pavelgj@gmail.com>
Date: Mon, 27 Oct 2025 19:16:44 -0400
Subject: [PATCH 45/73] feat: added basic dev otel trace instrumentation
 (#11690)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 README.md                                     |   2 +
 docs/local-development.md                     |  92 +++++
 packages/cli/src/ui/hooks/useGeminiStream.ts  | 287 +++++++--------
 packages/core/src/core/coreToolScheduler.ts   | 326 ++++++++++--------
 .../core/src/core/loggingContentGenerator.ts  | 118 +++++--
 packages/core/src/telemetry/index.ts          |   1 +
 packages/core/src/telemetry/trace.ts          | 153 ++++++++
 7 files changed, 656 insertions(+), 323 deletions(-)
 create mode 100644 docs/local-development.md
 create mode 100644 packages/core/src/telemetry/trace.ts

diff --git a/README.md b/README.md
index 82ca5955d9..f2c5fe3225 100644
--- a/README.md
+++ b/README.md
@@ -306,6 +306,8 @@ gemini
   corporate environment.
 - [**Telemetry & Monitoring**](./docs/cli/telemetry.md) - Usage tracking.
 - [**Tools API Development**](./docs/core/tools-api.md) - Create custom tools.
+- [**Local development**](./docs/local-development.md) - Local development
+  tooling.
 
 ### Troubleshooting & Support
 
diff --git a/docs/local-development.md b/docs/local-development.md
new file mode 100644
index 0000000000..a7e85c565e
--- /dev/null
+++ b/docs/local-development.md
@@ -0,0 +1,92 @@
+# Local Development Guide
+
+This guide provides instructions for setting up and using local development
+features, such as development tracing.
+
+## Development Tracing
+
+Development traces (dev traces) are OpenTelemetry (OTel) traces that help you
+debug your code by instrumenting interesting events such as model calls, tool
+scheduling, and tool calls.
+
+Dev traces are verbose and are specifically meant for understanding agent
+behaviour and debugging issues. They are disabled by default.
+
+To enable dev traces, set the `GEMINI_DEV_TRACING=true` environment variable
+when running Gemini CLI.
+
+### Viewing Dev Traces
+
+You can view dev traces in the Jaeger UI. To get started, follow these steps:
+
+1.  **Start the telemetry collector:**
+
+    Run the following command in your terminal to download and start Jaeger and
+    an OTel collector:
+
+    ```bash
+    npm run telemetry -- --target=local
+    ```
+
+    This command also configures your workspace for local telemetry and provides
+    a link to the Jaeger UI (usually `http://localhost:16686`).
+
+2.  **Run Gemini CLI with dev tracing:**
+
+    In a separate terminal, run your Gemini CLI command with the
+    `GEMINI_DEV_TRACING` environment variable:
+
+    ```bash
+    GEMINI_DEV_TRACING=true gemini [your-command]
+    ```
+
+3.  **View the traces:**
+
+    After running your command, open the Jaeger UI link in your browser to view
+    the traces.
+
+For more detailed information on telemetry, see the
+[telemetry documentation](./cli/telemetry.md).
+
+### Instrumenting Code with Dev Traces
+
+You can add dev traces to your own code for more detailed instrumentation. This
+is useful for debugging and understanding the flow of execution.
+
+Use the `runInDevTraceSpan` function to wrap any section of code in a trace
+span.
+
+Here is a basic example:
+
+```typescript
+import { runInDevTraceSpan } from '@google/gemini-cli-core';
+
+await runInDevTraceSpan({ name: 'my-custom-span' }, async ({ metadata }) => {
+  // The `metadata` object allows you to record the input and output of the
+  // operation as well as other attributes.
+  metadata.input = { key: 'value' };
+  // Set custom attributes.
+  metadata.attributes['gen_ai.request.model'] = 'gemini-4.0-mega';
+
+  // Your code to be traced goes here
+  try {
+    const output = await somethingRisky();
+    metadata.output = output;
+    return output;
+  } catch (e) {
+    metadata.error = e;
+    throw e;
+  }
+});
+```
+
+In this example:
+
+- `name`: The name of the span, which will be displayed in the trace.
+- `metadata.input`: (Optional) An object containing the input data for the
+  traced operation.
+- `metadata.output`: (Optional) An object containing the output data from the
+  traced operation.
+- `metadata.attributes`: (Optional) A record of custom attributes to add to the
+  span.
+- `metadata.error`: (Optional) An error object to record if the operation fails.
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index ae3a23c7eb..851ccbc0b1 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -35,6 +35,7 @@ import {
   promptIdContext,
   WRITE_FILE_TOOL_NAME,
   tokenLimit,
+  runInDevTraceSpan,
 } from '@google/gemini-cli-core';
 import { type Part, type PartListUnion, FinishReason } from '@google/genai';
 import type {
@@ -805,152 +806,160 @@ export const useGeminiStream = (
       query: PartListUnion,
       options?: { isContinuation: boolean },
       prompt_id?: string,
-    ) => {
-      const queryId = `${Date.now()}-${Math.random()}`;
-      activeQueryIdRef.current = queryId;
-      if (
-        (streamingState === StreamingState.Responding ||
-          streamingState === StreamingState.WaitingForConfirmation) &&
-        !options?.isContinuation
-      )
-        return;
-
-      const userMessageTimestamp = Date.now();
-
-      // Reset quota error flag when starting a new query (not a continuation)
-      if (!options?.isContinuation) {
-        setModelSwitchedFromQuotaError(false);
-        config.setQuotaErrorOccurred(false);
-      }
-
-      abortControllerRef.current = new AbortController();
-      const abortSignal = abortControllerRef.current.signal;
-      turnCancelledRef.current = false;
-
-      if (!prompt_id) {
-        prompt_id = config.getSessionId() + '########' + getPromptCount();
-      }
-      return promptIdContext.run(prompt_id, async () => {
-        const { queryToSend, shouldProceed } = await prepareQueryForGemini(
-          query,
-          userMessageTimestamp,
-          abortSignal,
-          prompt_id,
-        );
-
-        if (!shouldProceed || queryToSend === null) {
-          return;
-        }
-
-        if (!options?.isContinuation) {
-          if (typeof queryToSend === 'string') {
-            // logging the text prompts only for now
-            const promptText = queryToSend;
-            logUserPrompt(
-              config,
-              new UserPromptEvent(
-                promptText.length,
-                prompt_id,
-                config.getContentGeneratorConfig()?.authType,
-                promptText,
-              ),
-            );
-          }
-          startNewPrompt();
-          setThought(null); // Reset thought when starting a new prompt
-        }
-
-        setIsResponding(true);
-        setInitError(null);
-
-        // Store query and prompt_id for potential retry on loop detection
-        lastQueryRef.current = queryToSend;
-        lastPromptIdRef.current = prompt_id;
-
-        try {
-          const stream = geminiClient.sendMessageStream(
-            queryToSend,
-            abortSignal,
-            prompt_id,
-          );
-          const processingStatus = await processGeminiStreamEvents(
-            stream,
-            userMessageTimestamp,
-            abortSignal,
-          );
-
-          if (processingStatus === StreamProcessingStatus.UserCancelled) {
+    ) =>
+      runInDevTraceSpan(
+        { name: 'submitQuery' },
+        async ({ metadata: spanMetadata }) => {
+          spanMetadata.input = query;
+          const queryId = `${Date.now()}-${Math.random()}`;
+          activeQueryIdRef.current = queryId;
+          if (
+            (streamingState === StreamingState.Responding ||
+              streamingState === StreamingState.WaitingForConfirmation) &&
+            !options?.isContinuation
+          )
             return;
+
+          const userMessageTimestamp = Date.now();
+
+          // Reset quota error flag when starting a new query (not a continuation)
+          if (!options?.isContinuation) {
+            setModelSwitchedFromQuotaError(false);
+            config.setQuotaErrorOccurred(false);
           }
 
-          if (pendingHistoryItemRef.current) {
-            addItem(pendingHistoryItemRef.current, userMessageTimestamp);
-            setPendingHistoryItem(null);
-          }
-          if (loopDetectedRef.current) {
-            loopDetectedRef.current = false;
-            // Show the confirmation dialog to choose whether to disable loop detection
-            setLoopDetectionConfirmationRequest({
-              onComplete: (result: { userSelection: 'disable' | 'keep' }) => {
-                setLoopDetectionConfirmationRequest(null);
+          abortControllerRef.current = new AbortController();
+          const abortSignal = abortControllerRef.current.signal;
+          turnCancelledRef.current = false;
 
-                if (result.userSelection === 'disable') {
-                  config
-                    .getGeminiClient()
-                    .getLoopDetectionService()
-                    .disableForSession();
-                  addItem(
-                    {
-                      type: 'info',
-                      text: `Loop detection has been disabled for this session. Retrying request...`,
-                    },
-                    Date.now(),
-                  );
-
-                  if (lastQueryRef.current && lastPromptIdRef.current) {
-                    submitQuery(
-                      lastQueryRef.current,
-                      { isContinuation: true },
-                      lastPromptIdRef.current,
-                    );
-                  }
-                } else {
-                  addItem(
-                    {
-                      type: 'info',
-                      text: `A potential loop was detected. This can happen due to repetitive tool calls or other model behavior. The request has been halted.`,
-                    },
-                    Date.now(),
-                  );
-                }
-              },
-            });
+          if (!prompt_id) {
+            prompt_id = config.getSessionId() + '########' + getPromptCount();
           }
-        } catch (error: unknown) {
-          if (error instanceof UnauthorizedError) {
-            onAuthError('Session expired or is unauthorized.');
-          } else if (!isNodeError(error) || error.name !== 'AbortError') {
-            addItem(
-              {
-                type: MessageType.ERROR,
-                text: parseAndFormatApiError(
-                  getErrorMessage(error) || 'Unknown error',
-                  config.getContentGeneratorConfig()?.authType,
-                  undefined,
-                  config.getModel(),
-                  DEFAULT_GEMINI_FLASH_MODEL,
-                ),
-              },
+          return promptIdContext.run(prompt_id, async () => {
+            const { queryToSend, shouldProceed } = await prepareQueryForGemini(
+              query,
               userMessageTimestamp,
+              abortSignal,
+              prompt_id!,
             );
-          }
-        } finally {
-          if (activeQueryIdRef.current === queryId) {
-            setIsResponding(false);
-          }
-        }
-      });
-    },
+
+            if (!shouldProceed || queryToSend === null) {
+              return;
+            }
+
+            if (!options?.isContinuation) {
+              if (typeof queryToSend === 'string') {
+                // logging the text prompts only for now
+                const promptText = queryToSend;
+                logUserPrompt(
+                  config,
+                  new UserPromptEvent(
+                    promptText.length,
+                    prompt_id!,
+                    config.getContentGeneratorConfig()?.authType,
+                    promptText,
+                  ),
+                );
+              }
+              startNewPrompt();
+              setThought(null); // Reset thought when starting a new prompt
+            }
+
+            setIsResponding(true);
+            setInitError(null);
+
+            // Store query and prompt_id for potential retry on loop detection
+            lastQueryRef.current = queryToSend;
+            lastPromptIdRef.current = prompt_id!;
+
+            try {
+              const stream = geminiClient.sendMessageStream(
+                queryToSend,
+                abortSignal,
+                prompt_id!,
+              );
+              const processingStatus = await processGeminiStreamEvents(
+                stream,
+                userMessageTimestamp,
+                abortSignal,
+              );
+
+              if (processingStatus === StreamProcessingStatus.UserCancelled) {
+                return;
+              }
+
+              if (pendingHistoryItemRef.current) {
+                addItem(pendingHistoryItemRef.current, userMessageTimestamp);
+                setPendingHistoryItem(null);
+              }
+              if (loopDetectedRef.current) {
+                loopDetectedRef.current = false;
+                // Show the confirmation dialog to choose whether to disable loop detection
+                setLoopDetectionConfirmationRequest({
+                  onComplete: (result: {
+                    userSelection: 'disable' | 'keep';
+                  }) => {
+                    setLoopDetectionConfirmationRequest(null);
+
+                    if (result.userSelection === 'disable') {
+                      config
+                        .getGeminiClient()
+                        .getLoopDetectionService()
+                        .disableForSession();
+                      addItem(
+                        {
+                          type: 'info',
+                          text: `Loop detection has been disabled for this session. Retrying request...`,
+                        },
+                        Date.now(),
+                      );
+
+                      if (lastQueryRef.current && lastPromptIdRef.current) {
+                        submitQuery(
+                          lastQueryRef.current,
+                          { isContinuation: true },
+                          lastPromptIdRef.current,
+                        );
+                      }
+                    } else {
+                      addItem(
+                        {
+                          type: 'info',
+                          text: `A potential loop was detected. This can happen due to repetitive tool calls or other model behavior. The request has been halted.`,
+                        },
+                        Date.now(),
+                      );
+                    }
+                  },
+                });
+              }
+            } catch (error: unknown) {
+              spanMetadata.error = error;
+              if (error instanceof UnauthorizedError) {
+                onAuthError('Session expired or is unauthorized.');
+              } else if (!isNodeError(error) || error.name !== 'AbortError') {
+                addItem(
+                  {
+                    type: MessageType.ERROR,
+                    text: parseAndFormatApiError(
+                      getErrorMessage(error) || 'Unknown error',
+                      config.getContentGeneratorConfig()?.authType,
+                      undefined,
+                      config.getModel(),
+                      DEFAULT_GEMINI_FLASH_MODEL,
+                    ),
+                  },
+                  userMessageTimestamp,
+                );
+              }
+            } finally {
+              if (activeQueryIdRef.current === queryId) {
+                setIsResponding(false);
+              }
+            }
+          });
+        },
+      ),
     [
       streamingState,
       setModelSwitchedFromQuotaError,
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
index a59de8698e..0cc1adf7a1 100644
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -25,6 +25,7 @@ import {
   ToolCallEvent,
   logToolOutputTruncated,
   ToolOutputTruncatedEvent,
+  runInDevTraceSpan,
 } from '../index.js';
 import { READ_FILE_TOOL_NAME, SHELL_TOOL_NAME } from '../tools/tool-names.js';
 import type { Part, PartListUnion } from '@google/genai';
@@ -668,36 +669,42 @@ export class CoreToolScheduler {
     request: ToolCallRequestInfo | ToolCallRequestInfo[],
     signal: AbortSignal,
   ): Promise<void> {
-    if (this.isRunning() || this.isScheduling) {
-      return new Promise((resolve, reject) => {
-        const abortHandler = () => {
-          // Find and remove the request from the queue
-          const index = this.requestQueue.findIndex(
-            (item) => item.request === request,
-          );
-          if (index > -1) {
-            this.requestQueue.splice(index, 1);
-            reject(new Error('Tool call cancelled while in queue.'));
-          }
-        };
+    return runInDevTraceSpan(
+      { name: 'schedule' },
+      async ({ metadata: spanMetadata }) => {
+        spanMetadata.input = request;
+        if (this.isRunning() || this.isScheduling) {
+          return new Promise((resolve, reject) => {
+            const abortHandler = () => {
+              // Find and remove the request from the queue
+              const index = this.requestQueue.findIndex(
+                (item) => item.request === request,
+              );
+              if (index > -1) {
+                this.requestQueue.splice(index, 1);
+                reject(new Error('Tool call cancelled while in queue.'));
+              }
+            };
 
-        signal.addEventListener('abort', abortHandler, { once: true });
+            signal.addEventListener('abort', abortHandler, { once: true });
 
-        this.requestQueue.push({
-          request,
-          signal,
-          resolve: () => {
-            signal.removeEventListener('abort', abortHandler);
-            resolve();
-          },
-          reject: (reason?: Error) => {
-            signal.removeEventListener('abort', abortHandler);
-            reject(reason);
-          },
-        });
-      });
-    }
-    return this._schedule(request, signal);
+            this.requestQueue.push({
+              request,
+              signal,
+              resolve: () => {
+                signal.removeEventListener('abort', abortHandler);
+                resolve();
+              },
+              reject: (reason?: Error) => {
+                signal.removeEventListener('abort', abortHandler);
+                reject(reason);
+              },
+            });
+          });
+        }
+        return this._schedule(request, signal);
+      },
+    );
   }
 
   cancelAll(signal: AbortSignal): void {
@@ -1091,134 +1098,153 @@ export class CoreToolScheduler {
 
         const shellExecutionConfig = this.config.getShellExecutionConfig();
 
-        // TODO: Refactor to remove special casing for ShellToolInvocation.
-        // Introduce a generic callbacks object for the execute method to handle
-        // things like `onPid` and `onLiveOutput`. This will make the scheduler
-        // agnostic to the invocation type.
-        let promise: Promise<ToolResult>;
-        if (invocation instanceof ShellToolInvocation) {
-          const setPidCallback = (pid: number) => {
-            this.toolCalls = this.toolCalls.map((tc) =>
-              tc.request.callId === callId && tc.status === 'executing'
-                ? { ...tc, pid }
-                : tc,
-            );
-            this.notifyToolCallsUpdate();
-          };
-          promise = invocation.execute(
-            signal,
-            liveOutputCallback,
-            shellExecutionConfig,
-            setPidCallback,
-          );
-        } else {
-          promise = invocation.execute(
-            signal,
-            liveOutputCallback,
-            shellExecutionConfig,
-          );
-        }
-
-        try {
-          const toolResult: ToolResult = await promise;
-          if (signal.aborted) {
-            this.setStatusInternal(
-              callId,
-              'cancelled',
-              signal,
-              'User cancelled tool execution.',
-            );
-          } else if (toolResult.error === undefined) {
-            let content = toolResult.llmContent;
-            let outputFile: string | undefined = undefined;
-            const contentLength =
-              typeof content === 'string' ? content.length : undefined;
-            if (
-              typeof content === 'string' &&
-              toolName === SHELL_TOOL_NAME &&
-              this.config.getEnableToolOutputTruncation() &&
-              this.config.getTruncateToolOutputThreshold() > 0 &&
-              this.config.getTruncateToolOutputLines() > 0
-            ) {
-              const originalContentLength = content.length;
-              const threshold = this.config.getTruncateToolOutputThreshold();
-              const lines = this.config.getTruncateToolOutputLines();
-              const truncatedResult = await truncateAndSaveToFile(
-                content,
-                callId,
-                this.config.storage.getProjectTempDir(),
-                threshold,
-                lines,
+        await runInDevTraceSpan(
+          {
+            name: toolCall.tool.name,
+            attributes: { type: 'tool-call' },
+          },
+          async ({ metadata: spanMetadata }) => {
+            spanMetadata.input = {
+              request: toolCall.request,
+            };
+            // TODO: Refactor to remove special casing for ShellToolInvocation.
+            // Introduce a generic callbacks object for the execute method to handle
+            // things like `onPid` and `onLiveOutput`. This will make the scheduler
+            // agnostic to the invocation type.
+            let promise: Promise<ToolResult>;
+            if (invocation instanceof ShellToolInvocation) {
+              const setPidCallback = (pid: number) => {
+                this.toolCalls = this.toolCalls.map((tc) =>
+                  tc.request.callId === callId && tc.status === 'executing'
+                    ? { ...tc, pid }
+                    : tc,
+                );
+                this.notifyToolCallsUpdate();
+              };
+              promise = invocation.execute(
+                signal,
+                liveOutputCallback,
+                shellExecutionConfig,
+                setPidCallback,
               );
-              content = truncatedResult.content;
-              outputFile = truncatedResult.outputFile;
+            } else {
+              promise = invocation.execute(
+                signal,
+                liveOutputCallback,
+                shellExecutionConfig,
+              );
+            }
 
-              if (outputFile) {
-                logToolOutputTruncated(
-                  this.config,
-                  new ToolOutputTruncatedEvent(
-                    scheduledCall.request.prompt_id,
-                    {
-                      toolName,
-                      originalContentLength,
-                      truncatedContentLength: content.length,
-                      threshold,
-                      lines,
-                    },
+            try {
+              const toolResult: ToolResult = await promise;
+              spanMetadata.output = toolResult;
+              if (signal.aborted) {
+                this.setStatusInternal(
+                  callId,
+                  'cancelled',
+                  signal,
+                  'User cancelled tool execution.',
+                );
+              } else if (toolResult.error === undefined) {
+                let content = toolResult.llmContent;
+                let outputFile: string | undefined = undefined;
+                const contentLength =
+                  typeof content === 'string' ? content.length : undefined;
+                if (
+                  typeof content === 'string' &&
+                  toolName === SHELL_TOOL_NAME &&
+                  this.config.getEnableToolOutputTruncation() &&
+                  this.config.getTruncateToolOutputThreshold() > 0 &&
+                  this.config.getTruncateToolOutputLines() > 0
+                ) {
+                  const originalContentLength = content.length;
+                  const threshold =
+                    this.config.getTruncateToolOutputThreshold();
+                  const lines = this.config.getTruncateToolOutputLines();
+                  const truncatedResult = await truncateAndSaveToFile(
+                    content,
+                    callId,
+                    this.config.storage.getProjectTempDir(),
+                    threshold,
+                    lines,
+                  );
+                  content = truncatedResult.content;
+                  outputFile = truncatedResult.outputFile;
+
+                  if (outputFile) {
+                    logToolOutputTruncated(
+                      this.config,
+                      new ToolOutputTruncatedEvent(
+                        scheduledCall.request.prompt_id,
+                        {
+                          toolName,
+                          originalContentLength,
+                          truncatedContentLength: content.length,
+                          threshold,
+                          lines,
+                        },
+                      ),
+                    );
+                  }
+                }
+
+                const response = convertToFunctionResponse(
+                  toolName,
+                  callId,
+                  content,
+                );
+                const successResponse: ToolCallResponseInfo = {
+                  callId,
+                  responseParts: response,
+                  resultDisplay: toolResult.returnDisplay,
+                  error: undefined,
+                  errorType: undefined,
+                  outputFile,
+                  contentLength,
+                };
+                this.setStatusInternal(
+                  callId,
+                  'success',
+                  signal,
+                  successResponse,
+                );
+              } else {
+                // It is a failure
+                const error = new Error(toolResult.error.message);
+                const errorResponse = createErrorResponse(
+                  scheduledCall.request,
+                  error,
+                  toolResult.error.type,
+                );
+                this.setStatusInternal(callId, 'error', signal, errorResponse);
+              }
+            } catch (executionError: unknown) {
+              spanMetadata.error = executionError;
+              if (signal.aborted) {
+                this.setStatusInternal(
+                  callId,
+                  'cancelled',
+                  signal,
+                  'User cancelled tool execution.',
+                );
+              } else {
+                this.setStatusInternal(
+                  callId,
+                  'error',
+                  signal,
+                  createErrorResponse(
+                    scheduledCall.request,
+                    executionError instanceof Error
+                      ? executionError
+                      : new Error(String(executionError)),
+                    ToolErrorType.UNHANDLED_EXCEPTION,
                   ),
                 );
               }
             }
-
-            const response = convertToFunctionResponse(
-              toolName,
-              callId,
-              content,
-            );
-            const successResponse: ToolCallResponseInfo = {
-              callId,
-              responseParts: response,
-              resultDisplay: toolResult.returnDisplay,
-              error: undefined,
-              errorType: undefined,
-              outputFile,
-              contentLength,
-            };
-            this.setStatusInternal(callId, 'success', signal, successResponse);
-          } else {
-            // It is a failure
-            const error = new Error(toolResult.error.message);
-            const errorResponse = createErrorResponse(
-              scheduledCall.request,
-              error,
-              toolResult.error.type,
-            );
-            this.setStatusInternal(callId, 'error', signal, errorResponse);
-          }
-        } catch (executionError: unknown) {
-          if (signal.aborted) {
-            this.setStatusInternal(
-              callId,
-              'cancelled',
-              signal,
-              'User cancelled tool execution.',
-            );
-          } else {
-            this.setStatusInternal(
-              callId,
-              'error',
-              signal,
-              createErrorResponse(
-                scheduledCall.request,
-                executionError instanceof Error
-                  ? executionError
-                  : new Error(String(executionError)),
-                ToolErrorType.UNHANDLED_EXCEPTION,
-              ),
-            );
-          }
-        }
-        await this.checkAndNotifyCompletion(signal);
+            await this.checkAndNotifyCompletion(signal);
+          },
+        );
       }
     }
   }
diff --git a/packages/core/src/core/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator.ts
index 4d78d12a4a..e73d74513f 100644
--- a/packages/core/src/core/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator.ts
@@ -28,6 +28,7 @@ import {
 import type { ContentGenerator } from './contentGenerator.js';
 import { toContents } from '../code_assist/converter.js';
 import { isStructuredError } from '../utils/quotaErrorDetection.js';
+import { runInDevTraceSpan, type SpanMetadata } from '../telemetry/trace.js';
 
 interface StructuredError {
   status: number;
@@ -107,47 +108,74 @@ export class LoggingContentGenerator implements ContentGenerator {
     req: GenerateContentParameters,
     userPromptId: string,
   ): Promise<GenerateContentResponse> {
-    const startTime = Date.now();
-    this.logApiRequest(toContents(req.contents), req.model, userPromptId);
-    try {
-      const response = await this.wrapped.generateContent(req, userPromptId);
-      const durationMs = Date.now() - startTime;
-      this._logApiResponse(
-        durationMs,
-        response.modelVersion || req.model,
-        userPromptId,
-        response.usageMetadata,
-        JSON.stringify(response),
-      );
-      return response;
-    } catch (error) {
-      const durationMs = Date.now() - startTime;
-      this._logApiError(durationMs, error, req.model, userPromptId);
-      throw error;
-    }
+    return runInDevTraceSpan(
+      {
+        name: 'generateContent',
+      },
+      async ({ metadata: spanMetadata }) => {
+        spanMetadata.input = { request: req, userPromptId, model: req.model };
+
+        const startTime = Date.now();
+        this.logApiRequest(toContents(req.contents), req.model, userPromptId);
+        try {
+          const response = await this.wrapped.generateContent(
+            req,
+            userPromptId,
+          );
+          spanMetadata.output = {
+            response,
+            usageMetadata: response.usageMetadata,
+          };
+          const durationMs = Date.now() - startTime;
+          this._logApiResponse(
+            durationMs,
+            response.modelVersion || req.model,
+            userPromptId,
+            response.usageMetadata,
+            JSON.stringify(response),
+          );
+          return response;
+        } catch (error) {
+          const durationMs = Date.now() - startTime;
+          this._logApiError(durationMs, error, req.model, userPromptId);
+          throw error;
+        }
+      },
+    );
   }
 
   async generateContentStream(
     req: GenerateContentParameters,
     userPromptId: string,
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
-    const startTime = Date.now();
-    this.logApiRequest(toContents(req.contents), req.model, userPromptId);
+    return runInDevTraceSpan(
+      {
+        name: 'generateContentStream',
+        noAutoEnd: true,
+      },
+      async ({ metadata: spanMetadata, endSpan }) => {
+        spanMetadata.input = { request: req, userPromptId, model: req.model };
+        const startTime = Date.now();
+        this.logApiRequest(toContents(req.contents), req.model, userPromptId);
 
-    let stream: AsyncGenerator<GenerateContentResponse>;
-    try {
-      stream = await this.wrapped.generateContentStream(req, userPromptId);
-    } catch (error) {
-      const durationMs = Date.now() - startTime;
-      this._logApiError(durationMs, error, req.model, userPromptId);
-      throw error;
-    }
+        let stream: AsyncGenerator<GenerateContentResponse>;
+        try {
+          stream = await this.wrapped.generateContentStream(req, userPromptId);
+        } catch (error) {
+          const durationMs = Date.now() - startTime;
+          this._logApiError(durationMs, error, req.model, userPromptId);
+          throw error;
+        }
 
-    return this.loggingStreamWrapper(
-      stream,
-      startTime,
-      userPromptId,
-      req.model,
+        return this.loggingStreamWrapper(
+          stream,
+          startTime,
+          userPromptId,
+          req.model,
+          spanMetadata,
+          endSpan,
+        );
+      },
     );
   }
 
@@ -156,6 +184,8 @@ export class LoggingContentGenerator implements ContentGenerator {
     startTime: number,
     userPromptId: string,
     model: string,
+    spanMetadata: SpanMetadata,
+    endSpan: () => void,
   ): AsyncGenerator<GenerateContentResponse> {
     const responses: GenerateContentResponse[] = [];
 
@@ -177,7 +207,15 @@ export class LoggingContentGenerator implements ContentGenerator {
         lastUsageMetadata,
         JSON.stringify(responses),
       );
+      spanMetadata.output = {
+        streamChunks: responses.map((r) => ({
+          content: r.candidates?.[0]?.content ?? null,
+        })),
+        usageMetadata: lastUsageMetadata,
+        durationMs,
+      };
     } catch (error) {
+      spanMetadata.error = error;
       const durationMs = Date.now() - startTime;
       this._logApiError(
         durationMs,
@@ -186,6 +224,8 @@ export class LoggingContentGenerator implements ContentGenerator {
         userPromptId,
       );
       throw error;
+    } finally {
+      endSpan();
     }
   }
 
@@ -196,6 +236,16 @@ export class LoggingContentGenerator implements ContentGenerator {
   async embedContent(
     req: EmbedContentParameters,
   ): Promise<EmbedContentResponse> {
-    return this.wrapped.embedContent(req);
+    return runInDevTraceSpan(
+      {
+        name: 'embedContent',
+      },
+      async ({ metadata: spanMetadata }) => {
+        spanMetadata.input = { request: req };
+        const output = await this.wrapped.embedContent(req);
+        spanMetadata.output = output;
+        return output;
+      },
+    );
   }
 }
diff --git a/packages/core/src/telemetry/index.ts b/packages/core/src/telemetry/index.ts
index dfcf860107..c379a835be 100644
--- a/packages/core/src/telemetry/index.ts
+++ b/packages/core/src/telemetry/index.ts
@@ -137,3 +137,4 @@ export {
   GenAiProviderName,
   GenAiTokenType,
 } from './metrics.js';
+export { runInDevTraceSpan, type SpanMetadata } from './trace.js';
diff --git a/packages/core/src/telemetry/trace.ts b/packages/core/src/telemetry/trace.ts
new file mode 100644
index 0000000000..6b4feda387
--- /dev/null
+++ b/packages/core/src/telemetry/trace.ts
@@ -0,0 +1,153 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  diag,
+  SpanStatusCode,
+  trace,
+  type AttributeValue,
+  type SpanOptions,
+} from '@opentelemetry/api';
+import { safeJsonStringify } from '../utils/safeJsonStringify.js';
+
+const TRACER_NAME = 'gemini-cli';
+const TRACER_VERSION = 'v1';
+
+/**
+ * Metadata for a span.
+ */
+export interface SpanMetadata {
+  /** The name of the span. */
+  name: string;
+  /** The input to the span. */
+  input?: unknown;
+  /** The output of the span. */
+  output?: unknown;
+  error?: unknown;
+  /** Additional attributes for the span. */
+  attributes: Record<string, AttributeValue>;
+}
+
+/**
+ * Runs a function in a new OpenTelemetry span.
+ *
+ * The `metadata` object passed to `fn` is read when the span ends to set the span's status and attributes.
+ *
+ * @example
+ * ```typescript
+ * await runInDevTraceSpan({ name: 'my-operation' }, async ({ metadata }) => {
+ *   metadata.input = { foo: 'bar' };
+ *   // ... do work ...
+ *   metadata.output = { result: 'baz' };
+ *   metadata.attributes['my.custom.attribute'] = 'some-value';
+ * });
+ * ```
+ *
+ * @param opts The options for the span.
+ * @param fn The function to run in the span.
+ * @returns The result of the function.
+ */
+export async function runInDevTraceSpan<R>(
+  opts: SpanOptions & { name: string; noAutoEnd?: boolean },
+  fn: ({
+    metadata,
+  }: {
+    metadata: SpanMetadata;
+    endSpan: () => void;
+  }) => Promise<R>,
+): Promise<R> {
+  const { name: spanName, noAutoEnd, ...restOfSpanOpts } = opts;
+  if (process.env['GEMINI_DEV_TRACING'] !== 'true') {
+    // Tracing is opt-in: unless GEMINI_DEV_TRACING is exactly 'true', run fn without a span.
+    return await fn({
+      metadata: {
+        name: spanName,
+        attributes: {},
+      },
+      endSpan: () => {
+        // noop
+      },
+    });
+  }
+
+  const tracer = trace.getTracer(TRACER_NAME, TRACER_VERSION);
+  return await tracer.startActiveSpan(
+    opts.name,
+    restOfSpanOpts,
+    async (span) => {
+      const meta: SpanMetadata = {
+        name: spanName,
+        attributes: {},
+      };
+      const endSpan = () => {
+        try {
+          if (meta.input !== undefined) {
+            span.setAttribute('input-json', safeJsonStringify(meta.input));
+          }
+          if (meta.output !== undefined) {
+            span.setAttribute('output-json', safeJsonStringify(meta.output));
+          }
+          for (const [key, value] of Object.entries(meta.attributes)) {
+            span.setAttribute(key, value as AttributeValue);
+          }
+          if (meta.error) {
+            span.setStatus({
+              code: SpanStatusCode.ERROR,
+              message: getErrorMessage(meta.error),
+            });
+            if (meta.error instanceof Error) {
+              span.recordException(meta.error);
+            }
+          } else {
+            span.setStatus({ code: SpanStatusCode.OK });
+          }
+        } catch (e) {
+          // Log the error but don't rethrow, to ensure span.end() is called.
+          diag.error('Error setting span attributes in endSpan', e);
+          span.setStatus({
+            code: SpanStatusCode.ERROR,
+            message: `Error in endSpan: ${getErrorMessage(e)}`,
+          });
+        } finally {
+          span.end();
+        }
+      };
+      try {
+        return await fn({ metadata: meta, endSpan });
+      } catch (e) {
+        meta.error = e;
+        if (noAutoEnd) {
+          // For streaming operations, the delegated endSpan call will not be reached
+          // on an exception, so we must end the span here to prevent a leak.
+          endSpan();
+        }
+        throw e;
+      } finally {
+        if (!noAutoEnd) {
+          // For non-streaming operations, this ensures the span is always closed,
+          // and if an error occurred, it will be recorded correctly by endSpan.
+          endSpan();
+        }
+      }
+    },
+  );
+}
+
+/**
+ * Gets the error message from an error object.
+ *
+ * @param e The error object.
+ * @returns The error message.
+ */
+function getErrorMessage(e: unknown): string {
+  if (e instanceof Error) {
+    return e.message;
+  }
+  if (typeof e === 'string') {
+    return e;
+  }
+  return safeJsonStringify(e);
+}

From e9f8ccd51abdb61dfc4cbe9dff1b08429276171c Mon Sep 17 00:00:00 2001
From: Jacob Richman <jacob314@gmail.com>
Date: Mon, 27 Oct 2025 16:31:01 -0700
Subject: [PATCH 46/73] Fix config test so it passes even if the user running
 the test happens to have set GEMINI_MODEL to flash (#12114)

---
 packages/cli/src/config/config.test.ts | 49 +++++++++-----------------
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index a4cd313034..ebf448930f 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -114,13 +114,24 @@ vi.mock('@google/gemini-cli-core', async () => {
   };
 });
 
+// Global setup to ensure clean environment for all tests in this file
+const originalArgv = process.argv;
+const originalGeminiModel = process.env['GEMINI_MODEL'];
+
+beforeEach(() => {
+  delete process.env['GEMINI_MODEL'];
+});
+
+afterEach(() => {
+  process.argv = originalArgv;
+  if (originalGeminiModel !== undefined) {
+    process.env['GEMINI_MODEL'] = originalGeminiModel;
+  } else {
+    delete process.env['GEMINI_MODEL'];
+  }
+});
+
 describe('parseArguments', () => {
-  const originalArgv = process.argv;
-
-  afterEach(() => {
-    process.argv = originalArgv;
-  });
-
   it('should throw an error when both --prompt and --prompt-interactive are used together', async () => {
     process.argv = [
       'node',
@@ -494,8 +505,6 @@ describe('parseArguments', () => {
 });
 
 describe('loadCliConfig', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -503,7 +512,6 @@ describe('loadCliConfig', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -1168,8 +1176,6 @@ describe('Approval mode tool exclusion logic', () => {
 });
 
 describe('loadCliConfig with allowed-mcp-server-names', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -1177,7 +1183,6 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -1498,8 +1503,6 @@ describe('loadCliConfig model selection with model router', () => {
 });
 
 describe('loadCliConfig folderTrust', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -1507,7 +1510,6 @@ describe('loadCliConfig folderTrust', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -1550,8 +1552,6 @@ describe('loadCliConfig folderTrust', () => {
 });
 
 describe('loadCliConfig with includeDirectories', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -1562,8 +1562,6 @@ describe('loadCliConfig with includeDirectories', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
-    vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
 
@@ -1604,8 +1602,6 @@ describe('loadCliConfig with includeDirectories', () => {
 });
 
 describe('loadCliConfig chatCompression', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -1613,7 +1609,6 @@ describe('loadCliConfig chatCompression', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -1644,8 +1639,6 @@ describe('loadCliConfig chatCompression', () => {
 });
 
 describe('loadCliConfig useRipgrep', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -1653,7 +1646,6 @@ describe('loadCliConfig useRipgrep', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -1710,8 +1702,6 @@ describe('loadCliConfig useRipgrep', () => {
 });
 
 describe('screenReader configuration', () => {
-  const originalArgv = process.argv;
-
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
@@ -1719,7 +1709,6 @@ describe('screenReader configuration', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -1764,7 +1753,6 @@ describe('screenReader configuration', () => {
 });
 
 describe('loadCliConfig tool exclusions', () => {
-  const originalArgv = process.argv;
   const originalIsTTY = process.stdin.isTTY;
 
   beforeEach(() => {
@@ -1779,7 +1767,6 @@ describe('loadCliConfig tool exclusions', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     process.stdin.isTTY = originalIsTTY;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
@@ -1872,7 +1859,6 @@ describe('loadCliConfig tool exclusions', () => {
 });
 
 describe('loadCliConfig interactive', () => {
-  const originalArgv = process.argv;
   const originalIsTTY = process.stdin.isTTY;
 
   beforeEach(() => {
@@ -1883,7 +1869,6 @@ describe('loadCliConfig interactive', () => {
   });
 
   afterEach(() => {
-    process.argv = originalArgv;
     process.stdin.isTTY = originalIsTTY;
     vi.unstubAllEnvs();
     vi.restoreAllMocks();

From 85f3a8c21090c6ea4f0b23915716fe52ad50427a Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Mon, 27 Oct 2025 16:46:35 -0700
Subject: [PATCH 47/73] Migrate to coreEvents/debugLogger (#12107)

---
 packages/core/src/code_assist/oauth2.test.ts  | 100 +++-----
 packages/core/src/code_assist/oauth2.ts       |  26 +-
 packages/core/src/tools/mcp-client-manager.ts |   5 +-
 packages/core/src/tools/mcp-client.test.ts    |  15 +-
 packages/core/src/tools/mcp-client.ts         | 237 ++++++++----------
 packages/core/src/tools/tool-registry.ts      |   8 +-
 6 files changed, 173 insertions(+), 218 deletions(-)

diff --git a/packages/core/src/code_assist/oauth2.test.ts b/packages/core/src/code_assist/oauth2.test.ts
index 2210c695f9..b15a7aa89b 100644
--- a/packages/core/src/code_assist/oauth2.test.ts
+++ b/packages/core/src/code_assist/oauth2.test.ts
@@ -70,7 +70,7 @@ describe('oauth2', () => {
       tempHomeDir = fs.mkdtempSync(
         path.join(os.tmpdir(), 'gemini-cli-test-home-'),
       );
-      (os.homedir as Mock).mockReturnValue(tempHomeDir);
+      vi.mocked(os.homedir).mockReturnValue(tempHomeDir);
     });
     afterEach(() => {
       fs.rmSync(tempHomeDir, { recursive: true, force: true });
@@ -102,15 +102,15 @@ describe('oauth2', () => {
         credentials: mockTokens,
         on: vi.fn(),
       } as unknown as OAuth2Client;
-      (OAuth2Client as unknown as Mock).mockImplementation(
-        () => mockOAuth2Client,
-      );
+      vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
       vi.spyOn(crypto, 'randomBytes').mockReturnValue(mockState as never);
-      (open as Mock).mockImplementation(async () => ({ on: vi.fn() }) as never);
+      vi.mocked(open).mockImplementation(
+        async () => ({ on: vi.fn() }) as never,
+      );
 
       // Mock the UserInfo API response
-      (global.fetch as Mock).mockResolvedValue({
+      vi.mocked(global.fetch).mockResolvedValue({
         ok: true,
         json: vi
           .fn()
@@ -232,9 +232,7 @@ describe('oauth2', () => {
         generateCodeVerifierAsync: mockGenerateCodeVerifierAsync,
         on: vi.fn(),
       } as unknown as OAuth2Client;
-      (OAuth2Client as unknown as Mock).mockImplementation(
-        () => mockOAuth2Client,
-      );
+      vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
       const mockReadline = {
         question: vi.fn((_query, callback) => callback(mockCode)),
@@ -307,7 +305,7 @@ describe('oauth2', () => {
         };
 
         // To mock the new OAuth2Client() inside the function
-        (OAuth2Client as unknown as Mock).mockImplementation(
+        vi.mocked(OAuth2Client).mockImplementation(
           () => mockClient as unknown as OAuth2Client,
         );
 
@@ -387,7 +385,7 @@ describe('oauth2', () => {
           getTokenInfo: vi.fn().mockResolvedValue({}),
           on: vi.fn(),
         };
-        (OAuth2Client as unknown as Mock).mockImplementation(
+        vi.mocked(OAuth2Client).mockImplementation(
           () => mockClient as unknown as OAuth2Client,
         );
 
@@ -411,7 +409,7 @@ describe('oauth2', () => {
           getTokenInfo: vi.fn().mockResolvedValue({}),
           on: vi.fn(),
         };
-        (OAuth2Client as unknown as Mock).mockImplementation(
+        vi.mocked(OAuth2Client).mockImplementation(
           () => mockClient as unknown as OAuth2Client,
         );
 
@@ -483,9 +481,7 @@ describe('oauth2', () => {
           getAccessToken: mockGetAccessToken,
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         // Mock the UserInfo API response for fetchAndCacheUserInfo
         (global.fetch as Mock).mockResolvedValue({
@@ -543,9 +539,7 @@ describe('oauth2', () => {
           getTokenInfo: mockGetTokenInfo,
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         // Make it fall through to cached credentials path
         const cachedCreds = { refresh_token: 'cached-token' };
@@ -578,9 +572,7 @@ describe('oauth2', () => {
           getTokenInfo: mockGetTokenInfo,
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         // Make it fall through to cached credentials path
         const cachedCreds = { refresh_token: 'cached-token' };
@@ -609,9 +601,7 @@ describe('oauth2', () => {
           generateAuthUrl: vi.fn().mockReturnValue('https://example.com/auth'),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         await expect(
           getOauthClient(AuthType.LOGIN_WITH_GOOGLE, mockConfig),
@@ -624,11 +614,9 @@ describe('oauth2', () => {
           generateAuthUrl: vi.fn().mockReturnValue(mockAuthUrl),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
-        (open as Mock).mockImplementation(
+        vi.mocked(open).mockImplementation(
           async () => ({ on: vi.fn() }) as never,
         );
 
@@ -663,11 +651,9 @@ describe('oauth2', () => {
           generateAuthUrl: vi.fn().mockReturnValue(mockAuthUrl),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
-        (open as Mock).mockImplementation(
+        vi.mocked(open).mockImplementation(
           async () => ({ on: vi.fn() }) as never,
         );
 
@@ -722,11 +708,9 @@ describe('oauth2', () => {
           generateAuthUrl: vi.fn().mockReturnValue(mockAuthUrl),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
-        (open as Mock).mockImplementation(
+        vi.mocked(open).mockImplementation(
           async () => ({ on: vi.fn() }) as never,
         );
 
@@ -787,12 +771,10 @@ describe('oauth2', () => {
             .mockRejectedValue(new Error('Token exchange failed')),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         vi.spyOn(crypto, 'randomBytes').mockReturnValue(mockState as never);
-        (open as Mock).mockImplementation(
+        vi.mocked(open).mockImplementation(
           async () => ({ on: vi.fn() }) as never,
         );
 
@@ -858,24 +840,22 @@ describe('oauth2', () => {
             .mockResolvedValue({ token: 'test-access-token' }),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         vi.spyOn(crypto, 'randomBytes').mockReturnValue(mockState as never);
-        (open as Mock).mockImplementation(
+        vi.mocked(open).mockImplementation(
           async () => ({ on: vi.fn() }) as never,
         );
 
         // Mock fetch to fail
-        (global.fetch as Mock).mockResolvedValue({
+        vi.mocked(global.fetch).mockResolvedValue({
           ok: false,
           status: 500,
           statusText: 'Internal Server Error',
         } as unknown as Response);
 
-        const consoleErrorSpy = vi
-          .spyOn(console, 'error')
+        const consoleLogSpy = vi
+          .spyOn(console, 'log')
           .mockImplementation(() => {});
 
         let requestCallback!: http.RequestListener;
@@ -894,10 +874,10 @@ describe('oauth2', () => {
           close: vi.fn(),
           on: vi.fn(),
           address: () => ({ port: 3000 }),
-        };
+        } as unknown as http.Server;
         (http.createServer as Mock).mockImplementation((cb) => {
           requestCallback = cb;
-          return mockHttpServer as unknown as http.Server;
+          return mockHttpServer;
         });
 
         const clientPromise = getOauthClient(
@@ -919,13 +899,13 @@ describe('oauth2', () => {
 
         // Authentication should succeed even if fetchAndCacheUserInfo fails
         expect(client).toBe(mockOAuth2Client);
-        expect(consoleErrorSpy).toHaveBeenCalledWith(
+        expect(consoleLogSpy).toHaveBeenCalledWith(
           'Failed to fetch user info:',
           500,
           'Internal Server Error',
         );
 
-        consoleErrorSpy.mockRestore();
+        consoleLogSpy.mockRestore();
       });
 
       it('should handle user code authentication failure with descriptive error', async () => {
@@ -946,9 +926,7 @@ describe('oauth2', () => {
             .mockRejectedValue(new Error('Invalid authorization code')),
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         const mockReadline = {
           question: vi.fn((_query, callback) => callback('invalid-code')),
@@ -1028,9 +1006,7 @@ describe('oauth2', () => {
           getTokenInfo: mockGetTokenInfo,
           on: vi.fn(),
         } as unknown as OAuth2Client;
-        (OAuth2Client as unknown as Mock).mockImplementation(
-          () => mockOAuth2Client,
-        );
+        vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
         // Pre-populate credentials to make getOauthClient resolve quickly
         const credsPath = path.join(
@@ -1112,12 +1088,12 @@ describe('oauth2', () => {
         on: mockOn,
         credentials: mockTokens,
       } as unknown as OAuth2Client;
-      (OAuth2Client as unknown as Mock).mockImplementation(
-        () => mockOAuth2Client,
-      );
+      vi.mocked(OAuth2Client).mockImplementation(() => mockOAuth2Client);
 
       vi.spyOn(crypto, 'randomBytes').mockReturnValue(mockState as never);
-      (open as Mock).mockImplementation(async () => ({ on: vi.fn() }) as never);
+      vi.mocked(open).mockImplementation(
+        async () => ({ on: vi.fn() }) as never,
+      );
 
       (global.fetch as Mock).mockResolvedValue({
         ok: true,
@@ -1203,7 +1179,7 @@ describe('oauth2', () => {
         on: vi.fn(),
       };
 
-      (OAuth2Client as unknown as Mock).mockImplementation(
+      vi.mocked(OAuth2Client).mockImplementation(
         () => mockClient as unknown as OAuth2Client,
       );
 
diff --git a/packages/core/src/code_assist/oauth2.ts b/packages/core/src/code_assist/oauth2.ts
index ef0be547f0..46ff9fcb00 100644
--- a/packages/core/src/code_assist/oauth2.ts
+++ b/packages/core/src/code_assist/oauth2.ts
@@ -185,7 +185,7 @@ async function initOauthClient(
     for (let i = 0; !success && i < maxRetries; i++) {
       success = await authWithUserCode(client);
       if (!success) {
-        console.error(
+        debugLogger.error(
           '\nFailed to authenticate with user code.',
           i === maxRetries - 1 ? '' : 'Retrying...\n',
         );
@@ -215,17 +215,17 @@ async function initOauthClient(
       // in a minimal Docker container), it will emit an unhandled 'error' event,
       // causing the entire Node.js process to crash.
       childProcess.on('error', (error) => {
-        console.error(
-          'Failed to open browser automatically. Please try running again with NO_BROWSER=true set.',
+        debugLogger.error(
+          `Failed to open browser with error:`,
+          getErrorMessage(error),
+          `\nPlease try running again with NO_BROWSER=true set.`,
         );
-        console.error('Browser error details:', getErrorMessage(error));
       });
     } catch (err) {
-      console.error(
-        'An unexpected error occurred while trying to open the browser:',
+      debugLogger.error(
+        `Failed to open browser with error:`,
         getErrorMessage(err),
-        '\nThis might be due to browser compatibility issues or system configuration.',
-        '\nPlease try running again with NO_BROWSER=true set for manual authentication.',
+        `\nPlease try running again with NO_BROWSER=true set.`,
       );
       throw new FatalAuthenticationError(
         `Failed to open browser: ${getErrorMessage(err)}`,
@@ -293,7 +293,7 @@ async function authWithUserCode(client: OAuth2Client): Promise<boolean> {
   });
 
   if (!code) {
-    console.error('Authorization code is required.');
+    debugLogger.error('Authorization code is required.');
     return false;
   }
 
@@ -305,7 +305,7 @@ async function authWithUserCode(client: OAuth2Client): Promise<boolean> {
     });
     client.setCredentials(tokens);
   } catch (error) {
-    console.error(
+    debugLogger.error(
       'Failed to authenticate with authorization code:',
       getErrorMessage(error),
     );
@@ -528,7 +528,7 @@ export async function clearCachedCredentialFile() {
     // Clear the in-memory OAuth client cache to force re-authentication
     clearOauthClientCache();
   } catch (e) {
-    console.error('Failed to clear cached credentials:', e);
+    debugLogger.warn('Failed to clear cached credentials:', e);
   }
 }
 
@@ -549,7 +549,7 @@ async function fetchAndCacheUserInfo(client: OAuth2Client): Promise<void> {
     );
 
     if (!response.ok) {
-      console.error(
+      debugLogger.log(
         'Failed to fetch user info:',
         response.status,
         response.statusText,
@@ -560,7 +560,7 @@ async function fetchAndCacheUserInfo(client: OAuth2Client): Promise<void> {
     const userInfo = await response.json();
     await userAccountManager.cacheGoogleAccount(userInfo.email);
   } catch (error) {
-    console.error('Error retrieving user info:', error);
+    debugLogger.log('Error retrieving user info:', error);
   }
 }
 
diff --git a/packages/core/src/tools/mcp-client-manager.ts b/packages/core/src/tools/mcp-client-manager.ts
index ec05563d3d..d482da3722 100644
--- a/packages/core/src/tools/mcp-client-manager.ts
+++ b/packages/core/src/tools/mcp-client-manager.ts
@@ -13,6 +13,7 @@ import {
 } from './mcp-client.js';
 import { getErrorMessage } from '../utils/errors.js';
 import type { EventEmitter } from 'node:events';
+import { coreEvents } from '../utils/events.js';
 
 /**
  * Manages the lifecycle of multiple MCP clients, including local child processes.
@@ -70,10 +71,12 @@ export class McpClientManager {
         } catch (error) {
           this.eventEmitter?.emit('mcp-client-update', this.clients);
           // Log the error but don't let a single failed server stop the others
-          console.error(
+          coreEvents.emitFeedback(
+            'error',
             `Error during discovery for server '${name}': ${getErrorMessage(
               error,
             )}`,
+            error,
           );
         }
       });
diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts
index fe755db7bc..23760e9914 100644
--- a/packages/core/src/tools/mcp-client.test.ts
+++ b/packages/core/src/tools/mcp-client.test.ts
@@ -29,6 +29,7 @@ import type { ToolRegistry } from './tool-registry.js';
 import * as fs from 'node:fs';
 import * as os from 'node:os';
 import * as path from 'node:path';
+import { coreEvents } from '../utils/events.js';
 
 vi.mock('@modelcontextprotocol/sdk/client/stdio.js');
 vi.mock('@modelcontextprotocol/sdk/client/index.js');
@@ -37,6 +38,12 @@ vi.mock('../mcp/oauth-provider.js');
 vi.mock('../mcp/oauth-token-storage.js');
 vi.mock('../mcp/oauth-utils.js');
 
+vi.mock('../utils/events.js', () => ({
+  coreEvents: {
+    emitFeedback: vi.fn(),
+  },
+}));
+
 describe('mcp-client', () => {
   let workspaceContext: WorkspaceContext;
   let testWorkspace: string;
@@ -164,9 +171,6 @@ describe('mcp-client', () => {
     });
 
     it('should handle errors when discovering prompts', async () => {
-      const consoleErrorSpy = vi
-        .spyOn(console, 'error')
-        .mockImplementation(() => {});
       const mockedClient = {
         connect: vi.fn(),
         discover: vi.fn(),
@@ -200,10 +204,11 @@ describe('mcp-client', () => {
       await expect(client.discover({} as Config)).rejects.toThrow(
         'No prompts or tools found on the server.',
       );
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
+      expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+        'error',
         `Error discovering prompts from test-server: Test error`,
+        expect.any(Error),
       );
-      consoleErrorSpy.mockRestore();
     });
 
     it('should not discover tools if server does not support them', async () => {
diff --git a/packages/core/src/tools/mcp-client.ts b/packages/core/src/tools/mcp-client.ts
index b0e46900a7..a2afc04736 100644
--- a/packages/core/src/tools/mcp-client.ts
+++ b/packages/core/src/tools/mcp-client.ts
@@ -42,6 +42,7 @@ import type {
 } from '../utils/workspaceContext.js';
 import type { ToolRegistry } from './tool-registry.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import { coreEvents } from '../utils/events.js';
 
 export const MCP_DEFAULT_TIMEOUT_MSEC = 10 * 60 * 1000; // default to 10 minutes
 
@@ -119,7 +120,11 @@ export class McpClient {
           return;
         }
         if (originalOnError) originalOnError(error);
-        console.error(`MCP ERROR (${this.serverName}):`, error.toString());
+        coreEvents.emitFeedback(
+          'error',
+          `MCP ERROR (${this.serverName})`,
+          error,
+        );
         this.updateStatus(MCPServerStatus.DISCONNECTED);
       };
       this.updateStatus(MCPServerStatus.CONNECTED);
@@ -340,8 +345,9 @@ async function handleAutomaticOAuth(
     }
 
     if (!oauthConfig) {
-      console.error(
-        `❌ Could not configure OAuth for '${mcpServerName}' - please authenticate manually with /mcp auth ${mcpServerName}`,
+      coreEvents.emitFeedback(
+        'error',
+        `Could not configure OAuth for '${mcpServerName}' - please authenticate manually with /mcp auth ${mcpServerName}`,
       );
       return false;
     }
@@ -370,8 +376,10 @@ async function handleAutomaticOAuth(
     );
     return true;
   } catch (error) {
-    console.error(
+    coreEvents.emitFeedback(
+      'error',
       `Failed to handle automatic OAuth for server '${mcpServerName}': ${getErrorMessage(error)}`,
+      error,
     );
     return false;
   }
@@ -420,8 +428,10 @@ async function createTransportWithOAuth(
 
     return null;
   } catch (error) {
-    console.error(
+    coreEvents.emitFeedback(
+      'error',
       `Failed to create OAuth transport for server '${mcpServerName}': ${getErrorMessage(error)}`,
+      error,
     );
     return null;
   }
@@ -520,7 +530,7 @@ export async function connectAndDiscover(
     );
 
     mcpClient.onerror = (error) => {
-      console.error(`MCP ERROR (${mcpServerName}):`, error.toString());
+      coreEvents.emitFeedback('error', `MCP ERROR (${mcpServerName}):`, error);
       updateMCPServerStatus(mcpServerName, MCPServerStatus.DISCONNECTED);
     };
 
@@ -553,10 +563,12 @@ export async function connectAndDiscover(
     if (mcpClient) {
       mcpClient.close();
     }
-    console.error(
+    coreEvents.emitFeedback(
+      'error',
       `Error connecting to MCP server '${mcpServerName}': ${getErrorMessage(
         error,
       )}`,
+      error,
     );
     updateMCPServerStatus(mcpServerName, MCPServerStatus.DISCONNECTED);
   }
@@ -614,10 +626,12 @@ export async function discoverTools(
           ),
         );
       } catch (error) {
-        console.error(
+        coreEvents.emitFeedback(
+          'error',
           `Error discovering tool: '${
             funcDecl.name
           }' from MCP server '${mcpServerName}': ${(error as Error).message}`,
+          error,
         );
       }
     }
@@ -627,10 +641,12 @@ export async function discoverTools(
       error instanceof Error &&
       !error.message?.includes('Method not found')
     ) {
-      console.error(
+      coreEvents.emitFeedback(
+        'error',
         `Error discovering tools from ${mcpServerName}: ${getErrorMessage(
           error,
         )}`,
+        error,
       );
     }
     return [];
@@ -674,10 +690,12 @@ export async function discoverPrompts(
       error instanceof Error &&
       !error.message?.includes('Method not found')
     ) {
-      console.error(
+      coreEvents.emitFeedback(
+        'error',
         `Error discovering prompts from ${mcpServerName}: ${getErrorMessage(
           error,
         )}`,
+        error,
       );
     }
     return [];
@@ -717,10 +735,12 @@ export async function invokeMcpPrompt(
       error instanceof Error &&
       !error.message?.includes('Method not found')
     ) {
-      console.error(
+      coreEvents.emitFeedback(
+        'error',
         `Error invoking prompt '${promptName}' from ${mcpServerName} ${promptParams}: ${getErrorMessage(
           error,
         )}`,
+        error,
       );
     }
     throw error;
@@ -842,12 +862,14 @@ export async function connectToMcpServer(
             },
           );
           if (hasStoredTokens) {
-            debugLogger.log(
+            coreEvents.emitFeedback(
+              'error',
               `Stored OAuth token for SSE server '${mcpServerName}' was rejected. ` +
                 `Please re-authenticate using: /mcp auth ${mcpServerName}`,
             );
           } else {
-            debugLogger.log(
+            coreEvents.emitFeedback(
+              'error',
               `401 error received for SSE server '${mcpServerName}' without OAuth configuration. ` +
                 `Please authenticate using: /mcp auth ${mcpServerName}`,
             );
@@ -935,49 +957,27 @@ export async function connectToMcpServer(
                 accessToken,
               );
               if (oauthTransport) {
-                try {
-                  await mcpClient.connect(oauthTransport, {
-                    timeout:
-                      mcpServerConfig.timeout ?? MCP_DEFAULT_TIMEOUT_MSEC,
-                  });
-                  // Connection successful with OAuth
-                  return mcpClient;
-                } catch (retryError) {
-                  console.error(
-                    `Failed to connect with OAuth token: ${getErrorMessage(
-                      retryError,
-                    )}`,
-                  );
-                  throw retryError;
-                }
+                await mcpClient.connect(oauthTransport, {
+                  timeout: mcpServerConfig.timeout ?? MCP_DEFAULT_TIMEOUT_MSEC,
+                });
+                // Connection successful with OAuth
+                return mcpClient;
               } else {
-                console.error(
-                  `Failed to create OAuth transport for server '${mcpServerName}'`,
-                );
                 throw new Error(
                   `Failed to create OAuth transport for server '${mcpServerName}'`,
                 );
               }
             } else {
-              console.error(
-                `Failed to get OAuth token for server '${mcpServerName}'`,
-              );
               throw new Error(
                 `Failed to get OAuth token for server '${mcpServerName}'`,
               );
             }
           } else {
-            console.error(
-              `Failed to get credentials for server '${mcpServerName}' after successful OAuth authentication`,
-            );
             throw new Error(
               `Failed to get credentials for server '${mcpServerName}' after successful OAuth authentication`,
             );
           }
         } else {
-          console.error(
-            `Failed to handle automatic OAuth for server '${mcpServerName}'`,
-          );
           throw new Error(
             `Failed to handle automatic OAuth for server '${mcpServerName}'`,
           );
@@ -1002,12 +1002,14 @@ export async function connectToMcpServer(
               },
             );
             if (hasStoredTokens) {
-              debugLogger.log(
+              coreEvents.emitFeedback(
+                'error',
                 `Stored OAuth token for SSE server '${mcpServerName}' was rejected. ` +
                   `Please re-authenticate using: /mcp auth ${mcpServerName}`,
               );
             } else {
-              debugLogger.log(
+              coreEvents.emitFeedback(
+                'error',
                 `401 error received for SSE server '${mcpServerName}' without OAuth configuration. ` +
                   `Please authenticate using: /mcp auth ${mcpServerName}`,
               );
@@ -1030,116 +1032,85 @@ export async function connectToMcpServer(
           );
           const baseUrl = `${serverUrl.protocol}//${serverUrl.host}`;
 
-          try {
-            // Try to discover OAuth configuration from the base URL
-            const oauthConfig = await OAuthUtils.discoverOAuthConfig(baseUrl);
-            if (oauthConfig) {
-              debugLogger.log(
-                `Discovered OAuth configuration from base URL for server '${mcpServerName}'`,
-              );
+          // Try to discover OAuth configuration from the base URL
+          const oauthConfig = await OAuthUtils.discoverOAuthConfig(baseUrl);
+          if (oauthConfig) {
+            debugLogger.log(
+              `Discovered OAuth configuration from base URL for server '${mcpServerName}'`,
+            );
 
-              // Create OAuth configuration for authentication
-              const oauthAuthConfig = {
-                enabled: true,
-                authorizationUrl: oauthConfig.authorizationUrl,
-                tokenUrl: oauthConfig.tokenUrl,
-                scopes: oauthConfig.scopes || [],
-              };
+            // Create OAuth configuration for authentication
+            const oauthAuthConfig = {
+              enabled: true,
+              authorizationUrl: oauthConfig.authorizationUrl,
+              tokenUrl: oauthConfig.tokenUrl,
+              scopes: oauthConfig.scopes || [],
+            };
 
-              // Perform OAuth authentication
-              // Pass the server URL for proper discovery
-              const authServerUrl =
-                mcpServerConfig.httpUrl || mcpServerConfig.url;
-              debugLogger.log(
-                `Starting OAuth authentication for server '${mcpServerName}'...`,
-              );
-              const authProvider = new MCPOAuthProvider(
-                new MCPOAuthTokenStorage(),
-              );
-              await authProvider.authenticate(
+            // Perform OAuth authentication
+            // Pass the server URL for proper discovery
+            const authServerUrl =
+              mcpServerConfig.httpUrl || mcpServerConfig.url;
+            debugLogger.log(
+              `Starting OAuth authentication for server '${mcpServerName}'...`,
+            );
+            const authProvider = new MCPOAuthProvider(
+              new MCPOAuthTokenStorage(),
+            );
+            await authProvider.authenticate(
+              mcpServerName,
+              oauthAuthConfig,
+              authServerUrl,
+            );
+
+            // Retry connection with OAuth token
+            const tokenStorage = new MCPOAuthTokenStorage();
+            const credentials =
+              await tokenStorage.getCredentials(mcpServerName);
+            if (credentials) {
+              const authProvider = new MCPOAuthProvider(tokenStorage);
+              const accessToken = await authProvider.getValidToken(
                 mcpServerName,
-                oauthAuthConfig,
-                authServerUrl,
+                {
+                  // Pass client ID if available
+                  clientId: credentials.clientId,
+                },
               );
-
-              // Retry connection with OAuth token
-              const tokenStorage = new MCPOAuthTokenStorage();
-              const credentials =
-                await tokenStorage.getCredentials(mcpServerName);
-              if (credentials) {
-                const authProvider = new MCPOAuthProvider(tokenStorage);
-                const accessToken = await authProvider.getValidToken(
+              if (accessToken) {
+                // Create transport with OAuth token
+                const oauthTransport = await createTransportWithOAuth(
                   mcpServerName,
-                  {
-                    // Pass client ID if available
-                    clientId: credentials.clientId,
-                  },
+                  mcpServerConfig,
+                  accessToken,
                 );
-                if (accessToken) {
-                  // Create transport with OAuth token
-                  const oauthTransport = await createTransportWithOAuth(
-                    mcpServerName,
-                    mcpServerConfig,
-                    accessToken,
-                  );
-                  if (oauthTransport) {
-                    try {
-                      await mcpClient.connect(oauthTransport, {
-                        timeout:
-                          mcpServerConfig.timeout ?? MCP_DEFAULT_TIMEOUT_MSEC,
-                      });
-                      // Connection successful with OAuth
-                      return mcpClient;
-                    } catch (retryError) {
-                      console.error(
-                        `Failed to connect with OAuth token: ${getErrorMessage(
-                          retryError,
-                        )}`,
-                      );
-                      throw retryError;
-                    }
-                  } else {
-                    console.error(
-                      `Failed to create OAuth transport for server '${mcpServerName}'`,
-                    );
-                    throw new Error(
-                      `Failed to create OAuth transport for server '${mcpServerName}'`,
-                    );
-                  }
+                if (oauthTransport) {
+                  await mcpClient.connect(oauthTransport, {
+                    timeout:
+                      mcpServerConfig.timeout ?? MCP_DEFAULT_TIMEOUT_MSEC,
+                  });
+                  // Connection successful with OAuth
+                  return mcpClient;
                 } else {
-                  console.error(
-                    `Failed to get OAuth token for server '${mcpServerName}'`,
-                  );
                   throw new Error(
-                    `Failed to get OAuth token for server '${mcpServerName}'`,
+                    `Failed to create OAuth transport for server '${mcpServerName}'`,
                   );
                 }
               } else {
-                console.error(
-                  `Failed to get stored credentials for server '${mcpServerName}'`,
-                );
                 throw new Error(
-                  `Failed to get stored credentials for server '${mcpServerName}'`,
+                  `Failed to get OAuth token for server '${mcpServerName}'`,
                 );
               }
             } else {
-              console.error(
-                `❌ Could not configure OAuth for '${mcpServerName}' - please authenticate manually with /mcp auth ${mcpServerName}`,
-              );
               throw new Error(
-                `OAuth configuration failed for '${mcpServerName}'. Please authenticate manually with /mcp auth ${mcpServerName}`,
+                `Failed to get stored credentials for server '${mcpServerName}'`,
               );
             }
-          } catch (discoveryError) {
-            console.error(
-              `❌ OAuth discovery failed for '${mcpServerName}' - please authenticate manually with /mcp auth ${mcpServerName}`,
+          } else {
+            throw new Error(
+              `OAuth configuration failed for '${mcpServerName}'. Please authenticate manually with /mcp auth ${mcpServerName}`,
             );
-            throw discoveryError;
           }
         } else {
-          console.error(
-            `❌ '${mcpServerName}' requires authentication but no OAuth configuration found`,
-          );
           throw new Error(
             `MCP server '${mcpServerName}' requires authentication. Please configure OAuth or check server settings.`,
           );
@@ -1239,10 +1210,6 @@ export async function createTransport(
     );
 
     if (!accessToken) {
-      console.error(
-        `MCP server '${mcpServerName}' requires OAuth authentication. ` +
-          `Please authenticate using the /mcp auth command.`,
-      );
       throw new Error(
         `MCP server '${mcpServerName}' requires OAuth authentication. ` +
           `Please authenticate using the /mcp auth command.`,
diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts
index f24365913e..4a4a13c5f9 100644
--- a/packages/core/src/tools/tool-registry.ts
+++ b/packages/core/src/tools/tool-registry.ts
@@ -23,6 +23,7 @@ import { safeJsonStringify } from '../utils/safeJsonStringify.js';
 import type { EventEmitter } from 'node:events';
 import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import { coreEvents } from '../utils/events.js';
 
 type ToolParams = Record<string, unknown>;
 
@@ -350,8 +351,11 @@ export class ToolRegistry {
           }
 
           if (code !== 0) {
-            console.error(`Command failed with code ${code}`);
-            console.error(stderr);
+            coreEvents.emitFeedback(
+              'error',
+              `Tool discovery command failed with code ${code}.`,
+              stderr,
+            );
             return reject(
               new Error(`Tool discovery command failed with exit code ${code}`),
             );

From cb208f53d8631e4272816ff64e86389d804127fb Mon Sep 17 00:00:00 2001
From: Pavel Jbanov <pavelgj@gmail.com>
Date: Mon, 27 Oct 2025 23:37:43 -0400
Subject: [PATCH 48/73] Added a script to set up and run genkit telemetry and
 dev ui (#12120)

---
 .gitignore                  |  2 ++
 docs/local-development.md   | 38 +++++++++++++++++++-
 scripts/telemetry.js        | 14 ++++----
 scripts/telemetry_genkit.js | 70 +++++++++++++++++++++++++++++++++++++
 scripts/telemetry_utils.js  | 13 ++++++-
 5 files changed, 129 insertions(+), 8 deletions(-)
 create mode 100644 scripts/telemetry_genkit.js

diff --git a/.gitignore b/.gitignore
index ac5609d580..daedb5afb4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,5 @@ gha-creds-*.json
 
 # Log files
 patch_output.log
+
+.genkit
diff --git a/docs/local-development.md b/docs/local-development.md
index a7e85c565e..f69308f356 100644
--- a/docs/local-development.md
+++ b/docs/local-development.md
@@ -17,6 +17,42 @@ when running Gemini CLI.
 
 ### Viewing Dev Traces
 
+You can view dev traces using either Jaeger or the Genkit Developer UI.
+
+#### Using Genkit
+
+Genkit provides a web-based UI for viewing traces and other telemetry data.
+
+1.  **Start the Genkit Telemetry Server:**
+
+    Run the following command to start the Genkit server:
+
+    ```bash
+    npm run telemetry -- --target=genkit
+    ```
+
+    The script will output the URL for the Genkit Developer UI, for example:
+
+    ```
+    Genkit Developer UI: http://localhost:4000
+    ```
+
+2.  **Run Gemini CLI with Dev Tracing:**
+
+    In a separate terminal, run your Gemini CLI command with the
+    `GEMINI_DEV_TRACING` environment variable:
+
+    ```bash
+    GEMINI_DEV_TRACING=true gemini
+    ```
+
+3.  **View the traces:**
+
+    Open the Genkit Developer UI URL in your browser and navigate to the
+    **Traces** tab to view the traces.
+
+#### Using Jaeger
+
 You can view dev traces in the Jaeger UI. To get started, follow these steps:
 
 1.  **Start the telemetry collector:**
@@ -37,7 +73,7 @@ You can view dev traces in the Jaeger UI. To get started, follow these steps:
     `GEMINI_DEV_TRACING` environment variable:
 
     ```bash
-    GEMINI_DEV_TRACING=true gemini [your-command]
+    GEMINI_DEV_TRACING=true gemini
     ```
 
 3.  **View the traces:**
diff --git a/scripts/telemetry.js b/scripts/telemetry.js
index 0da513c981..7cce93f222 100755
--- a/scripts/telemetry.js
+++ b/scripts/telemetry.js
@@ -45,7 +45,7 @@ if (!settingsTarget) {
 }
 
 let target = settingsTarget || 'local';
-const allowedTargets = ['local', 'gcp'];
+const allowedTargets = ['local', 'gcp', 'genkit'];
 
 const targetArg = process.argv.find((arg) => arg.startsWith('--target='));
 if (targetArg) {
@@ -65,11 +65,13 @@ if (targetArg) {
   );
 }
 
-const scriptPath = join(
-  projectRoot,
-  'scripts',
-  target === 'gcp' ? 'telemetry_gcp.js' : 'local_telemetry.js',
-);
+const targetScripts = {
+  gcp: 'telemetry_gcp.js',
+  local: 'local_telemetry.js',
+  genkit: 'telemetry_genkit.js',
+};
+
+const scriptPath = join(projectRoot, 'scripts', targetScripts[target]);
 
 try {
   console.log(`🚀 Running telemetry script for target: ${target}.`);
diff --git a/scripts/telemetry_genkit.js b/scripts/telemetry_genkit.js
new file mode 100644
index 0000000000..fc1d6331be
--- /dev/null
+++ b/scripts/telemetry_genkit.js
@@ -0,0 +1,70 @@
+#!/usr/bin/env node
+
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createInterface } from 'node:readline';
+import { spawn } from 'node:child_process';
+import { manageTelemetrySettings, registerCleanup } from './telemetry_utils.js';
+
+const GENKIT_START_COMMAND = 'npx';
+const GENKIT_START_ARGS = ['-y', 'genkit-cli', 'start', '--non-interactive'];
+
+async function main() {
+  let genkitProcess;
+
+  const originalSandboxSetting = manageTelemetrySettings(
+    true,
+    '', // Endpoint will be set dynamically
+    'local',
+    undefined,
+    'http',
+  );
+
+  registerCleanup(
+    () => [genkitProcess],
+    () => [],
+    originalSandboxSetting,
+  );
+
+  console.log('🚀 Starting Genkit telemetry server...');
+  genkitProcess = spawn(GENKIT_START_COMMAND, GENKIT_START_ARGS, {
+    stdio: ['ignore', 'pipe', 'pipe'],
+  });
+
+  const rl = createInterface({ input: genkitProcess.stdout });
+
+  rl.on('line', (line) => {
+    console.log(`[Genkit] ${line}`);
+    const match = line.match(/Telemetry API running on (http:\/\/[^\s]+)/);
+    if (match) {
+      const telemetryApiUrl = match[1];
+      const otlpEndpoint = `${telemetryApiUrl}/api/otlp`;
+      console.log(`✅ Genkit telemetry running on: ${otlpEndpoint}`);
+      manageTelemetrySettings(true, otlpEndpoint, 'local', undefined, 'http');
+    }
+  });
+
+  genkitProcess.stderr.on('data', (data) => {
+    console.error(`[Genkit Error] ${data.toString()}`);
+  });
+
+  genkitProcess.on('close', (code) => {
+    console.log(`Genkit process exited with code ${code}`);
+  });
+
+  genkitProcess.on('error', (err) => {
+    console.error('Failed to start Genkit process:', err);
+    process.exit(1);
+  });
+
+  console.log(`
+✨ Genkit telemetry environment is running.
+`);
+  console.log(`Press Ctrl+C to exit.`);
+}
+
+main();
diff --git a/scripts/telemetry_utils.js b/scripts/telemetry_utils.js
index a891a5c9eb..1c81b1eb1b 100644
--- a/scripts/telemetry_utils.js
+++ b/scripts/telemetry_utils.js
@@ -314,6 +314,7 @@ export function manageTelemetrySettings(
   oTelEndpoint = 'http://localhost:4317',
   target = 'local',
   originalSandboxSettingToRestore,
+  otlpProtocol = 'grpc',
 ) {
   const workspaceSettings = readJsonFile(WORKSPACE_SETTINGS_FILE);
   const currentSandboxSetting = workspaceSettings.sandbox;
@@ -344,6 +345,11 @@ export function manageTelemetrySettings(
       settingsModified = true;
       console.log(`🎯 Set telemetry target to ${target}.`);
     }
+    if (workspaceSettings.telemetry.otlpProtocol !== otlpProtocol) {
+      workspaceSettings.telemetry.otlpProtocol = otlpProtocol;
+      settingsModified = true;
+      console.log(`🔧 Set telemetry OTLP protocol to ${otlpProtocol}.`);
+    }
   } else {
     if (workspaceSettings.telemetry.enabled === true) {
       delete workspaceSettings.telemetry.enabled;
@@ -360,6 +366,11 @@ export function manageTelemetrySettings(
       settingsModified = true;
       console.log('🎯 Cleared telemetry target.');
     }
+    if (workspaceSettings.telemetry.otlpProtocol) {
+      delete workspaceSettings.telemetry.otlpProtocol;
+      settingsModified = true;
+      console.log('🔧 Cleared telemetry OTLP protocol.');
+    }
     if (Object.keys(workspaceSettings.telemetry).length === 0) {
       delete workspaceSettings.telemetry;
     }
@@ -399,7 +410,7 @@ export function registerCleanup(
 
     console.log('\n👋 Shutting down...');
 
-    manageTelemetrySettings(false, null, originalSandboxSetting);
+    manageTelemetrySettings(false, null, null, originalSandboxSetting);
 
     const processes = getProcesses ? getProcesses() : [];
     processes.forEach((proc) => {

From ecf0a2487361916784f6e034c5af8150f949f2e1 Mon Sep 17 00:00:00 2001
From: Jainam M <jjainam@google.com>
Date: Tue, 28 Oct 2025 20:02:55 +0530
Subject: [PATCH 49/73] refactor(core): Parameterize tests in glob.test.ts
 (#12061)

---
 packages/core/src/tools/glob.test.ts | 506 ++++++++++++---------------
 1 file changed, 233 insertions(+), 273 deletions(-)

diff --git a/packages/core/src/tools/glob.test.ts b/packages/core/src/tools/glob.test.ts
index b965ce9036..938c509b4b 100644
--- a/packages/core/src/tools/glob.test.ts
+++ b/packages/core/src/tools/glob.test.ts
@@ -242,101 +242,70 @@ describe('GlobTool', () => {
   });
 
   describe('validateToolParams', () => {
-    it('should return null for valid parameters (pattern only)', () => {
-      const params: GlobToolParams = { pattern: '*.js' };
-      expect(globTool.validateToolParams(params)).toBeNull();
-    });
-
-    it('should return null for valid parameters (pattern and path)', () => {
-      const params: GlobToolParams = { pattern: '*.js', path: 'sub' };
-      expect(globTool.validateToolParams(params)).toBeNull();
-    });
-
-    it('should return null for valid parameters (pattern, path, and case_sensitive)', () => {
-      const params: GlobToolParams = {
-        pattern: '*.js',
-        path: 'sub',
-        case_sensitive: true,
-      };
-      expect(globTool.validateToolParams(params)).toBeNull();
-    });
-
-    it('should return error if pattern is missing (schema validation)', () => {
-      // Need to correctly define this as an object without pattern
-      const params = { path: '.' };
+    it.each([
+      {
+        name: 'should return null for valid parameters (pattern only)',
+        params: { pattern: '*.js' },
+        expected: null,
+      },
+      {
+        name: 'should return null for valid parameters (pattern and path)',
+        params: { pattern: '*.js', path: 'sub' },
+        expected: null,
+      },
+      {
+        name: 'should return null for valid parameters (pattern, path, and case_sensitive)',
+        params: { pattern: '*.js', path: 'sub', case_sensitive: true },
+        expected: null,
+      },
+      {
+        name: 'should return error if pattern is missing (schema validation)',
+        params: { path: '.' },
+        expected: `params must have required property 'pattern'`,
+      },
+      {
+        name: 'should return error if pattern is an empty string',
+        params: { pattern: '' },
+        expected: "The 'pattern' parameter cannot be empty.",
+      },
+      {
+        name: 'should return error if pattern is only whitespace',
+        params: { pattern: '   ' },
+        expected: "The 'pattern' parameter cannot be empty.",
+      },
+      {
+        name: 'should return error if path is not a string (schema validation)',
+        params: { pattern: '*.ts', path: 123 },
+        expected: 'params/path must be string',
+      },
+      {
+        name: 'should return error if case_sensitive is not a boolean (schema validation)',
+        params: { pattern: '*.ts', case_sensitive: 'true' },
+        expected: 'params/case_sensitive must be boolean',
+      },
+      {
+        name: "should return error if search path resolves outside the tool's root directory",
+        params: { pattern: '*.txt', path: '../../../../../../../../../../tmp' },
+        expected: 'resolves outside the allowed workspace directories',
+      },
+      {
+        name: 'should return error if specified search path does not exist',
+        params: { pattern: '*.txt', path: 'nonexistent_subdir' },
+        expected: 'Search path does not exist',
+      },
+      {
+        name: 'should return error if specified search path is a file, not a directory',
+        params: { pattern: '*.txt', path: 'fileA.txt' },
+        expected: 'Search path is not a directory',
+      },
+    ])('$name', ({ params, expected }) => {
       // @ts-expect-error - We're intentionally creating invalid params for testing
-      expect(globTool.validateToolParams(params)).toBe(
-        `params must have required property 'pattern'`,
-      );
-    });
-
-    it('should return error if pattern is an empty string', () => {
-      const params: GlobToolParams = { pattern: '' };
-      expect(globTool.validateToolParams(params)).toContain(
-        "The 'pattern' parameter cannot be empty.",
-      );
-    });
-
-    it('should return error if pattern is only whitespace', () => {
-      const params: GlobToolParams = { pattern: '   ' };
-      expect(globTool.validateToolParams(params)).toContain(
-        "The 'pattern' parameter cannot be empty.",
-      );
-    });
-
-    it('should return error if path is provided but is not a string (schema validation)', () => {
-      const params = {
-        pattern: '*.ts',
-        path: 123,
-      };
-      // @ts-expect-error - We're intentionally creating invalid params for testing
-      expect(globTool.validateToolParams(params)).toBe(
-        'params/path must be string',
-      );
-    });
-
-    it('should return error if case_sensitive is provided but is not a boolean (schema validation)', () => {
-      const params = {
-        pattern: '*.ts',
-        case_sensitive: 'true',
-      };
-      // @ts-expect-error - We're intentionally creating invalid params for testing
-      expect(globTool.validateToolParams(params)).toBe(
-        'params/case_sensitive must be boolean',
-      );
-    });
-
-    it("should return error if search path resolves outside the tool's root directory", () => {
-      // Create a globTool instance specifically for this test, with a deeper root
-      tempRootDir = path.join(tempRootDir, 'sub');
-      const specificGlobTool = new GlobTool(mockConfig);
-      // const params: GlobToolParams = { pattern: '*.txt', path: '..' }; // This line is unused and will be removed.
-      // This should be fine as tempRootDir is still within the original tempRootDir (the parent of deeperRootDir)
-      // Let's try to go further up.
-      const paramsOutside: GlobToolParams = {
-        pattern: '*.txt',
-        path: '../../../../../../../../../../tmp', // Definitely outside
-      };
-      expect(specificGlobTool.validateToolParams(paramsOutside)).toContain(
-        'resolves outside the allowed workspace directories',
-      );
-    });
-
-    it('should return error if specified search path does not exist', async () => {
-      const params: GlobToolParams = {
-        pattern: '*.txt',
-        path: 'nonexistent_subdir',
-      };
-      expect(globTool.validateToolParams(params)).toContain(
-        'Search path does not exist',
-      );
-    });
-
-    it('should return error if specified search path is a file, not a directory', async () => {
-      const params: GlobToolParams = { pattern: '*.txt', path: 'fileA.txt' };
-      expect(globTool.validateToolParams(params)).toContain(
-        'Search path is not a directory',
-      );
+      const result = globTool.validateToolParams(params);
+      if (expected === null) {
+        expect(result).toBeNull();
+      } else {
+        expect(result).toContain(expected);
+      }
     });
   });
 
@@ -373,85 +342,84 @@ describe('GlobTool', () => {
   });
 
   describe('ignore file handling', () => {
-    it('should respect .gitignore files by default', async () => {
-      await fs.writeFile(path.join(tempRootDir, '.gitignore'), '*.ignored.txt');
-      await fs.writeFile(
-        path.join(tempRootDir, 'a.ignored.txt'),
-        'ignored content',
-      );
-      await fs.writeFile(
-        path.join(tempRootDir, 'b.notignored.txt'),
-        'not ignored content',
-      );
+    interface IgnoreFileTestCase {
+      name: string;
+      ignoreFile: { name: string; content: string };
+      filesToCreate: string[];
+      globToolParams: GlobToolParams;
+      expectedCountMessage: string;
+      expectedToContain?: string[];
+      notExpectedToContain?: string[];
+    }
 
-      const params: GlobToolParams = { pattern: '*.txt' };
-      const invocation = globTool.build(params);
-      const result = await invocation.execute(abortSignal);
+    it.each<IgnoreFileTestCase>([
+      {
+        name: 'should respect .gitignore files by default',
+        ignoreFile: { name: '.gitignore', content: '*.ignored.txt' },
+        filesToCreate: ['a.ignored.txt', 'b.notignored.txt'],
+        globToolParams: { pattern: '*.txt' },
+        expectedCountMessage: 'Found 3 file(s)',
+        notExpectedToContain: ['a.ignored.txt'],
+      },
+      {
+        name: 'should respect .geminiignore files by default',
+        ignoreFile: { name: '.geminiignore', content: '*.geminiignored.txt' },
+        filesToCreate: ['a.geminiignored.txt', 'b.notignored.txt'],
+        globToolParams: { pattern: '*.txt' },
+        expectedCountMessage: 'Found 3 file(s)',
+        notExpectedToContain: ['a.geminiignored.txt'],
+      },
+      {
+        name: 'should not respect .gitignore when respect_git_ignore is false',
+        ignoreFile: { name: '.gitignore', content: '*.ignored.txt' },
+        filesToCreate: ['a.ignored.txt'],
+        globToolParams: { pattern: '*.txt', respect_git_ignore: false },
+        expectedCountMessage: 'Found 3 file(s)',
+        expectedToContain: ['a.ignored.txt'],
+      },
+      {
+        name: 'should not respect .geminiignore when respect_gemini_ignore is false',
+        ignoreFile: { name: '.geminiignore', content: '*.geminiignored.txt' },
+        filesToCreate: ['a.geminiignored.txt'],
+        globToolParams: { pattern: '*.txt', respect_gemini_ignore: false },
+        expectedCountMessage: 'Found 3 file(s)',
+        expectedToContain: ['a.geminiignored.txt'],
+      },
+    ])(
+      '$name',
+      async ({
+        ignoreFile,
+        filesToCreate,
+        globToolParams,
+        expectedCountMessage,
+        expectedToContain,
+        notExpectedToContain,
+      }) => {
+        await fs.writeFile(
+          path.join(tempRootDir, ignoreFile.name),
+          ignoreFile.content,
+        );
+        for (const file of filesToCreate) {
+          await fs.writeFile(path.join(tempRootDir, file), 'content');
+        }
 
-      expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, b.notignored.txt
-      expect(result.llmContent).not.toContain('a.ignored.txt');
-    });
+        const invocation = globTool.build(globToolParams);
+        const result = await invocation.execute(abortSignal);
 
-    it('should respect .geminiignore files by default', async () => {
-      await fs.writeFile(
-        path.join(tempRootDir, '.geminiignore'),
-        '*.geminiignored.txt',
-      );
-      await fs.writeFile(
-        path.join(tempRootDir, 'a.geminiignored.txt'),
-        'ignored content',
-      );
-      await fs.writeFile(
-        path.join(tempRootDir, 'b.notignored.txt'),
-        'not ignored content',
-      );
+        expect(result.llmContent).toContain(expectedCountMessage);
 
-      const params: GlobToolParams = { pattern: '*.txt' };
-      const invocation = globTool.build(params);
-      const result = await invocation.execute(abortSignal);
-
-      expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, b.notignored.txt
-      expect(result.llmContent).not.toContain('a.geminiignored.txt');
-    });
-
-    it('should not respect .gitignore when respect_git_ignore is false', async () => {
-      await fs.writeFile(path.join(tempRootDir, '.gitignore'), '*.ignored.txt');
-      await fs.writeFile(
-        path.join(tempRootDir, 'a.ignored.txt'),
-        'ignored content',
-      );
-
-      const params: GlobToolParams = {
-        pattern: '*.txt',
-        respect_git_ignore: false,
-      };
-      const invocation = globTool.build(params);
-      const result = await invocation.execute(abortSignal);
-
-      expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, a.ignored.txt
-      expect(result.llmContent).toContain('a.ignored.txt');
-    });
-
-    it('should not respect .geminiignore when respect_gemini_ignore is false', async () => {
-      await fs.writeFile(
-        path.join(tempRootDir, '.geminiignore'),
-        '*.geminiignored.txt',
-      );
-      await fs.writeFile(
-        path.join(tempRootDir, 'a.geminiignored.txt'),
-        'ignored content',
-      );
-
-      const params: GlobToolParams = {
-        pattern: '*.txt',
-        respect_gemini_ignore: false,
-      };
-      const invocation = globTool.build(params);
-      const result = await invocation.execute(abortSignal);
-
-      expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, a.geminiignored.txt
-      expect(result.llmContent).toContain('a.geminiignored.txt');
-    });
+        if (expectedToContain) {
+          for (const file of expectedToContain) {
+            expect(result.llmContent).toContain(file);
+          }
+        }
+        if (notExpectedToContain) {
+          for (const file of notExpectedToContain) {
+            expect(result.llmContent).not.toContain(file);
+          }
+        }
+      },
+    );
   });
 });
 
@@ -464,110 +432,102 @@ describe('sortFileEntries', () => {
     mtimeMs: mtimeDate.getTime(),
   });
 
-  it('should sort a mix of recent and older files correctly', () => {
-    const recentTime1 = new Date(nowTimestamp - 1 * 60 * 60 * 1000); // 1 hour ago
-    const recentTime2 = new Date(nowTimestamp - 2 * 60 * 60 * 1000); // 2 hours ago
-    const olderTime1 = new Date(
-      nowTimestamp - (oneDayInMs + 1 * 60 * 60 * 1000),
-    ); // 25 hours ago
-    const olderTime2 = new Date(
-      nowTimestamp - (oneDayInMs + 2 * 60 * 60 * 1000),
-    ); // 26 hours ago
+  const testCases = [
+    {
+      name: 'should sort a mix of recent and older files correctly',
+      entries: [
+        {
+          name: 'older_zebra.txt',
+          mtime: new Date(nowTimestamp - (oneDayInMs + 2 * 60 * 60 * 1000)),
+        },
+        {
+          name: 'recent_alpha.txt',
+          mtime: new Date(nowTimestamp - 1 * 60 * 60 * 1000),
+        },
+        {
+          name: 'older_apple.txt',
+          mtime: new Date(nowTimestamp - (oneDayInMs + 1 * 60 * 60 * 1000)),
+        },
+        {
+          name: 'recent_beta.txt',
+          mtime: new Date(nowTimestamp - 2 * 60 * 60 * 1000),
+        },
+        {
+          name: 'older_banana.txt',
+          mtime: new Date(nowTimestamp - (oneDayInMs + 1 * 60 * 60 * 1000)),
+        },
+      ],
+      expected: [
+        'recent_alpha.txt',
+        'recent_beta.txt',
+        'older_apple.txt',
+        'older_banana.txt',
+        'older_zebra.txt',
+      ],
+    },
+    {
+      name: 'should sort only recent files by mtime descending',
+      entries: [
+        { name: 'c.txt', mtime: new Date(nowTimestamp - 2000) },
+        { name: 'a.txt', mtime: new Date(nowTimestamp - 3000) },
+        { name: 'b.txt', mtime: new Date(nowTimestamp - 1000) },
+      ],
+      expected: ['b.txt', 'c.txt', 'a.txt'],
+    },
+    {
+      name: 'should sort only older files alphabetically by path',
+      entries: [
+        { name: 'zebra.txt', mtime: new Date(nowTimestamp - 2 * oneDayInMs) },
+        { name: 'apple.txt', mtime: new Date(nowTimestamp - 2 * oneDayInMs) },
+        { name: 'banana.txt', mtime: new Date(nowTimestamp - 2 * oneDayInMs) },
+      ],
+      expected: ['apple.txt', 'banana.txt', 'zebra.txt'],
+    },
+    {
+      name: 'should handle an empty array',
+      entries: [],
+      expected: [],
+    },
+    {
+      name: 'should correctly sort files when mtimes are identical for recent files',
+      entries: [
+        { name: 'b.txt', mtime: new Date(nowTimestamp - 1000) },
+        { name: 'a.txt', mtime: new Date(nowTimestamp - 1000) },
+      ],
+      expectedUnordered: ['a.txt', 'b.txt'],
+    },
+    {
+      name: 'should use recencyThresholdMs parameter correctly',
+      recencyThresholdMs: 1000,
+      entries: [
+        { name: 'older_file.txt', mtime: new Date(nowTimestamp - 1001) },
+        { name: 'recent_file.txt', mtime: new Date(nowTimestamp - 999) },
+      ],
+      expected: ['recent_file.txt', 'older_file.txt'],
+    },
+  ];
 
-    const entries: GlobPath[] = [
-      createFileEntry('older_zebra.txt', olderTime2),
-      createFileEntry('recent_alpha.txt', recentTime1),
-      createFileEntry('older_apple.txt', olderTime1),
-      createFileEntry('recent_beta.txt', recentTime2),
-      createFileEntry('older_banana.txt', olderTime1), // Same mtime as apple
-    ];
+  it.each(testCases)(
+    '$name',
+    ({ entries, expected, expectedUnordered, recencyThresholdMs }) => {
+      const globPaths = entries.map((e) => createFileEntry(e.name, e.mtime));
+      const sorted = sortFileEntries(
+        globPaths,
+        nowTimestamp,
+        recencyThresholdMs ?? oneDayInMs,
+      );
+      const sortedPaths = sorted.map((e) => e.fullpath());
 
-    const sorted = sortFileEntries(entries, nowTimestamp, oneDayInMs);
-    const sortedPaths = sorted.map((e) => e.fullpath());
-
-    expect(sortedPaths).toEqual([
-      'recent_alpha.txt', // Recent, newest
-      'recent_beta.txt', // Recent, older
-      'older_apple.txt', // Older, alphabetical
-      'older_banana.txt', // Older, alphabetical
-      'older_zebra.txt', // Older, alphabetical
-    ]);
-  });
-
-  it('should sort only recent files by mtime descending', () => {
-    const recentTime1 = new Date(nowTimestamp - 1000); // Newest
-    const recentTime2 = new Date(nowTimestamp - 2000);
-    const recentTime3 = new Date(nowTimestamp - 3000); // Oldest recent
-
-    const entries: GlobPath[] = [
-      createFileEntry('c.txt', recentTime2),
-      createFileEntry('a.txt', recentTime3),
-      createFileEntry('b.txt', recentTime1),
-    ];
-    const sorted = sortFileEntries(entries, nowTimestamp, oneDayInMs);
-    expect(sorted.map((e) => e.fullpath())).toEqual([
-      'b.txt',
-      'c.txt',
-      'a.txt',
-    ]);
-  });
-
-  it('should sort only older files alphabetically by path', () => {
-    const olderTime = new Date(nowTimestamp - 2 * oneDayInMs); // All equally old
-    const entries: GlobPath[] = [
-      createFileEntry('zebra.txt', olderTime),
-      createFileEntry('apple.txt', olderTime),
-      createFileEntry('banana.txt', olderTime),
-    ];
-    const sorted = sortFileEntries(entries, nowTimestamp, oneDayInMs);
-    expect(sorted.map((e) => e.fullpath())).toEqual([
-      'apple.txt',
-      'banana.txt',
-      'zebra.txt',
-    ]);
-  });
-
-  it('should handle an empty array', () => {
-    const entries: GlobPath[] = [];
-    const sorted = sortFileEntries(entries, nowTimestamp, oneDayInMs);
-    expect(sorted).toEqual([]);
-  });
-
-  it('should correctly sort files when mtimes are identical for older files', () => {
-    const olderTime = new Date(nowTimestamp - 2 * oneDayInMs);
-    const entries: GlobPath[] = [
-      createFileEntry('b.txt', olderTime),
-      createFileEntry('a.txt', olderTime),
-    ];
-    const sorted = sortFileEntries(entries, nowTimestamp, oneDayInMs);
-    expect(sorted.map((e) => e.fullpath())).toEqual(['a.txt', 'b.txt']);
-  });
-
-  it('should correctly sort files when mtimes are identical for recent files (maintaining mtime sort)', () => {
-    const recentTime = new Date(nowTimestamp - 1000);
-    const entries: GlobPath[] = [
-      createFileEntry('b.txt', recentTime),
-      createFileEntry('a.txt', recentTime),
-    ];
-    const sorted = sortFileEntries(entries, nowTimestamp, oneDayInMs);
-    expect(sorted.map((e) => e.fullpath())).toContain('a.txt');
-    expect(sorted.map((e) => e.fullpath())).toContain('b.txt');
-    expect(sorted.length).toBe(2);
-  });
-
-  it('should use recencyThresholdMs parameter correctly', () => {
-    const justOverThreshold = new Date(nowTimestamp - (1000 + 1)); // Barely older
-    const justUnderThreshold = new Date(nowTimestamp - (1000 - 1)); // Barely recent
-    const customThresholdMs = 1000; // 1 second
-
-    const entries: GlobPath[] = [
-      createFileEntry('older_file.txt', justOverThreshold),
-      createFileEntry('recent_file.txt', justUnderThreshold),
-    ];
-    const sorted = sortFileEntries(entries, nowTimestamp, customThresholdMs);
-    expect(sorted.map((e) => e.fullpath())).toEqual([
-      'recent_file.txt',
-      'older_file.txt',
-    ]);
-  });
+      if (expected) {
+        expect(sortedPaths).toEqual(expected);
+      } else if (expectedUnordered) {
+        expect(sortedPaths).toHaveLength(expectedUnordered.length);
+        for (const path of expectedUnordered) {
+          expect(sortedPaths).toContain(path);
+        }
+      } else {
+        throw new Error('Test case must have expected or expectedUnordered');
+      }
+    },
+  );
 });

From cca5a12839e5fb257d337c4ce1de35b70c555c69 Mon Sep 17 00:00:00 2001
From: Tekkali Rahul Lakshmi Subrahmanyam
 <159406511+Tekrah123@users.noreply.github.com>
Date: Tue, 28 Oct 2025 20:06:41 +0530
Subject: [PATCH 50/73] docs: update installation section in README (#12035)

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index f2c5fe3225..494d059cd3 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,11 @@ Learn all about Gemini CLI in our [documentation](https://geminicli.com/docs/).
 
 ## 📦 Installation
 
+### Pre-requisites before installation
+
+- Node.js version 20 or higher
+- macOS, Linux, or Windows
+
 ### Quick Install
 
 #### Run instantly with npx
@@ -48,11 +53,6 @@ npm install -g @google/gemini-cli
 brew install gemini-cli
 ```
 
-#### System Requirements
-
-- Node.js version 20 or higher
-- macOS, Linux, or Windows
-
 ## Release Cadence and Tags
 
 See [Releases](./docs/releases.md) for more details.

From 034ca93907ececea1c675e2f4feb1cb74b5d965f Mon Sep 17 00:00:00 2001
From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:07:57 -0400
Subject: [PATCH 51/73] =?UTF-8?q?Revert=20"feat(ID=20token=20support):=20A?=
 =?UTF-8?q?dd=20ID=20token=20support=20for=20authenticating=20to=20MC?=
 =?UTF-8?q?=E2=80=A6"=20(#12162)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/tools/mcp-server.md                      | 25 ------
 packages/core/src/config/config.ts            |  2 -
 .../core/src/mcp/google-auth-provider.test.ts | 88 +------------------
 packages/core/src/mcp/google-auth-provider.ts | 57 ++----------
 4 files changed, 8 insertions(+), 164 deletions(-)

diff --git a/docs/tools/mcp-server.md b/docs/tools/mcp-server.md
index 685a637cf8..47f169ba38 100644
--- a/docs/tools/mcp-server.md
+++ b/docs/tools/mcp-server.md
@@ -150,11 +150,6 @@ Each server configuration supports the following properties:
   server. Tools listed here will not be available to the model, even if they are
   exposed by the server. **Note:** `excludeTools` takes precedence over
   `includeTools` - if a tool is in both lists, it will be excluded.
-- **`allow_unscoped_id_tokens_cloud_run`** (boolean): When `true` and the MCP
-  server host is a Cloud Run service (`*.run.app`), the CLI will use Google
-  Application Default Credentials (ADC) to generate an unscoped ID token and
-  send it as `Authorization: Bearer <token>`. When using this flag, do not set
-  OAuth scopes; they are not needed.
 - **`targetAudience`** (string): The OAuth Client ID allowlisted on the
   IAP-protected application you are trying to access. Used with
   `authProviderType: 'service_account_impersonation'`.
@@ -286,26 +281,6 @@ property:
 }
 ```
 
-#### Google Credential with Cloud Run ID tokens
-
-When connecting to a Cloud Run service endpoint (`*.run.app`), you must opt into
-ID token based authentication using ADC. Note that the generated ID token is
-unscoped.
-
-```json
-{
-  "mcpServers": {
-    "googleCloudServer": {
-      "url": "https://my-gcp-service.run.app/sse",
-      "authProviderType": "google_credentials",
-      "allow_unscoped_id_tokens_cloud_run": true
-    }
-  }
-}
-```
-
-Note: Only `*.run.app` hosts are supported for this flag.
-
 #### Service Account Impersonation
 
 To authenticate with a server using Service Account Impersonation, you must set
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 860a166f21..37f5f85641 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -189,8 +189,6 @@ export class MCPServerConfig {
     // OAuth configuration
     readonly oauth?: MCPOAuthConfig,
     readonly authProviderType?: AuthProviderType,
-    // When true, use Google ADC to fetch ID tokens for Cloud Run
-    readonly allow_unscoped_id_tokens_cloud_run?: boolean,
     // Service Account Configuration
     /* targetAudience format: CLIENT_ID.apps.googleusercontent.com */
     readonly targetAudience?: string,
diff --git a/packages/core/src/mcp/google-auth-provider.test.ts b/packages/core/src/mcp/google-auth-provider.test.ts
index ce86d7a2ab..efe959ff3c 100644
--- a/packages/core/src/mcp/google-auth-provider.test.ts
+++ b/packages/core/src/mcp/google-auth-provider.test.ts
@@ -20,16 +20,12 @@ describe('GoogleCredentialProvider', () => {
     },
   } as MCPServerConfig;
 
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
   it('should throw an error if no scopes are provided', () => {
     const config = {
       url: 'https://test.googleapis.com',
     } as MCPServerConfig;
     expect(() => new GoogleCredentialProvider(config)).toThrow(
-      'Scopes must be provided in the oauth config for Google Credentials provider (or enable allow_unscoped_id_tokens_for_cloud_run to use ID tokens for Cloud Run endpoints)',
+      'Scopes must be provided in the oauth config for Google Credentials provider',
     );
   });
 
@@ -84,19 +80,7 @@ describe('GoogleCredentialProvider', () => {
     );
   });
 
-  it('should not allow run.app host even when unscoped ID token flag is not present', () => {
-    const config = {
-      url: 'https://test.run.app',
-      oauth: {
-        scopes: ['scope1', 'scope2'],
-      },
-    } as MCPServerConfig;
-    expect(() => new GoogleCredentialProvider(config)).toThrow(
-      'To enable the Cloud Run MCP Server at https://test.run.app please set allow_unscoped_id_tokens_cloud_run:true in the MCP Server config.',
-    );
-  });
-
-  describe('with provider instance (Access Tokens)', () => {
+  describe('with provider instance', () => {
     let provider: GoogleCredentialProvider;
     let mockGetAccessToken: Mock;
     let mockClient: {
@@ -170,72 +154,4 @@ describe('GoogleCredentialProvider', () => {
       vi.useRealTimers();
     });
   });
-
-  describe('ID token flow (allow_unscoped_id_tokens_cloud_run)', () => {
-    let mockFetchIdToken: Mock;
-    let mockIdClient: {
-      idTokenProvider: {
-        fetchIdToken: Mock;
-      };
-    };
-
-    beforeEach(() => {
-      mockFetchIdToken = vi.fn();
-      mockIdClient = {
-        idTokenProvider: {
-          fetchIdToken: mockFetchIdToken,
-        },
-      };
-      (GoogleAuth.prototype.getIdTokenClient as Mock).mockResolvedValue(
-        mockIdClient,
-      );
-    });
-
-    it('should return ID token when flag is enabled and derive audience from hostname', async () => {
-      const config = {
-        url: 'https://test.run.app/path',
-        allow_unscoped_id_tokens_cloud_run: true,
-      } as MCPServerConfig;
-      const payload = { exp: Math.floor(Date.now() / 1000) + 3600 };
-      const validToken = `header.${Buffer.from(JSON.stringify(payload)).toString('base64')}.signature`;
-      mockFetchIdToken.mockResolvedValue(validToken);
-
-      const provider = new GoogleCredentialProvider(config);
-      const tokens = await provider.tokens();
-      expect(tokens?.access_token).toBe(validToken);
-      expect(GoogleAuth.prototype.getIdTokenClient).toHaveBeenCalledWith(
-        'test.run.app',
-      );
-      expect(mockFetchIdToken).toHaveBeenCalledWith('test.run.app');
-    });
-
-    it('should return undefined and log error when fetching ID token fails', async () => {
-      const config = {
-        url: 'https://test.run.app/path',
-        allow_unscoped_id_tokens_cloud_run: true,
-      } as MCPServerConfig;
-      const consoleErrorSpy = vi
-        .spyOn(console, 'error')
-        .mockImplementation(() => {});
-      mockFetchIdToken.mockRejectedValue(new Error('Fetch failed'));
-
-      const provider = new GoogleCredentialProvider(config);
-      const tokens = await provider.tokens();
-      expect(tokens).toBeUndefined();
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
-        'Failed to get ID token from Google ADC',
-        expect.any(Error),
-      );
-      consoleErrorSpy.mockRestore();
-    });
-
-    it('should not require scopes when flag allow_unscoped_id_tokens_cloud_run is true', () => {
-      const config = {
-        url: 'https://test.run.app',
-        allow_unscoped_id_tokens_cloud_run: true,
-      } as MCPServerConfig;
-
-      expect(() => new GoogleCredentialProvider(config)).not.toThrow();
-    });
-  });
 });
diff --git a/packages/core/src/mcp/google-auth-provider.ts b/packages/core/src/mcp/google-auth-provider.ts
index 3159798095..d152b4d256 100644
--- a/packages/core/src/mcp/google-auth-provider.ts
+++ b/packages/core/src/mcp/google-auth-provider.ts
@@ -13,17 +13,12 @@ import type {
 } from '@modelcontextprotocol/sdk/shared/auth.js';
 import { GoogleAuth } from 'google-auth-library';
 import type { MCPServerConfig } from '../config/config.js';
-import { OAuthUtils, FIVE_MIN_BUFFER_MS } from './oauth-utils.js';
+import { FIVE_MIN_BUFFER_MS } from './oauth-utils.js';
 
-const CLOUD_RUN_HOST_REGEX = /^(.*\.)?run\.app$/;
-
-// An array of hosts that are allowed to use the Google Credential provider.
 const ALLOWED_HOSTS = [/^.+\.googleapis\.com$/, /^(.*\.)?luci\.app$/];
 
 export class GoogleCredentialProvider implements OAuthClientProvider {
   private readonly auth: GoogleAuth;
-  private readonly useIdToken: boolean = false;
-  private readonly audience?: string;
   private cachedToken?: OAuthTokens;
   private tokenExpiryTime?: number;
 
@@ -47,35 +42,20 @@ export class GoogleCredentialProvider implements OAuthClientProvider {
     }
 
     const hostname = new URL(url).hostname;
-    const isRunAppHost = CLOUD_RUN_HOST_REGEX.test(hostname);
-    if (!this.config?.allow_unscoped_id_tokens_cloud_run && isRunAppHost) {
-      throw new Error(
-        `To enable the Cloud Run MCP Server at ${url} please set allow_unscoped_id_tokens_cloud_run:true in the MCP Server config.`,
-      );
-    }
-    if (this.config?.allow_unscoped_id_tokens_cloud_run && isRunAppHost) {
-      this.useIdToken = true;
-    }
-    this.audience = hostname;
-
-    if (
-      !this.useIdToken &&
-      !ALLOWED_HOSTS.some((pattern) => pattern.test(hostname))
-    ) {
+    if (!ALLOWED_HOSTS.some((pattern) => pattern.test(hostname))) {
       throw new Error(
         `Host "${hostname}" is not an allowed host for Google Credential provider.`,
       );
     }
 
-    // If we are using the access token flow, we MUST have scopes.
-    if (!this.useIdToken && !this.config?.oauth?.scopes) {
+    const scopes = this.config?.oauth?.scopes;
+    if (!scopes || scopes.length === 0) {
       throw new Error(
-        'Scopes must be provided in the oauth config for Google Credentials provider (or enable allow_unscoped_id_tokens_for_cloud_run to use ID tokens for Cloud Run endpoints)',
+        'Scopes must be provided in the oauth config for Google Credentials provider',
       );
     }
-
     this.auth = new GoogleAuth({
-      scopes: this.config?.oauth?.scopes,
+      scopes,
     });
   }
 
@@ -101,31 +81,6 @@ export class GoogleCredentialProvider implements OAuthClientProvider {
     this.cachedToken = undefined;
     this.tokenExpiryTime = undefined;
 
-    // If allow_unscoped_id_tokens_for_cloud_run is configured, use ID tokens.
-    if (this.useIdToken) {
-      try {
-        const idClient = await this.auth.getIdTokenClient(this.audience!);
-        const idToken = await idClient.idTokenProvider.fetchIdToken(
-          this.audience!,
-        );
-
-        const newToken: OAuthTokens = {
-          access_token: idToken,
-          token_type: 'Bearer',
-        };
-
-        const expiryTime = OAuthUtils.parseTokenExpiry(idToken);
-        if (expiryTime) {
-          this.tokenExpiryTime = expiryTime;
-          this.cachedToken = newToken;
-        }
-        return newToken;
-      } catch (e) {
-        console.error('Failed to get ID token from Google ADC', e);
-        return undefined;
-      }
-    }
-
     const client = await this.auth.getClient();
     const accessTokenResponse = await client.getAccessToken();
 

From d465a26e305865ce3f5a145d216466ecbe44de26 Mon Sep 17 00:00:00 2001
From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:26:39 -0400
Subject: [PATCH 52/73] chore(console): change console errors in
 sa-impersonation (#12165)

---
 .../core/src/mcp/sa-impersonation-provider.ts    | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/packages/core/src/mcp/sa-impersonation-provider.ts b/packages/core/src/mcp/sa-impersonation-provider.ts
index b9335e2622..2b9516d0d4 100644
--- a/packages/core/src/mcp/sa-impersonation-provider.ts
+++ b/packages/core/src/mcp/sa-impersonation-provider.ts
@@ -14,9 +14,12 @@ import { GoogleAuth } from 'google-auth-library';
 import { OAuthUtils, FIVE_MIN_BUFFER_MS } from './oauth-utils.js';
 import type { MCPServerConfig } from '../config/config.js';
 import type { OAuthClientProvider } from '@modelcontextprotocol/sdk/client/auth.js';
+import { coreEvents } from '../utils/events.js';
 
 function createIamApiUrl(targetSA: string): string {
-  return `https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/${encodeURIComponent(targetSA)}:generateIdToken`;
+  return `https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/${encodeURIComponent(
+    targetSA,
+  )}:generateIdToken`;
 }
 
 export class ServiceAccountImpersonationProvider
@@ -103,11 +106,18 @@ export class ServiceAccountImpersonationProvider
       idToken = res.data.token;
 
       if (!idToken || idToken.length === 0) {
-        console.error('Failed to get ID token from Google');
+        coreEvents.emitFeedback(
+          'error',
+          'Failed to obtain authentication token.',
+        );
         return undefined;
       }
     } catch (e) {
-      console.error('Failed to fetch ID token from Google:', e);
+      coreEvents.emitFeedback(
+        'error',
+        'Failed to obtain authentication token.',
+        e as Error,
+      );
       return undefined;
     }
 

From 25f27509c0f4512455c88213d75c691858919aca Mon Sep 17 00:00:00 2001
From: Shreya Keshive <shreyakeshive@google.com>
Date: Tue, 28 Oct 2025 08:59:39 -0700
Subject: [PATCH 53/73] revert nightly schedule (#11653)

---
 .github/workflows/release-nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml
index 7ef200a0f1..f4214aadcf 100644
--- a/.github/workflows/release-nightly.yml
+++ b/.github/workflows/release-nightly.yml
@@ -2,7 +2,7 @@ name: 'Release: Nightly'
 
 on:
   schedule:
-    - cron: '0 20 * * *'
+    - cron: '0 0 * * *'
   workflow_dispatch:
     inputs:
       dry_run:

From 1b302deefffbbc51f414b560a333025efc0c5e15 Mon Sep 17 00:00:00 2001
From: Jacob MacDonald <jakemac@google.com>
Date: Tue, 28 Oct 2025 09:04:30 -0700
Subject: [PATCH 54/73] Add ExtensionLoader interface, use that on Config
 object (#12116)

---
 packages/a2a-server/src/agent/executor.ts     |  11 +-
 packages/a2a-server/src/config/config.ts      |   9 +-
 packages/a2a-server/src/http/app.ts           |   7 +-
 .../cli/src/commands/extensions/disable.ts    |   3 +-
 .../cli/src/commands/extensions/enable.ts     |   4 +-
 .../src/commands/extensions/install.test.ts   |   1 +
 .../cli/src/commands/extensions/install.ts    |   3 +-
 packages/cli/src/commands/extensions/link.ts  |   3 +-
 packages/cli/src/commands/extensions/list.ts  |   2 +-
 .../cli/src/commands/extensions/uninstall.ts  |   3 +-
 .../cli/src/commands/extensions/update.ts     |   2 +-
 packages/cli/src/commands/mcp/list.ts         |   2 +-
 packages/cli/src/config/config.test.ts        | 388 ++++++++----------
 packages/cli/src/config/config.ts             |  26 +-
 packages/cli/src/config/extension-manager.ts  | 224 +++++-----
 packages/cli/src/config/extension.test.ts     | 115 ++++--
 .../cli/src/config/extensions/github.test.ts  |   2 +-
 packages/cli/src/config/extensions/github.ts  |   8 +-
 .../cli/src/config/extensions/update.test.ts  |  46 ++-
 packages/cli/src/config/extensions/update.ts  |  20 +-
 packages/cli/src/config/settings.test.ts      |   4 +-
 packages/cli/src/gemini.tsx                   |  27 +-
 packages/cli/src/ui/AppContainer.test.tsx     |  12 +
 packages/cli/src/ui/AppContainer.tsx          |  25 +-
 .../cli/src/ui/commands/directoryCommand.tsx  |   2 +-
 .../cli/src/ui/commands/memoryCommand.test.ts |   3 +
 packages/cli/src/ui/commands/memoryCommand.ts |   2 +-
 .../src/ui/hooks/useExtensionUpdates.test.tsx |  44 +-
 .../cli/src/ui/hooks/useExtensionUpdates.ts   |   2 +-
 .../cli/src/zed-integration/zedIntegration.ts |  14 +-
 packages/core/src/config/config.ts            |  17 +-
 packages/core/src/index.ts                    |   1 +
 packages/core/src/utils/extensionLoader.ts    |  48 +++
 .../core/src/utils/memoryDiscovery.test.ts    |  37 +-
 packages/core/src/utils/memoryDiscovery.ts    |   7 +-
 35 files changed, 619 insertions(+), 505 deletions(-)
 create mode 100644 packages/core/src/utils/extensionLoader.ts

diff --git a/packages/a2a-server/src/agent/executor.ts b/packages/a2a-server/src/agent/executor.ts
index a5360a94b7..1e3afd6e81 100644
--- a/packages/a2a-server/src/agent/executor.ts
+++ b/packages/a2a-server/src/agent/executor.ts
@@ -17,7 +17,10 @@ import type {
   ServerGeminiToolCallRequestEvent,
   Config,
 } from '@google/gemini-cli-core';
-import { GeminiEventType } from '@google/gemini-cli-core';
+import {
+  GeminiEventType,
+  SimpleExtensionLoader,
+} from '@google/gemini-cli-core';
 import { v4 as uuidv4 } from 'uuid';
 
 import { logger } from '../utils/logger.js';
@@ -96,7 +99,11 @@ export class CoderAgentExecutor implements AgentExecutor {
     loadEnvironment(); // Will override any global env with workspace envs
     const settings = loadSettings(workspaceRoot);
     const extensions = loadExtensions(workspaceRoot);
-    return await loadConfig(settings, extensions, taskId);
+    return await loadConfig(
+      settings,
+      new SimpleExtensionLoader(extensions),
+      taskId,
+    );
   }
 
   /**
diff --git a/packages/a2a-server/src/config/config.ts b/packages/a2a-server/src/config/config.ts
index c75c902ca5..5492bb9b0a 100644
--- a/packages/a2a-server/src/config/config.ts
+++ b/packages/a2a-server/src/config/config.ts
@@ -21,6 +21,7 @@ import {
   DEFAULT_GEMINI_EMBEDDING_MODEL,
   DEFAULT_GEMINI_MODEL,
   type GeminiCLIExtension,
+  type ExtensionLoader,
   debugLogger,
 } from '@google/gemini-cli-core';
 
@@ -30,10 +31,10 @@ import { type AgentSettings, CoderAgentEvent } from '../types.js';
 
 export async function loadConfig(
   settings: Settings,
-  extensions: GeminiCLIExtension[],
+  extensionLoader: ExtensionLoader,
   taskId: string,
 ): Promise<Config> {
-  const mcpServers = mergeMcpServers(settings, extensions);
+  const mcpServers = mergeMcpServers(settings, extensionLoader.getExtensions());
   const workspaceDir = process.cwd();
   const adcFilePath = process.env['GOOGLE_APPLICATION_CREDENTIALS'];
 
@@ -71,7 +72,7 @@ export async function loadConfig(
     },
     ideMode: false,
     folderTrust: settings.folderTrust === true,
-    extensions,
+    extensionLoader,
   };
 
   const fileService = new FileDiscoveryService(workspaceDir);
@@ -80,7 +81,7 @@ export async function loadConfig(
     [workspaceDir],
     false,
     fileService,
-    extensions,
+    extensionLoader,
     settings.folderTrust === true,
   );
   configParams.userMemory = memoryContent;
diff --git a/packages/a2a-server/src/http/app.ts b/packages/a2a-server/src/http/app.ts
index e7b45d347c..89bfa2cf25 100644
--- a/packages/a2a-server/src/http/app.ts
+++ b/packages/a2a-server/src/http/app.ts
@@ -20,6 +20,7 @@ import { loadConfig, loadEnvironment, setTargetDir } from '../config/config.js';
 import { loadSettings } from '../config/settings.js';
 import { loadExtensions } from '../config/extension.js';
 import { commandRegistry } from '../commands/command-registry.js';
+import { SimpleExtensionLoader } from '@google/gemini-cli-core';
 
 const coderAgentCard: AgentCard = {
   name: 'Gemini SDLC Agent',
@@ -70,7 +71,11 @@ export async function createApp() {
     loadEnvironment();
     const settings = loadSettings(workspaceRoot);
     const extensions = loadExtensions(workspaceRoot);
-    const config = await loadConfig(settings, extensions, 'a2a-server');
+    const config = await loadConfig(
+      settings,
+      new SimpleExtensionLoader(extensions),
+      'a2a-server',
+    );
 
     // loadEnvironment() is called within getConfig now
     const bucketName = process.env['GCS_BUCKET_NAME'];
diff --git a/packages/cli/src/commands/extensions/disable.ts b/packages/cli/src/commands/extensions/disable.ts
index 184d11a410..40bed33f83 100644
--- a/packages/cli/src/commands/extensions/disable.ts
+++ b/packages/cli/src/commands/extensions/disable.ts
@@ -23,8 +23,9 @@ export function handleDisable(args: DisableArgs) {
     workspaceDir,
     requestConsent: requestConsentNonInteractive,
     requestSetting: promptForSetting,
-    loadedSettings: loadSettings(workspaceDir),
+    settings: loadSettings(workspaceDir).merged,
   });
+  extensionManager.loadExtensions();
 
   try {
     if (args.scope?.toLowerCase() === 'workspace') {
diff --git a/packages/cli/src/commands/extensions/enable.ts b/packages/cli/src/commands/extensions/enable.ts
index 43523af372..468353f6a1 100644
--- a/packages/cli/src/commands/extensions/enable.ts
+++ b/packages/cli/src/commands/extensions/enable.ts
@@ -26,8 +26,10 @@ export function handleEnable(args: EnableArgs) {
     workspaceDir: workingDir,
     requestConsent: requestConsentNonInteractive,
     requestSetting: promptForSetting,
-    loadedSettings: loadSettings(workingDir),
+    settings: loadSettings(workingDir).merged,
   });
+  extensionManager.loadExtensions();
+
   try {
     if (args.scope?.toLowerCase() === 'workspace') {
       extensionManager.enableExtension(args.name, SettingScope.Workspace);
diff --git a/packages/cli/src/commands/extensions/install.test.ts b/packages/cli/src/commands/extensions/install.test.ts
index 7348bf89ec..1e5ff94eb6 100644
--- a/packages/cli/src/commands/extensions/install.test.ts
+++ b/packages/cli/src/commands/extensions/install.test.ts
@@ -23,6 +23,7 @@ vi.mock('../../config/extension-manager.ts', async (importOriginal) => {
     ...actual,
     ExtensionManager: vi.fn().mockImplementation(() => ({
       installOrUpdateExtension: mockInstallOrUpdateExtension,
+      loadExtensions: vi.fn(),
     })),
   };
 });
diff --git a/packages/cli/src/commands/extensions/install.ts b/packages/cli/src/commands/extensions/install.ts
index 13c59a1855..95d2e17b7a 100644
--- a/packages/cli/src/commands/extensions/install.ts
+++ b/packages/cli/src/commands/extensions/install.ts
@@ -74,8 +74,9 @@ export async function handleInstall(args: InstallArgs) {
       workspaceDir,
       requestConsent,
       requestSetting: promptForSetting,
-      loadedSettings: loadSettings(workspaceDir),
+      settings: loadSettings(workspaceDir).merged,
     });
+    extensionManager.loadExtensions();
     const name =
       await extensionManager.installOrUpdateExtension(installMetadata);
     debugLogger.log(`Extension "${name}" installed successfully and enabled.`);
diff --git a/packages/cli/src/commands/extensions/link.ts b/packages/cli/src/commands/extensions/link.ts
index 9f0693cd7e..69c18d8bbe 100644
--- a/packages/cli/src/commands/extensions/link.ts
+++ b/packages/cli/src/commands/extensions/link.ts
@@ -31,8 +31,9 @@ export async function handleLink(args: InstallArgs) {
       workspaceDir,
       requestConsent: requestConsentNonInteractive,
       requestSetting: promptForSetting,
-      loadedSettings: loadSettings(workspaceDir),
+      settings: loadSettings(workspaceDir).merged,
     });
+    extensionManager.loadExtensions();
     const extensionName =
       await extensionManager.installOrUpdateExtension(installMetadata);
     debugLogger.log(
diff --git a/packages/cli/src/commands/extensions/list.ts b/packages/cli/src/commands/extensions/list.ts
index 432299c902..a0b31e45f3 100644
--- a/packages/cli/src/commands/extensions/list.ts
+++ b/packages/cli/src/commands/extensions/list.ts
@@ -19,7 +19,7 @@ export async function handleList() {
       workspaceDir,
       requestConsent: requestConsentNonInteractive,
       requestSetting: promptForSetting,
-      loadedSettings: loadSettings(workspaceDir),
+      settings: loadSettings(workspaceDir).merged,
     });
     const extensions = extensionManager.loadExtensions();
     if (extensions.length === 0) {
diff --git a/packages/cli/src/commands/extensions/uninstall.ts b/packages/cli/src/commands/extensions/uninstall.ts
index 59dc8c828f..91242fe3a1 100644
--- a/packages/cli/src/commands/extensions/uninstall.ts
+++ b/packages/cli/src/commands/extensions/uninstall.ts
@@ -23,8 +23,9 @@ export async function handleUninstall(args: UninstallArgs) {
       workspaceDir,
       requestConsent: requestConsentNonInteractive,
       requestSetting: promptForSetting,
-      loadedSettings: loadSettings(workspaceDir),
+      settings: loadSettings(workspaceDir).merged,
     });
+    extensionManager.loadExtensions();
     await extensionManager.uninstallExtension(args.name, false);
     debugLogger.log(`Extension "${args.name}" successfully uninstalled.`);
   } catch (error) {
diff --git a/packages/cli/src/commands/extensions/update.ts b/packages/cli/src/commands/extensions/update.ts
index 5523149f18..b5c1620810 100644
--- a/packages/cli/src/commands/extensions/update.ts
+++ b/packages/cli/src/commands/extensions/update.ts
@@ -34,7 +34,7 @@ export async function handleUpdate(args: UpdateArgs) {
     workspaceDir,
     requestConsent: requestConsentNonInteractive,
     requestSetting: promptForSetting,
-    loadedSettings: loadSettings(workspaceDir),
+    settings: loadSettings(workspaceDir).merged,
   });
 
   const extensions = extensionManager.loadExtensions();
diff --git a/packages/cli/src/commands/mcp/list.ts b/packages/cli/src/commands/mcp/list.ts
index 3253641894..9e41964d17 100644
--- a/packages/cli/src/commands/mcp/list.ts
+++ b/packages/cli/src/commands/mcp/list.ts
@@ -28,7 +28,7 @@ async function getMcpServersFromConfig(): Promise<
 > {
   const settings = loadSettings();
   const extensionManager = new ExtensionManager({
-    loadedSettings: settings,
+    settings: settings.merged,
     workspaceDir: process.cwd(),
     requestConsent: requestConsentNonInteractive,
     requestSetting: promptForSetting,
diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index ebf448930f..c6e8a71458 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -12,15 +12,16 @@ import {
   DEFAULT_GEMINI_MODEL,
   DEFAULT_GEMINI_MODEL_AUTO,
   OutputFormat,
-  type GeminiCLIExtension,
   SHELL_TOOL_NAME,
   WRITE_FILE_TOOL_NAME,
   EDIT_TOOL_NAME,
+  type ExtensionLoader,
 } from '@google/gemini-cli-core';
 import { loadCliConfig, parseArguments, type CliArgs } from './config.js';
 import type { Settings } from './settings.js';
 import * as ServerConfig from '@google/gemini-cli-core';
 import { isWorkspaceTrusted } from './trustedFolders.js';
+import { ExtensionManager } from './extension-manager.js';
 
 vi.mock('./trustedFolders.js', () => ({
   isWorkspaceTrusted: vi
@@ -97,11 +98,22 @@ vi.mock('@google/gemini-cli-core', async () => {
     },
     loadEnvironment: vi.fn(),
     loadServerHierarchicalMemory: vi.fn(
-      (cwd, dirs, debug, fileService, extensionPaths, _maxDirs) =>
-        Promise.resolve({
-          memoryContent: extensionPaths?.join(',') || '',
+      (
+        cwd,
+        dirs,
+        debug,
+        fileService,
+        extensionLoader: ExtensionLoader,
+        _maxDirs,
+      ) => {
+        const extensionPaths = extensionLoader
+          .getExtensions()
+          .flatMap((e) => e.contextFiles);
+        return Promise.resolve({
+          memoryContent: extensionPaths.join(',') || '',
           fileCount: extensionPaths?.length || 0,
-        }),
+        });
+      },
     ),
     DEFAULT_MEMORY_FILE_FILTERING_OPTIONS: {
       respectGitIgnore: false,
@@ -114,6 +126,8 @@ vi.mock('@google/gemini-cli-core', async () => {
   };
 });
 
+vi.mock('./extension-manager.js');
+
 // Global setup to ensure clean environment for all tests in this file
 const originalArgv = process.argv;
 const originalGeminiModel = process.env['GEMINI_MODEL'];
@@ -509,6 +523,7 @@ describe('loadCliConfig', () => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -546,7 +561,7 @@ describe('loadCliConfig', () => {
       process.argv = ['node', 'script.js'];
       const argv = await parseArguments({} as Settings);
       const settings: Settings = {};
-      const config = await loadCliConfig(settings, [], 'test-session', argv);
+      const config = await loadCliConfig(settings, 'test-session', argv);
       expect(config.getProxy()).toBeFalsy();
     });
 
@@ -587,7 +602,7 @@ describe('loadCliConfig', () => {
         process.argv = ['node', 'script.js'];
         const argv = await parseArguments({} as Settings);
         const settings: Settings = {};
-        const config = await loadCliConfig(settings, [], 'test-session', argv);
+        const config = await loadCliConfig(settings, 'test-session', argv);
         expect(config.getProxy()).toBe(expected);
       });
     });
@@ -597,7 +612,7 @@ describe('loadCliConfig', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getFileFilteringRespectGitIgnore()).toBe(
       DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore,
     );
@@ -621,7 +636,7 @@ describe('Hierarchical Memory Loading (config.ts) - Placeholder Suite', () => {
   it('should pass extension context file paths to loadServerHierarchicalMemory', async () => {
     process.argv = ['node', 'script.js'];
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext1',
         name: 'ext1',
@@ -649,15 +664,15 @@ describe('Hierarchical Memory Loading (config.ts) - Placeholder Suite', () => {
         ],
         isActive: true,
       },
-    ];
+    ]);
     const argv = await parseArguments({} as Settings);
-    await loadCliConfig(settings, extensions, 'session-id', argv);
+    await loadCliConfig(settings, 'session-id', argv);
     expect(ServerConfig.loadServerHierarchicalMemory).toHaveBeenCalledWith(
       expect.any(String),
       [],
       false,
       expect.any(Object),
-      extensions,
+      expect.any(ExtensionManager),
       true,
       'tree',
       {
@@ -711,7 +726,8 @@ describe('mergeMcpServers', () => {
         },
       },
     };
-    const extensions: GeminiCLIExtension[] = [
+
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext1',
         name: 'ext1',
@@ -726,11 +742,11 @@ describe('mergeMcpServers', () => {
         contextFiles: [],
         isActive: true,
       },
-    ];
+    ]);
     const originalSettings = JSON.parse(JSON.stringify(settings));
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    await loadCliConfig(settings, extensions, 'test-session', argv);
+    await loadCliConfig(settings, 'test-session', argv);
     expect(settings).toEqual(originalSettings);
   });
 });
@@ -744,6 +760,7 @@ describe('mergeExcludeTools', () => {
   const originalIsTTY = process.stdin.isTTY;
 
   beforeEach(() => {
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
     process.stdin.isTTY = true;
   });
 
@@ -753,7 +770,7 @@ describe('mergeExcludeTools', () => {
 
   it('should merge excludeTools from settings and extensions', async () => {
     const settings: Settings = { tools: { exclude: ['tool1', 'tool2'] } };
-    const extensions: GeminiCLIExtension[] = [
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext1',
         name: 'ext1',
@@ -772,12 +789,12 @@ describe('mergeExcludeTools', () => {
         contextFiles: [],
         isActive: true,
       },
-    ];
+    ]);
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       settings,
-      extensions,
+
       'test-session',
       argv,
     );
@@ -789,7 +806,7 @@ describe('mergeExcludeTools', () => {
 
   it('should handle overlapping excludeTools between settings and extensions', async () => {
     const settings: Settings = { tools: { exclude: ['tool1', 'tool2'] } };
-    const extensions: GeminiCLIExtension[] = [
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext1',
         name: 'ext1',
@@ -799,15 +816,10 @@ describe('mergeExcludeTools', () => {
         contextFiles: [],
         isActive: true,
       },
-    ];
+    ]);
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getExcludeTools()).toEqual(
       expect.arrayContaining(['tool1', 'tool2', 'tool3']),
     );
@@ -816,7 +828,7 @@ describe('mergeExcludeTools', () => {
 
   it('should handle overlapping excludeTools between extensions', async () => {
     const settings: Settings = { tools: { exclude: ['tool1'] } };
-    const extensions: GeminiCLIExtension[] = [
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext1',
         name: 'ext1',
@@ -835,15 +847,10 @@ describe('mergeExcludeTools', () => {
         contextFiles: [],
         isActive: true,
       },
-    ];
+    ]);
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getExcludeTools()).toEqual(
       expect.arrayContaining(['tool1', 'tool2', 'tool3', 'tool4']),
     );
@@ -853,30 +860,18 @@ describe('mergeExcludeTools', () => {
   it('should return an empty array when no excludeTools are specified and it is interactive', async () => {
     process.stdin.isTTY = true;
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getExcludeTools()).toEqual([]);
   });
 
   it('should return default excludes when no excludeTools are specified and it is not interactive', async () => {
     process.stdin.isTTY = false;
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
     process.argv = ['node', 'script.js', '-p', 'test'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getExcludeTools()).toEqual(defaultExcludes);
   });
 
@@ -884,13 +879,8 @@ describe('mergeExcludeTools', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { tools: { exclude: ['tool1', 'tool2'] } };
-    const extensions: GeminiCLIExtension[] = [];
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getExcludeTools()).toEqual(
       expect.arrayContaining(['tool1', 'tool2']),
     );
@@ -899,7 +889,7 @@ describe('mergeExcludeTools', () => {
 
   it('should handle extensions with excludeTools but no settings', async () => {
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext',
         name: 'ext1',
@@ -909,15 +899,10 @@ describe('mergeExcludeTools', () => {
         contextFiles: [],
         isActive: true,
       },
-    ];
+    ]);
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getExcludeTools()).toEqual(
       expect.arrayContaining(['tool1', 'tool2']),
     );
@@ -926,7 +911,7 @@ describe('mergeExcludeTools', () => {
 
   it('should not modify the original settings object', async () => {
     const settings: Settings = { tools: { exclude: ['tool1'] } };
-    const extensions: GeminiCLIExtension[] = [
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([
       {
         path: '/path/to/ext',
         name: 'ext1',
@@ -936,11 +921,11 @@ describe('mergeExcludeTools', () => {
         contextFiles: [],
         isActive: true,
       },
-    ];
+    ]);
     const originalSettings = JSON.parse(JSON.stringify(settings));
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    await loadCliConfig(settings, extensions, 'test-session', argv);
+    await loadCliConfig(settings, 'test-session', argv);
     expect(settings).toEqual(originalSettings);
   });
 });
@@ -954,6 +939,7 @@ describe('Approval mode tool exclusion logic', () => {
       isTrusted: true,
       source: undefined,
     });
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -964,14 +950,7 @@ describe('Approval mode tool exclusion logic', () => {
     process.argv = ['node', 'script.js', '-p', 'test'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
-
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
 
     const excludedTools = config.getExcludeTools();
     expect(excludedTools).toContain(SHELL_TOOL_NAME);
@@ -990,14 +969,8 @@ describe('Approval mode tool exclusion logic', () => {
     ];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
 
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
 
     const excludedTools = config.getExcludeTools();
     expect(excludedTools).toContain(SHELL_TOOL_NAME);
@@ -1016,14 +989,8 @@ describe('Approval mode tool exclusion logic', () => {
     ];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
 
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
 
     const excludedTools = config.getExcludeTools();
     expect(excludedTools).toContain(SHELL_TOOL_NAME);
@@ -1042,14 +1009,8 @@ describe('Approval mode tool exclusion logic', () => {
     ];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
 
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
 
     const excludedTools = config.getExcludeTools();
     expect(excludedTools).not.toContain(SHELL_TOOL_NAME);
@@ -1061,14 +1022,8 @@ describe('Approval mode tool exclusion logic', () => {
     process.argv = ['node', 'script.js', '--yolo', '-p', 'test'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
 
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
 
     const excludedTools = config.getExcludeTools();
     expect(excludedTools).not.toContain(SHELL_TOOL_NAME);
@@ -1091,14 +1046,8 @@ describe('Approval mode tool exclusion logic', () => {
       process.argv = testCase.args;
       const argv = await parseArguments({} as Settings);
       const settings: Settings = {};
-      const extensions: GeminiCLIExtension[] = [];
 
-      const config = await loadCliConfig(
-        settings,
-        extensions,
-        'test-session',
-        argv,
-      );
+      const config = await loadCliConfig(settings, 'test-session', argv);
 
       const excludedTools = config.getExcludeTools();
       expect(excludedTools).not.toContain(SHELL_TOOL_NAME);
@@ -1118,14 +1067,8 @@ describe('Approval mode tool exclusion logic', () => {
     ];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { tools: { exclude: ['custom_tool'] } };
-    const extensions: GeminiCLIExtension[] = [];
 
-    const config = await loadCliConfig(
-      settings,
-      extensions,
-      'test-session',
-      argv,
-    );
+    const config = await loadCliConfig(settings, 'test-session', argv);
 
     const excludedTools = config.getExcludeTools();
     expect(excludedTools).toContain('custom_tool'); // From settings
@@ -1142,11 +1085,8 @@ describe('Approval mode tool exclusion logic', () => {
         disableYoloMode: true,
       },
     };
-    const extensions: GeminiCLIExtension[] = [];
 
-    await expect(
-      loadCliConfig(settings, extensions, 'test-session', argv),
-    ).rejects.toThrow(
+    await expect(loadCliConfig(settings, 'test-session', argv)).rejects.toThrow(
       'Cannot start in YOLO mode when it is disabled by settings',
     );
   });
@@ -1161,14 +1101,8 @@ describe('Approval mode tool exclusion logic', () => {
     };
 
     const settings: Settings = {};
-    const extensions: GeminiCLIExtension[] = [];
     await expect(
-      loadCliConfig(
-        settings,
-        extensions,
-        'test-session',
-        invalidArgv as CliArgs,
-      ),
+      loadCliConfig(settings, 'test-session', invalidArgv as CliArgs),
     ).rejects.toThrow(
       'Invalid approval mode: invalid_mode. Valid values are: yolo, auto_edit, default',
     );
@@ -1180,6 +1114,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1198,7 +1133,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
   it('should allow all MCP servers if the flag is not provided', async () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(baseSettings, [], 'test-session', argv);
+    const config = await loadCliConfig(baseSettings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual(baseSettings.mcpServers);
   });
 
@@ -1210,7 +1145,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
       'server1',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(baseSettings, [], 'test-session', argv);
+    const config = await loadCliConfig(baseSettings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server1: { url: 'http://localhost:8080' },
     });
@@ -1226,7 +1161,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
       'server3',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(baseSettings, [], 'test-session', argv);
+    const config = await loadCliConfig(baseSettings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server1: { url: 'http://localhost:8080' },
       server3: { url: 'http://localhost:8082' },
@@ -1243,7 +1178,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
       'server4',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(baseSettings, [], 'test-session', argv);
+    const config = await loadCliConfig(baseSettings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server1: { url: 'http://localhost:8080' },
     });
@@ -1252,7 +1187,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
   it('should allow no MCP servers if the flag is provided but empty', async () => {
     process.argv = ['node', 'script.js', '--allowed-mcp-server-names', ''];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(baseSettings, [], 'test-session', argv);
+    const config = await loadCliConfig(baseSettings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({});
   });
 
@@ -1263,7 +1198,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
       ...baseSettings,
       mcp: { allowed: ['server1', 'server2'] },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server1: { url: 'http://localhost:8080' },
       server2: { url: 'http://localhost:8081' },
@@ -1277,7 +1212,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
       ...baseSettings,
       mcp: { excluded: ['server1', 'server2'] },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server3: { url: 'http://localhost:8082' },
     });
@@ -1293,7 +1228,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
         allowed: ['server1', 'server2'],
       },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server2: { url: 'http://localhost:8081' },
     });
@@ -1314,7 +1249,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
         allowed: ['server2'],
       },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server1: { url: 'http://localhost:8080' },
     });
@@ -1337,7 +1272,7 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
         excluded: ['server3'], // Should be ignored
       },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getMcpServers()).toEqual({
       server2: { url: 'http://localhost:8081' },
       server3: { url: 'http://localhost:8082' },
@@ -1346,6 +1281,14 @@ describe('loadCliConfig with allowed-mcp-server-names', () => {
 });
 
 describe('loadCliConfig model selection', () => {
+  beforeEach(() => {
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
+  });
+
+  afterEach(() => {
+    vi.resetAllMocks();
+  });
+
   it('selects a model from settings.json if provided', async () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
@@ -1355,7 +1298,6 @@ describe('loadCliConfig model selection', () => {
           name: 'gemini-2.5-pro',
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1370,7 +1312,6 @@ describe('loadCliConfig model selection', () => {
       {
         // No model set.
       },
-      [],
       'test-session',
       argv,
     );
@@ -1387,7 +1328,6 @@ describe('loadCliConfig model selection', () => {
           name: 'gemini-2.5-pro',
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1402,7 +1342,6 @@ describe('loadCliConfig model selection', () => {
       {
         // No model provided via settings.
       },
-      [],
       'test-session',
       argv,
     );
@@ -1412,6 +1351,14 @@ describe('loadCliConfig model selection', () => {
 });
 
 describe('loadCliConfig model selection with model router', () => {
+  beforeEach(() => {
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
+  });
+
+  afterEach(() => {
+    vi.resetAllMocks();
+  });
+
   it('should use auto model when useModelRouter is true and no model is provided', async () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
@@ -1421,7 +1368,6 @@ describe('loadCliConfig model selection with model router', () => {
           useModelRouter: true,
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1438,7 +1384,6 @@ describe('loadCliConfig model selection with model router', () => {
           useModelRouter: false,
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1455,7 +1400,6 @@ describe('loadCliConfig model selection with model router', () => {
           useModelRouter: true,
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1475,7 +1419,6 @@ describe('loadCliConfig model selection with model router', () => {
           name: 'gemini-from-settings',
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1493,7 +1436,6 @@ describe('loadCliConfig model selection with model router', () => {
           useModelRouter: true,
         },
       },
-      [],
       'test-session',
       argv,
     );
@@ -1507,6 +1449,7 @@ describe('loadCliConfig folderTrust', () => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1524,7 +1467,7 @@ describe('loadCliConfig folderTrust', () => {
       },
     };
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getFolderTrust()).toBe(false);
   });
 
@@ -1538,7 +1481,7 @@ describe('loadCliConfig folderTrust', () => {
         },
       },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getFolderTrust()).toBe(true);
   });
 
@@ -1546,7 +1489,7 @@ describe('loadCliConfig folderTrust', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getFolderTrust()).toBe(false);
   });
 });
@@ -1559,6 +1502,7 @@ describe('loadCliConfig with includeDirectories', () => {
     vi.spyOn(process, 'cwd').mockReturnValue(
       path.resolve(path.sep, 'home', 'user', 'project'),
     );
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1583,7 +1527,7 @@ describe('loadCliConfig with includeDirectories', () => {
         ],
       },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     const expected = [
       mockCwd,
       path.resolve(path.sep, 'cli', 'path1'),
@@ -1606,6 +1550,7 @@ describe('loadCliConfig chatCompression', () => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1623,7 +1568,7 @@ describe('loadCliConfig chatCompression', () => {
         },
       },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getChatCompression()).toEqual({
       contextPercentageThreshold: 0.5,
     });
@@ -1633,7 +1578,7 @@ describe('loadCliConfig chatCompression', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getChatCompression()).toBeUndefined();
   });
 });
@@ -1643,6 +1588,7 @@ describe('loadCliConfig useRipgrep', () => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1654,7 +1600,7 @@ describe('loadCliConfig useRipgrep', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getUseRipgrep()).toBe(true);
   });
 
@@ -1662,7 +1608,7 @@ describe('loadCliConfig useRipgrep', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { tools: { useRipgrep: false } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getUseRipgrep()).toBe(false);
   });
 
@@ -1670,7 +1616,7 @@ describe('loadCliConfig useRipgrep', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { tools: { useRipgrep: true } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getUseRipgrep()).toBe(true);
   });
 
@@ -1679,7 +1625,7 @@ describe('loadCliConfig useRipgrep', () => {
       process.argv = ['node', 'script.js'];
       const argv = await parseArguments({} as Settings);
       const settings: Settings = {};
-      const config = await loadCliConfig(settings, [], 'test-session', argv);
+      const config = await loadCliConfig(settings, 'test-session', argv);
       expect(config.getUseModelRouter()).toBe(true);
     });
 
@@ -1687,7 +1633,7 @@ describe('loadCliConfig useRipgrep', () => {
       process.argv = ['node', 'script.js'];
       const argv = await parseArguments({} as Settings);
       const settings: Settings = { experimental: { useModelRouter: true } };
-      const config = await loadCliConfig(settings, [], 'test-session', argv);
+      const config = await loadCliConfig(settings, 'test-session', argv);
       expect(config.getUseModelRouter()).toBe(true);
     });
 
@@ -1695,7 +1641,7 @@ describe('loadCliConfig useRipgrep', () => {
       process.argv = ['node', 'script.js'];
       const argv = await parseArguments({} as Settings);
       const settings: Settings = { experimental: { useModelRouter: false } };
-      const config = await loadCliConfig(settings, [], 'test-session', argv);
+      const config = await loadCliConfig(settings, 'test-session', argv);
       expect(config.getUseModelRouter()).toBe(false);
     });
   });
@@ -1706,6 +1652,7 @@ describe('screenReader configuration', () => {
     vi.resetAllMocks();
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1719,7 +1666,7 @@ describe('screenReader configuration', () => {
     const settings: Settings = {
       ui: { accessibility: { screenReader: true } },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getScreenReader()).toBe(true);
   });
 
@@ -1729,7 +1676,7 @@ describe('screenReader configuration', () => {
     const settings: Settings = {
       ui: { accessibility: { screenReader: false } },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getScreenReader()).toBe(false);
   });
 
@@ -1739,7 +1686,7 @@ describe('screenReader configuration', () => {
     const settings: Settings = {
       ui: { accessibility: { screenReader: false } },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getScreenReader()).toBe(true);
   });
 
@@ -1747,7 +1694,7 @@ describe('screenReader configuration', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = {};
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getScreenReader()).toBe(false);
   });
 });
@@ -1764,6 +1711,7 @@ describe('loadCliConfig tool exclusions', () => {
       isTrusted: true,
       source: undefined,
     });
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1776,7 +1724,7 @@ describe('loadCliConfig tool exclusions', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).not.toContain('run_shell_command');
     expect(config.getExcludeTools()).not.toContain('replace');
     expect(config.getExcludeTools()).not.toContain('write_file');
@@ -1786,7 +1734,7 @@ describe('loadCliConfig tool exclusions', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', '--yolo'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).not.toContain('run_shell_command');
     expect(config.getExcludeTools()).not.toContain('replace');
     expect(config.getExcludeTools()).not.toContain('write_file');
@@ -1796,7 +1744,7 @@ describe('loadCliConfig tool exclusions', () => {
     process.stdin.isTTY = false;
     process.argv = ['node', 'script.js', '-p', 'test'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).toContain('run_shell_command');
     expect(config.getExcludeTools()).toContain('replace');
     expect(config.getExcludeTools()).toContain('write_file');
@@ -1806,7 +1754,7 @@ describe('loadCliConfig tool exclusions', () => {
     process.stdin.isTTY = false;
     process.argv = ['node', 'script.js', '-p', 'test', '--yolo'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).not.toContain('run_shell_command');
     expect(config.getExcludeTools()).not.toContain('replace');
     expect(config.getExcludeTools()).not.toContain('write_file');
@@ -1823,7 +1771,7 @@ describe('loadCliConfig tool exclusions', () => {
       'ShellTool',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).not.toContain(SHELL_TOOL_NAME);
   });
 
@@ -1838,7 +1786,7 @@ describe('loadCliConfig tool exclusions', () => {
       'run_shell_command',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).not.toContain(SHELL_TOOL_NAME);
   });
 
@@ -1853,7 +1801,7 @@ describe('loadCliConfig tool exclusions', () => {
       'ShellTool(wc)',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getExcludeTools()).not.toContain(SHELL_TOOL_NAME);
   });
 });
@@ -1866,6 +1814,7 @@ describe('loadCliConfig interactive', () => {
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
     process.stdin.isTTY = true;
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -1878,7 +1827,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(true);
   });
 
@@ -1886,7 +1835,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = false;
     process.argv = ['node', 'script.js', '--prompt-interactive', 'test'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(true);
   });
 
@@ -1894,7 +1843,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = false;
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
   });
 
@@ -1902,7 +1851,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', '--prompt', 'test'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
   });
 
@@ -1910,7 +1859,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', '--model', 'gemini-2.5-pro', 'Hello'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
   });
 
@@ -1925,7 +1874,7 @@ describe('loadCliConfig interactive', () => {
       'Hello world',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
     // Verify the question is preserved for one-shot execution
     expect(argv.prompt).toBe('Hello world');
@@ -1936,7 +1885,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', '-e', 'none', 'hello'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
     expect(argv.query).toBe('hello');
     expect(argv.extensions).toEqual(['none']);
@@ -1946,7 +1895,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', 'hello world how are you'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
     expect(argv.query).toBe('hello world how are you');
     expect(argv.prompt).toBe('hello world how are you');
@@ -1967,7 +1916,7 @@ describe('loadCliConfig interactive', () => {
       'array',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
     expect(argv.query).toBe('write a function to sort array');
     expect(argv.model).toBe('gemini-2.5-pro');
@@ -1977,7 +1926,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', ''];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(true);
     expect(argv.query).toBeUndefined();
   });
@@ -1996,7 +1945,7 @@ describe('loadCliConfig interactive', () => {
       'you',
     ];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(false);
     expect(argv.query).toBe('hello world how are you');
     expect(argv.extensions).toEqual(['none']);
@@ -2006,7 +1955,7 @@ describe('loadCliConfig interactive', () => {
     process.stdin.isTTY = true;
     process.argv = ['node', 'script.js', '--model', 'gemini-2.5-pro'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.isInteractive()).toBe(true);
   });
 });
@@ -2023,6 +1972,7 @@ describe('loadCliConfig approval mode', () => {
       isTrusted: true,
       source: undefined,
     });
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -2034,42 +1984,42 @@ describe('loadCliConfig approval mode', () => {
   it('should default to DEFAULT approval mode when no flags are set', async () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
   });
 
   it('should set YOLO approval mode when --yolo flag is used', async () => {
     process.argv = ['node', 'script.js', '--yolo'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO);
   });
 
   it('should set YOLO approval mode when -y flag is used', async () => {
     process.argv = ['node', 'script.js', '-y'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO);
   });
 
   it('should set DEFAULT approval mode when --approval-mode=default', async () => {
     process.argv = ['node', 'script.js', '--approval-mode', 'default'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
   });
 
   it('should set AUTO_EDIT approval mode when --approval-mode=auto_edit', async () => {
     process.argv = ['node', 'script.js', '--approval-mode', 'auto_edit'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.AUTO_EDIT);
   });
 
   it('should set YOLO approval mode when --approval-mode=yolo', async () => {
     process.argv = ['node', 'script.js', '--approval-mode', 'yolo'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO);
   });
 
@@ -2080,14 +2030,14 @@ describe('loadCliConfig approval mode', () => {
     const argv = await parseArguments({} as Settings);
     // Manually set yolo to true to simulate what would happen if validation didn't prevent it
     argv.yolo = true;
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
   });
 
   it('should fall back to --yolo behavior when --approval-mode is not set', async () => {
     process.argv = ['node', 'script.js', '--yolo'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO);
   });
 
@@ -2103,28 +2053,28 @@ describe('loadCliConfig approval mode', () => {
     it('should override --approval-mode=yolo to DEFAULT', async () => {
       process.argv = ['node', 'script.js', '--approval-mode', 'yolo'];
       const argv = await parseArguments({} as Settings);
-      const config = await loadCliConfig({}, [], 'test-session', argv);
+      const config = await loadCliConfig({}, 'test-session', argv);
       expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
     });
 
     it('should override --approval-mode=auto_edit to DEFAULT', async () => {
       process.argv = ['node', 'script.js', '--approval-mode', 'auto_edit'];
       const argv = await parseArguments({} as Settings);
-      const config = await loadCliConfig({}, [], 'test-session', argv);
+      const config = await loadCliConfig({}, 'test-session', argv);
       expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
     });
 
     it('should override --yolo flag to DEFAULT', async () => {
       process.argv = ['node', 'script.js', '--yolo'];
       const argv = await parseArguments({} as Settings);
-      const config = await loadCliConfig({}, [], 'test-session', argv);
+      const config = await loadCliConfig({}, 'test-session', argv);
       expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
     });
 
     it('should remain DEFAULT when --approval-mode=default', async () => {
       process.argv = ['node', 'script.js', '--approval-mode', 'default'];
       const argv = await parseArguments({} as Settings);
-      const config = await loadCliConfig({}, [], 'test-session', argv);
+      const config = await loadCliConfig({}, 'test-session', argv);
       expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT);
     });
   });
@@ -2138,6 +2088,7 @@ describe('loadCliConfig fileFiltering', () => {
     vi.mocked(os.homedir).mockReturnValue('/mock/home/user');
     vi.stubEnv('GEMINI_API_KEY', 'test-api-key');
     process.argv = ['node', 'script.js']; // Reset argv for each test
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
   });
 
   afterEach(() => {
@@ -2205,17 +2156,25 @@ describe('loadCliConfig fileFiltering', () => {
         },
       };
       const argv = await parseArguments(settings);
-      const config = await loadCliConfig(settings, [], 'test-session', argv);
+      const config = await loadCliConfig(settings, 'test-session', argv);
       expect(getter(config)).toBe(value);
     },
   );
 });
 
 describe('Output format', () => {
+  beforeEach(() => {
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
+  });
+
+  afterEach(() => {
+    vi.resetAllMocks();
+  });
+
   it('should default to TEXT', async () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getOutputFormat()).toBe(OutputFormat.TEXT);
   });
 
@@ -2224,7 +2183,6 @@ describe('Output format', () => {
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       { output: { format: OutputFormat.JSON } },
-      [],
       'test-session',
       argv,
     );
@@ -2236,7 +2194,6 @@ describe('Output format', () => {
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       { output: { format: OutputFormat.JSON } },
-      [],
       'test-session',
       argv,
     );
@@ -2246,7 +2203,7 @@ describe('Output format', () => {
   it('should accept stream-json as a valid output format', async () => {
     process.argv = ['node', 'script.js', '--output-format', 'stream-json'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getOutputFormat()).toBe(OutputFormat.STREAM_JSON);
   });
 
@@ -2335,12 +2292,19 @@ describe('parseArguments with positional prompt', () => {
 });
 
 describe('Telemetry configuration via environment variables', () => {
+  beforeEach(() => {
+    vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]);
+  });
+  afterEach(() => {
+    vi.resetAllMocks();
+  });
+
   it('should prioritize GEMINI_TELEMETRY_ENABLED over settings', async () => {
     vi.stubEnv('GEMINI_TELEMETRY_ENABLED', 'true');
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { telemetry: { enabled: false } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryEnabled()).toBe(true);
   });
 
@@ -2351,7 +2315,7 @@ describe('Telemetry configuration via environment variables', () => {
     const settings: Settings = {
       telemetry: { target: ServerConfig.TelemetryTarget.LOCAL },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryTarget()).toBe('gcp');
   });
 
@@ -2362,9 +2326,7 @@ describe('Telemetry configuration via environment variables', () => {
     const settings: Settings = {
       telemetry: { target: ServerConfig.TelemetryTarget.GCP },
     };
-    await expect(
-      loadCliConfig(settings, [], 'test-session', argv),
-    ).rejects.toThrow(
+    await expect(loadCliConfig(settings, 'test-session', argv)).rejects.toThrow(
       /Invalid telemetry configuration: .*Invalid telemetry target/i,
     );
     vi.unstubAllEnvs();
@@ -2378,7 +2340,7 @@ describe('Telemetry configuration via environment variables', () => {
     const settings: Settings = {
       telemetry: { otlpEndpoint: 'http://settings.com' },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryOtlpEndpoint()).toBe('http://gemini.env.com');
   });
 
@@ -2387,7 +2349,7 @@ describe('Telemetry configuration via environment variables', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { telemetry: { otlpProtocol: 'grpc' } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryOtlpProtocol()).toBe('http');
   });
 
@@ -2396,7 +2358,7 @@ describe('Telemetry configuration via environment variables', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { telemetry: { logPrompts: true } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryLogPromptsEnabled()).toBe(false);
   });
 
@@ -2407,7 +2369,7 @@ describe('Telemetry configuration via environment variables', () => {
     const settings: Settings = {
       telemetry: { outfile: '/settings/telemetry.log' },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryOutfile()).toBe('/gemini/env/telemetry.log');
   });
 
@@ -2416,7 +2378,7 @@ describe('Telemetry configuration via environment variables', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { telemetry: { useCollector: false } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryUseCollector()).toBe(true);
   });
 
@@ -2425,7 +2387,7 @@ describe('Telemetry configuration via environment variables', () => {
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
     const settings: Settings = { telemetry: { enabled: true } };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryEnabled()).toBe(true);
   });
 
@@ -2436,7 +2398,7 @@ describe('Telemetry configuration via environment variables', () => {
     const settings: Settings = {
       telemetry: { target: ServerConfig.TelemetryTarget.LOCAL },
     };
-    const config = await loadCliConfig(settings, [], 'test-session', argv);
+    const config = await loadCliConfig(settings, 'test-session', argv);
     expect(config.getTelemetryTarget()).toBe('local');
   });
 
@@ -2444,7 +2406,7 @@ describe('Telemetry configuration via environment variables', () => {
     vi.stubEnv('GEMINI_TELEMETRY_ENABLED', '1');
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getTelemetryEnabled()).toBe(true);
   });
 
@@ -2454,7 +2416,6 @@ describe('Telemetry configuration via environment variables', () => {
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       { telemetry: { enabled: true } },
-      [],
       'test-session',
       argv,
     );
@@ -2465,7 +2426,7 @@ describe('Telemetry configuration via environment variables', () => {
     vi.stubEnv('GEMINI_TELEMETRY_LOG_PROMPTS', '1');
     process.argv = ['node', 'script.js'];
     const argv = await parseArguments({} as Settings);
-    const config = await loadCliConfig({}, [], 'test-session', argv);
+    const config = await loadCliConfig({}, 'test-session', argv);
     expect(config.getTelemetryLogPromptsEnabled()).toBe(true);
   });
 
@@ -2475,7 +2436,6 @@ describe('Telemetry configuration via environment variables', () => {
     const argv = await parseArguments({} as Settings);
     const config = await loadCliConfig(
       { telemetry: { logPrompts: true } },
-      [],
       'test-session',
       argv,
     );
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 7617770b79..76ee5e8a21 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -48,6 +48,10 @@ import { appEvents } from '../utils/events.js';
 
 import { isWorkspaceTrusted } from './trustedFolders.js';
 import { createPolicyEngineConfig } from './policy.js';
+import { ExtensionManager } from './extension-manager.js';
+import type { ExtensionLoader } from '@google/gemini-cli-core/src/utils/extensionLoader.js';
+import { requestConsentNonInteractive } from './extensions/consent.js';
+import { promptForSetting } from './extensions/extensionSettings.js';
 
 export interface CliArgs {
   query: string | undefined;
@@ -293,7 +297,7 @@ export async function loadHierarchicalGeminiMemory(
   debugMode: boolean,
   fileService: FileDiscoveryService,
   settings: Settings,
-  extensions: GeminiCLIExtension[],
+  extensionLoader: ExtensionLoader,
   folderTrust: boolean,
   memoryImportFormat: 'flat' | 'tree' = 'tree',
   fileFilteringOptions?: FileFilteringOptions,
@@ -319,7 +323,7 @@ export async function loadHierarchicalGeminiMemory(
     includeDirectoriesToReadGemini,
     debugMode,
     fileService,
-    extensions,
+    extensionLoader,
     folderTrust,
     memoryImportFormat,
     fileFilteringOptions,
@@ -368,7 +372,6 @@ export function isDebugMode(argv: CliArgs): boolean {
 
 export async function loadCliConfig(
   settings: Settings,
-  allExtensions: GeminiCLIExtension[],
   sessionId: string,
   argv: CliArgs,
   cwd: string = process.cwd(),
@@ -413,6 +416,15 @@ export async function loadCliConfig(
     .map(resolvePath)
     .concat((argv.includeDirectories || []).map(resolvePath));
 
+  const extensionManager = new ExtensionManager({
+    settings,
+    requestConsent: requestConsentNonInteractive,
+    requestSetting: promptForSetting,
+    workspaceDir: cwd,
+    enabledExtensionOverrides: argv.extensions,
+  });
+  extensionManager.loadExtensions();
+
   // Call the (now wrapper) loadHierarchicalGeminiMemory which calls the server's version
   const { memoryContent, fileCount, filePaths } =
     await loadHierarchicalGeminiMemory(
@@ -423,13 +435,13 @@ export async function loadCliConfig(
       debugMode,
       fileService,
       settings,
-      allExtensions,
+      extensionManager,
       trustedFolder,
       memoryImportFormat,
       memoryFileFiltering,
     );
 
-  let mcpServers = mergeMcpServers(settings, allExtensions);
+  let mcpServers = mergeMcpServers(settings, extensionManager.getExtensions());
   const question = argv.promptInteractive || argv.prompt || '';
 
   // Determine approval mode with backward compatibility
@@ -540,7 +552,7 @@ export async function loadCliConfig(
 
   const excludeTools = mergeExcludeTools(
     settings,
-    allExtensions,
+    extensionManager.getExtensions(),
     extraExcludes.length > 0 ? extraExcludes : undefined,
   );
   const blockedMcpServers: Array<{ name: string; extensionName: string }> = [];
@@ -636,7 +648,7 @@ export async function loadCliConfig(
     experimentalZedIntegration: argv.experimentalAcp || false,
     listExtensions: argv.listExtensions || false,
     enabledExtensions: argv.extensions,
-    extensions: allExtensions,
+    extensionLoader: extensionManager,
     blockedMcpServers,
     noBrowser: !!process.env['NO_BROWSER'],
     summarizeToolOutput: settings.model?.summarizeToolOutput,
diff --git a/packages/cli/src/config/extension-manager.ts b/packages/cli/src/config/extension-manager.ts
index 9fb8263758..d25591fd48 100644
--- a/packages/cli/src/config/extension-manager.ts
+++ b/packages/cli/src/config/extension-manager.ts
@@ -9,7 +9,7 @@ import * as path from 'node:path';
 import * as os from 'node:os';
 import chalk from 'chalk';
 import { ExtensionEnablementManager } from './extensions/extensionEnablement.js';
-import { type LoadedSettings, SettingScope } from './settings.js';
+import { type Settings, SettingScope } from './settings.js';
 import { createHash, randomUUID } from 'node:crypto';
 import { loadInstallMetadata, type ExtensionConfig } from './extension.js';
 import { isWorkspaceTrusted } from './trustedFolders.js';
@@ -50,33 +50,45 @@ import {
   maybePromptForSettings,
   type ExtensionSetting,
 } from './extensions/extensionSettings.js';
+import type {
+  ExtensionEvents,
+  ExtensionLoader,
+} from '@google/gemini-cli-core/src/utils/extensionLoader.js';
+import { EventEmitter } from 'node:events';
 
 interface ExtensionManagerParams {
   enabledExtensionOverrides?: string[];
-  loadedSettings: LoadedSettings;
+  settings: Settings;
   requestConsent: (consent: string) => Promise<boolean>;
   requestSetting: ((setting: ExtensionSetting) => Promise<string>) | null;
   workspaceDir: string;
 }
 
-export class ExtensionManager {
+/**
+ * Actual implementation of an ExtensionLoader.
+ *
+ * You must call `loadExtensions` prior to calling other methods on this class.
+ */
+export class ExtensionManager implements ExtensionLoader {
   private extensionEnablementManager: ExtensionEnablementManager;
-  private loadedSettings: LoadedSettings;
+  private settings: Settings;
   private requestConsent: (consent: string) => Promise<boolean>;
   private requestSetting:
     | ((setting: ExtensionSetting) => Promise<string>)
-    | null;
+    | undefined;
   private telemetryConfig: Config;
   private workspaceDir: string;
+  private loadedExtensions: GeminiCLIExtension[] | undefined;
+  private eventEmitter: EventEmitter<ExtensionEvents>;
 
   constructor(options: ExtensionManagerParams) {
     this.workspaceDir = options.workspaceDir;
     this.extensionEnablementManager = new ExtensionEnablementManager(
       options.enabledExtensionOverrides,
     );
-    this.loadedSettings = options.loadedSettings;
+    this.settings = options.settings;
     this.telemetryConfig = new Config({
-      telemetry: options.loadedSettings.merged.telemetry,
+      telemetry: options.settings.telemetry,
       interactive: false,
       sessionId: randomUUID(),
       targetDir: options.workspaceDir,
@@ -85,19 +97,45 @@ export class ExtensionManager {
       debugMode: false,
     });
     this.requestConsent = options.requestConsent;
-    this.requestSetting = options.requestSetting;
+    this.requestSetting = options.requestSetting ?? undefined;
+    this.eventEmitter = new EventEmitter();
+  }
+
+  setRequestConsent(
+    requestConsent: (consent: string) => Promise<boolean>,
+  ): void {
+    this.requestConsent = requestConsent;
+  }
+
+  setRequestSetting(
+    requestSetting?: (setting: ExtensionSetting) => Promise<string>,
+  ): void {
+    this.requestSetting = requestSetting;
+  }
+
+  getExtensions(): GeminiCLIExtension[] {
+    if (!this.loadedExtensions) {
+      throw new Error(
+        'Extensions not yet loaded, must call `loadExtensions` first',
+      );
+    }
+    return this.loadedExtensions!;
+  }
+
+  extensionEvents(): EventEmitter<ExtensionEvents> {
+    return this.eventEmitter;
   }
 
   async installOrUpdateExtension(
     installMetadata: ExtensionInstallMetadata,
     previousExtensionConfig?: ExtensionConfig,
-  ): Promise<string> {
+  ): Promise<GeminiCLIExtension> {
     const isUpdate = !!previousExtensionConfig;
     let newExtensionConfig: ExtensionConfig | null = null;
     let localSourcePath: string | undefined;
+    let extension: GeminiCLIExtension;
     try {
-      const settings = this.loadedSettings.merged;
-      if (!isWorkspaceTrusted(settings).isTrusted) {
+      if (!isWorkspaceTrusted(this.settings).isTrusted) {
         throw new Error(
           `Could not install extension from untrusted folder at ${installMetadata.source}`,
         );
@@ -187,17 +225,17 @@ export class ExtensionManager {
         }
 
         const newExtensionName = newExtensionConfig.name;
-        if (!isUpdate) {
-          const installedExtensions = this.loadExtensions();
-          if (
-            installedExtensions.some(
-              (installed) => installed.name === newExtensionName,
-            )
-          ) {
-            throw new Error(
-              `Extension "${newExtensionName}" is already installed. Please uninstall it first.`,
-            );
-          }
+        const previous = this.getExtensions().find(
+          (installed) => installed.name === newExtensionName,
+        );
+        if (isUpdate && !previous) {
+          throw new Error(
+            `Extension "${newExtensionName}" was not already installed, cannot update it.`,
+          );
+        } else if (!isUpdate && previous) {
+          throw new Error(
+            `Extension "${newExtensionName}" is already installed. Please uninstall it first.`,
+          );
         }
 
         await maybeRequestConsentOrFail(
@@ -245,39 +283,43 @@ export class ExtensionManager {
           INSTALL_METADATA_FILENAME,
         );
         await fs.promises.writeFile(metadataPath, metadataString);
+
+        // TODO: Gracefully handle this call failing, we should back up the old
+        // extension prior to overwriting it and then restore it.
+        extension = this.loadExtension(destinationPath)!;
+        if (isUpdate) {
+          logExtensionUpdateEvent(
+            this.telemetryConfig,
+            new ExtensionUpdateEvent(
+              hashValue(newExtensionConfig.name),
+              getExtensionId(newExtensionConfig, installMetadata),
+              newExtensionConfig.version,
+              previousExtensionConfig.version,
+              installMetadata.type,
+              'success',
+            ),
+          );
+          this.eventEmitter.emit('extensionUpdated', { extension });
+        } else {
+          logExtensionInstallEvent(
+            this.telemetryConfig,
+            new ExtensionInstallEvent(
+              hashValue(newExtensionConfig.name),
+              getExtensionId(newExtensionConfig, installMetadata),
+              newExtensionConfig.version,
+              installMetadata.type,
+              'success',
+            ),
+          );
+          this.eventEmitter.emit('extensionInstalled', { extension });
+          this.enableExtension(newExtensionConfig.name, SettingScope.User);
+        }
       } finally {
         if (tempDir) {
           await fs.promises.rm(tempDir, { recursive: true, force: true });
         }
       }
-
-      if (isUpdate) {
-        logExtensionUpdateEvent(
-          this.telemetryConfig,
-          new ExtensionUpdateEvent(
-            hashValue(newExtensionConfig.name),
-            getExtensionId(newExtensionConfig, installMetadata),
-            newExtensionConfig.version,
-            previousExtensionConfig.version,
-            installMetadata.type,
-            'success',
-          ),
-        );
-      } else {
-        logExtensionInstallEvent(
-          this.telemetryConfig,
-          new ExtensionInstallEvent(
-            hashValue(newExtensionConfig.name),
-            getExtensionId(newExtensionConfig, installMetadata),
-            newExtensionConfig.version,
-            installMetadata.type,
-            'success',
-          ),
-        );
-        this.enableExtension(newExtensionConfig.name, SettingScope.User);
-      }
-
-      return newExtensionConfig!.name;
+      return extension;
     } catch (error) {
       // Attempt to load config from the source path even if installation fails
       // to get the name and version for logging.
@@ -324,7 +366,7 @@ export class ExtensionManager {
     extensionIdentifier: string,
     isUpdate: boolean,
   ): Promise<void> {
-    const installedExtensions = this.loadExtensions();
+    const installedExtensions = this.getExtensions();
     const extension = installedExtensions.find(
       (installed) =>
         installed.name.toLowerCase() === extensionIdentifier.toLowerCase() ||
@@ -334,6 +376,7 @@ export class ExtensionManager {
     if (!extension) {
       throw new Error(`Extension not found.`);
     }
+    this.unloadExtension(extension);
     const storage = new ExtensionStorage(extension.name);
 
     await fs.promises.rm(storage.getExtensionDir(), {
@@ -355,36 +398,28 @@ export class ExtensionManager {
         'success',
       ),
     );
+    this.eventEmitter.emit('extensionUninstalled', { extension });
   }
 
   loadExtensions(): GeminiCLIExtension[] {
-    const extensionsDir = ExtensionStorage.getUserExtensionsDir();
-    if (!fs.existsSync(extensionsDir)) {
-      return [];
+    if (this.loadedExtensions) {
+      throw new Error('Extensions already loaded, only load extensions once.');
+    }
+    const extensionsDir = ExtensionStorage.getUserExtensionsDir();
+    this.loadedExtensions = [];
+    if (!fs.existsSync(extensionsDir)) {
+      return this.loadedExtensions;
     }
-
-    const extensions: GeminiCLIExtension[] = [];
     for (const subdir of fs.readdirSync(extensionsDir)) {
       const extensionDir = path.join(extensionsDir, subdir);
 
-      const extension = this.loadExtension(extensionDir);
-      if (extension != null) {
-        extensions.push(extension);
-      }
+      this.loadExtension(extensionDir);
     }
-
-    const uniqueExtensions = new Map<string, GeminiCLIExtension>();
-
-    for (const extension of extensions) {
-      if (!uniqueExtensions.has(extension.name)) {
-        uniqueExtensions.set(extension.name, extension);
-      }
-    }
-
-    return Array.from(uniqueExtensions.values());
+    return this.loadedExtensions;
   }
 
-  loadExtension(extensionDir: string): GeminiCLIExtension | null {
+  private loadExtension(extensionDir: string): GeminiCLIExtension | null {
+    this.loadedExtensions ??= [];
     if (!fs.statSync(extensionDir).isDirectory()) {
       return null;
     }
@@ -398,6 +433,13 @@ export class ExtensionManager {
 
     try {
       let config = this.loadExtensionConfig(effectiveExtensionPath);
+      if (
+        this.getExtensions().find((extension) => extension.name === config.name)
+      ) {
+        throw new Error(
+          `Extension with name ${config.name} already was loaded.`,
+        );
+      }
 
       const customEnv = getEnvContents(new ExtensionStorage(config.name));
       config = resolveEnvVarsInObject(config, customEnv);
@@ -417,7 +459,7 @@ export class ExtensionManager {
         )
         .filter((contextFilePath) => fs.existsSync(contextFilePath));
 
-      return {
+      const extension = {
         name: config.name,
         version: config.version,
         path: effectiveExtensionPath,
@@ -431,6 +473,9 @@ export class ExtensionManager {
         ),
         id: getExtensionId(config, installMetadata),
       };
+      this.eventEmitter.emit('extensionLoaded', { extension });
+      this.getExtensions().push(extension);
+      return extension;
     } catch (e) {
       debugLogger.error(
         `Warning: Skipping extension in ${effectiveExtensionPath}: ${getErrorMessage(
@@ -441,24 +486,11 @@ export class ExtensionManager {
     }
   }
 
-  loadExtensionByName(name: string): GeminiCLIExtension | null {
-    const userExtensionsDir = ExtensionStorage.getUserExtensionsDir();
-    if (!fs.existsSync(userExtensionsDir)) {
-      return null;
-    }
-
-    for (const subdir of fs.readdirSync(userExtensionsDir)) {
-      const extensionDir = path.join(userExtensionsDir, subdir);
-      if (!fs.statSync(extensionDir).isDirectory()) {
-        continue;
-      }
-      const extension = this.loadExtension(extensionDir);
-      if (extension && extension.name.toLowerCase() === name.toLowerCase()) {
-        return extension;
-      }
-    }
-
-    return null;
+  private unloadExtension(extension: GeminiCLIExtension) {
+    this.loadedExtensions = this.getExtensions().filter(
+      (entry) => extension !== entry,
+    );
+    this.eventEmitter.emit('extensionUnloaded', { extension });
   }
 
   loadExtensionConfig(extensionDir: string): ExtensionConfig {
@@ -548,7 +580,9 @@ export class ExtensionManager {
     ) {
       throw new Error('System and SystemDefaults scopes are not supported.');
     }
-    const extension = this.loadExtensionByName(name);
+    const extension = this.getExtensions().find(
+      (extension) => extension.name === name,
+    );
     if (!extension) {
       throw new Error(`Extension with name ${name} does not exist.`);
     }
@@ -560,6 +594,8 @@ export class ExtensionManager {
       this.telemetryConfig,
       new ExtensionDisableEvent(hashValue(name), extension.id, scope),
     );
+    extension.isActive = false;
+    this.eventEmitter.emit('extensionDisabled', { extension });
   }
 
   enableExtension(name: string, scope: SettingScope) {
@@ -569,7 +605,9 @@ export class ExtensionManager {
     ) {
       throw new Error('System and SystemDefaults scopes are not supported.');
     }
-    const extension = this.loadExtensionByName(name);
+    const extension = this.getExtensions().find(
+      (extension) => extension.name === name,
+    );
     if (!extension) {
       throw new Error(`Extension with name ${name} does not exist.`);
     }
@@ -580,6 +618,8 @@ export class ExtensionManager {
       this.telemetryConfig,
       new ExtensionEnableEvent(hashValue(name), extension.id, scope),
     );
+    extension.isActive = true;
+    this.eventEmitter.emit('extensionEnabled', { extension });
   }
 }
 
diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index f701e3cb3e..3ce23405f3 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -140,7 +140,7 @@ describe('extension tests', () => {
       workspaceDir: tempWorkspaceDir,
       requestConsent: mockRequestConsent,
       requestSetting: mockPromptForSettings,
-      loadedSettings: loadSettings(tempWorkspaceDir),
+      settings: loadSettings(tempWorkspaceDir).merged,
     });
   });
 
@@ -220,11 +220,12 @@ describe('extension tests', () => {
         name: 'enabled-extension',
         version: '2.0.0',
       });
+      extensionManager.loadExtensions();
       extensionManager.disableExtension(
         'disabled-extension',
         SettingScope.User,
       );
-      const extensions = extensionManager.loadExtensions();
+      const extensions = extensionManager.getExtensions();
       expect(extensions).toHaveLength(2);
       expect(extensions[0].name).toBe('disabled-extension');
       expect(extensions[0].isActive).toBe(false);
@@ -265,13 +266,14 @@ describe('extension tests', () => {
       });
       fs.writeFileSync(path.join(sourceExtDir, 'context.md'), 'linked context');
 
-      const extensionName = await extensionManager.installOrUpdateExtension({
+      extensionManager.loadExtensions();
+      const extension = await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'link',
       });
 
-      expect(extensionName).toEqual('my-linked-extension');
-      const extensions = extensionManager.loadExtensions();
+      expect(extension.name).toEqual('my-linked-extension');
+      const extensions = extensionManager.getExtensions();
       expect(extensions).toHaveLength(1);
 
       const linkedExt = extensions[0];
@@ -301,12 +303,13 @@ describe('extension tests', () => {
         },
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'link',
       });
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = extensionManager.getExtensions();
       expect(extensions).toHaveLength(1);
       expect(extensions[0].mcpServers?.['test-server'].cwd).toBe(
         path.join(sourceExtDir, 'server'),
@@ -525,15 +528,17 @@ describe('extension tests', () => {
       const consoleSpy = vi
         .spyOn(console, 'error')
         .mockImplementation(() => {});
-      const badExtDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: 'bad_name',
         version: '1.0.0',
       });
 
-      const extension = extensionManager.loadExtension(badExtDir);
+      const extension = extensionManager
+        .loadExtensions()
+        .find((e) => e.name === 'bad_name');
 
-      expect(extension).toBeNull();
+      expect(extension).toBeUndefined();
       expect(consoleSpy).toHaveBeenCalledWith(
         expect.stringContaining('Invalid extension name: "bad_name"'),
       );
@@ -542,7 +547,7 @@ describe('extension tests', () => {
 
     describe('id generation', () => {
       it('should generate id from source for non-github git urls', () => {
-        const extensionDir = createExtension({
+        createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
           version: '1.0.0',
@@ -552,12 +557,14 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager.loadExtension(extensionDir);
+        const extension = extensionManager
+          .loadExtensions()
+          .find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('http://somehost.com/foo/bar'));
       });
 
       it('should generate id from owner/repo for github http urls', () => {
-        const extensionDir = createExtension({
+        createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
           version: '1.0.0',
@@ -567,12 +574,14 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager.loadExtension(extensionDir);
+        const extension = extensionManager
+          .loadExtensions()
+          .find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('https://github.com/foo/bar'));
       });
 
       it('should generate id from owner/repo for github ssh urls', () => {
-        const extensionDir = createExtension({
+        createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
           version: '1.0.0',
@@ -582,12 +591,14 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager.loadExtension(extensionDir);
+        const extension = extensionManager
+          .loadExtensions()
+          .find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('https://github.com/foo/bar'));
       });
 
       it('should generate id from source for github-release extension', () => {
-        const extensionDir = createExtension({
+        createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
           version: '1.0.0',
@@ -597,12 +608,14 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager.loadExtension(extensionDir);
+        const extension = extensionManager
+          .loadExtensions()
+          .find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('https://github.com/foo/bar'));
       });
 
       it('should generate id from the original source for local extension', () => {
-        const extensionDir = createExtension({
+        createExtension({
           extensionsDir: userExtensionsDir,
           name: 'local-ext-name',
           version: '1.0.0',
@@ -612,7 +625,9 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager.loadExtension(extensionDir);
+        const extension = extensionManager
+          .loadExtensions()
+          .find((e) => e.name === 'local-ext-name');
         expect(extension?.id).toBe(hashValue('/some/path'));
       });
 
@@ -623,25 +638,28 @@ describe('extension tests', () => {
           name: 'link-ext-name',
           version: '1.0.0',
         });
-        const extensionName = await extensionManager.installOrUpdateExtension({
+        extensionManager.loadExtensions();
+        await extensionManager.installOrUpdateExtension({
           type: 'link',
           source: actualExtensionDir,
         });
 
-        const extension = extensionManager.loadExtension(
-          new ExtensionStorage(extensionName).getExtensionDir(),
-        );
+        const extension = extensionManager
+          .getExtensions()
+          .find((e) => e.name === 'link-ext-name');
         expect(extension?.id).toBe(hashValue(actualExtensionDir));
       });
 
       it('should generate id from name for extension with no install metadata', () => {
-        const extensionDir = createExtension({
+        createExtension({
           extensionsDir: userExtensionsDir,
           name: 'no-meta-name',
           version: '1.0.0',
         });
 
-        const extension = extensionManager.loadExtension(extensionDir);
+        const extension = extensionManager
+          .loadExtensions()
+          .find((e) => e.name === 'no-meta-name');
         expect(extension?.id).toBe(hashValue('no-meta-name'));
       });
     });
@@ -657,6 +675,7 @@ describe('extension tests', () => {
       const targetExtDir = path.join(userExtensionsDir, 'my-local-extension');
       const metadataPath = path.join(targetExtDir, INSTALL_METADATA_FILENAME);
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -678,6 +697,7 @@ describe('extension tests', () => {
         name: 'my-local-extension',
         version: '1.0.0',
       });
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -771,6 +791,7 @@ describe('extension tests', () => {
         type: 'github-release',
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: gitUrl,
         type: 'git',
@@ -795,6 +816,7 @@ describe('extension tests', () => {
       const metadataPath = path.join(targetExtDir, INSTALL_METADATA_FILENAME);
       const configPath = path.join(targetExtDir, EXTENSIONS_CONFIG_FILENAME);
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'link',
@@ -824,6 +846,7 @@ describe('extension tests', () => {
             name: 'my-local-extension',
             version: '1.1.0',
           });
+          extensionManager.loadExtensions();
           if (isUpdate) {
             await extensionManager.installOrUpdateExtension({
               source: sourceExtDir,
@@ -897,12 +920,15 @@ describe('extension tests', () => {
         },
       });
 
+      extensionManager.loadExtensions();
       await expect(
         extensionManager.installOrUpdateExtension({
           source: sourceExtDir,
           type: 'local',
         }),
-      ).resolves.toBe('my-local-extension');
+      ).resolves.toMatchObject({
+        name: 'my-local-extension',
+      });
 
       expect(mockRequestConsent).toHaveBeenCalledWith(
         `Installing extension "my-local-extension".
@@ -926,12 +952,13 @@ This extension will run the following MCP servers:
         },
       });
 
+      extensionManager.loadExtensions();
       await expect(
         extensionManager.installOrUpdateExtension({
           source: sourceExtDir,
           type: 'local',
         }),
-      ).resolves.toBe('my-local-extension');
+      ).resolves.toMatchObject({ name: 'my-local-extension' });
     });
 
     it('should cancel installation if user declines prompt for local extension with mcp servers', async () => {
@@ -947,6 +974,7 @@ This extension will run the following MCP servers:
         },
       });
       mockRequestConsent.mockResolvedValue(false);
+      extensionManager.loadExtensions();
       await expect(
         extensionManager.installOrUpdateExtension({
           source: sourceExtDir,
@@ -964,6 +992,7 @@ This extension will run the following MCP servers:
       const targetExtDir = path.join(userExtensionsDir, 'my-local-extension');
       const metadataPath = path.join(targetExtDir, INSTALL_METADATA_FILENAME);
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -994,6 +1023,7 @@ This extension will run the following MCP servers:
         },
       });
 
+      extensionManager.loadExtensions();
       // Install it with hard coded consent first.
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
@@ -1008,7 +1038,7 @@ This extension will run the following MCP servers:
           // Provide its own existing config as the previous config.
           await extensionManager.loadExtensionConfig(sourceExtDir),
         ),
-      ).resolves.toBe('my-local-extension');
+      ).resolves.toMatchObject({ name: 'my-local-extension' });
 
       // Still only called once
       expect(mockRequestConsent).toHaveBeenCalledOnce();
@@ -1028,6 +1058,7 @@ This extension will run the following MCP servers:
         ],
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -1054,9 +1085,10 @@ This extension will run the following MCP servers:
         workspaceDir: tempWorkspaceDir,
         requestConsent: mockRequestConsent,
         requestSetting: null,
-        loadedSettings: loadSettings(tempWorkspaceDir),
+        settings: loadSettings(tempWorkspaceDir).merged,
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -1079,6 +1111,7 @@ This extension will run the following MCP servers:
       });
 
       mockPromptForSettings.mockResolvedValueOnce('old-api-key');
+      extensionManager.loadExtensions();
       // Install it so it exists in the userExtensionsDir
       await extensionManager.installOrUpdateExtension({
         source: oldSourceExtDir,
@@ -1148,6 +1181,7 @@ This extension will run the following MCP servers:
           },
         ],
       });
+      extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: oldSourceExtDir,
         type: 'local',
@@ -1239,6 +1273,7 @@ This extension will run the following MCP servers:
           join(tempDir, extensionName),
         );
 
+        extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension({
           source: gitUrl,
           type: 'github-release',
@@ -1263,6 +1298,7 @@ This extension will run the following MCP servers:
           type: 'github-release',
         });
 
+        extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension(
           { source: gitUrl, type: 'github-release' }, // Use github-release to force consent
         );
@@ -1293,6 +1329,7 @@ This extension will run the following MCP servers:
         });
         mockRequestConsent.mockResolvedValue(false);
 
+        extensionManager.loadExtensions();
         await expect(
           extensionManager.installOrUpdateExtension({
             source: gitUrl,
@@ -1317,6 +1354,7 @@ This extension will run the following MCP servers:
           type: 'github-release',
         });
 
+        extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension({
           source: gitUrl,
           type: 'git',
@@ -1347,6 +1385,7 @@ This extension will run the following MCP servers:
           type: 'github-release',
         });
 
+        extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension(
           { source: gitUrl, type: 'github-release' }, // Note the type
         );
@@ -1369,6 +1408,7 @@ This extension will run the following MCP servers:
         version: '1.0.0',
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.uninstallExtension('my-local-extension', false);
 
       expect(fs.existsSync(sourceExtDir)).toBe(false);
@@ -1386,14 +1426,16 @@ This extension will run the following MCP servers:
         version: '1.0.0',
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.uninstallExtension('my-local-extension', false);
 
       expect(fs.existsSync(sourceExtDir)).toBe(false);
-      expect(extensionManager.loadExtensions()).toHaveLength(1);
+      expect(extensionManager.getExtensions()).toHaveLength(1);
       expect(fs.existsSync(otherExtDir)).toBe(true);
     });
 
     it('should throw an error if the extension does not exist', async () => {
+      extensionManager.loadExtensions();
       await expect(
         extensionManager.uninstallExtension('nonexistent-extension', false),
       ).rejects.toThrow('Extension not found.');
@@ -1411,6 +1453,7 @@ This extension will run the following MCP servers:
           },
         });
 
+        extensionManager.loadExtensions();
         await extensionManager.uninstallExtension(
           'my-local-extension',
           isUpdate,
@@ -1438,6 +1481,7 @@ This extension will run the following MCP servers:
         const enablementManager = new ExtensionEnablementManager();
         enablementManager.enable('test-extension', true, '/some/scope');
 
+        extensionManager.loadExtensions();
         await extensionManager.uninstallExtension('test-extension', isUpdate);
 
         const config = enablementManager.readConfig()['test-extension'];
@@ -1462,6 +1506,7 @@ This extension will run the following MCP servers:
         },
       });
 
+      extensionManager.loadExtensions();
       await extensionManager.uninstallExtension(gitUrl, false);
 
       expect(fs.existsSync(sourceExtDir)).toBe(false);
@@ -1481,6 +1526,7 @@ This extension will run the following MCP servers:
         // No installMetadata provided
       });
 
+      extensionManager.loadExtensions();
       await expect(
         extensionManager.uninstallExtension(
           'https://github.com/google/no-metadata-extension',
@@ -1498,6 +1544,7 @@ This extension will run the following MCP servers:
         version: '1.0.0',
       });
 
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('my-extension', SettingScope.User);
       expect(
         isEnabled({
@@ -1514,6 +1561,7 @@ This extension will run the following MCP servers:
         version: '1.0.0',
       });
 
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('my-extension', SettingScope.Workspace);
       expect(
         isEnabled({
@@ -1536,6 +1584,7 @@ This extension will run the following MCP servers:
         version: '1.0.0',
       });
 
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('my-extension', SettingScope.User);
       extensionManager.disableExtension('my-extension', SettingScope.User);
       expect(
@@ -1563,6 +1612,7 @@ This extension will run the following MCP servers:
         },
       });
 
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.Workspace);
 
       expect(mockLogExtensionDisable).toHaveBeenCalled();
@@ -1580,7 +1630,7 @@ This extension will run the following MCP servers:
     });
 
     const getActiveExtensions = (): GeminiCLIExtension[] => {
-      const extensions = extensionManager.loadExtensions();
+      const extensions = extensionManager.getExtensions();
       return extensions.filter((e) => e.isActive);
     };
 
@@ -1590,6 +1640,7 @@ This extension will run the following MCP servers:
         name: 'ext1',
         version: '1.0.0',
       });
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.User);
       let activeExtensions = getActiveExtensions();
       expect(activeExtensions).toHaveLength(0);
@@ -1606,6 +1657,7 @@ This extension will run the following MCP servers:
         name: 'ext1',
         version: '1.0.0',
       });
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.Workspace);
       let activeExtensions = getActiveExtensions();
       expect(activeExtensions).toHaveLength(0);
@@ -1626,6 +1678,7 @@ This extension will run the following MCP servers:
           type: 'local',
         },
       });
+      extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.Workspace);
       extensionManager.enableExtension('ext1', SettingScope.Workspace);
 
diff --git a/packages/cli/src/config/extensions/github.test.ts b/packages/cli/src/config/extensions/github.test.ts
index 57eaa3e32e..06a43cb93e 100644
--- a/packages/cli/src/config/extensions/github.test.ts
+++ b/packages/cli/src/config/extensions/github.test.ts
@@ -170,7 +170,7 @@ describe('git extension helpers', () => {
         workspaceDir: tempWorkspaceDir,
         requestConsent: mockRequestConsent,
         requestSetting: mockPromptForSettings,
-        loadedSettings: loadSettings(tempWorkspaceDir),
+        settings: loadSettings(tempWorkspaceDir).merged,
       });
     });
 
diff --git a/packages/cli/src/config/extensions/github.ts b/packages/cli/src/config/extensions/github.ts
index 5e5e5cde7d..f2b1973064 100644
--- a/packages/cli/src/config/extensions/github.ts
+++ b/packages/cli/src/config/extensions/github.ts
@@ -157,14 +157,16 @@ export async function checkForExtensionUpdate(
 ): Promise<ExtensionUpdateState> {
   const installMetadata = extension.installMetadata;
   if (installMetadata?.type === 'local') {
-    const newExtension = extensionManager.loadExtension(installMetadata.source);
-    if (!newExtension) {
+    const latestConfig = extensionManager.loadExtensionConfig(
+      installMetadata.source,
+    );
+    if (!latestConfig) {
       debugLogger.error(
         `Failed to check for update for local extension "${extension.name}". Could not load extension from source path: ${installMetadata.source}`,
       );
       return ExtensionUpdateState.ERROR;
     }
-    if (newExtension.version !== extension.version) {
+    if (latestConfig.version !== extension.version) {
       return ExtensionUpdateState.UPDATE_AVAILABLE;
     }
     return ExtensionUpdateState.UP_TO_DATE;
diff --git a/packages/cli/src/config/extensions/update.test.ts b/packages/cli/src/config/extensions/update.test.ts
index 66bf99fabc..8c02168164 100644
--- a/packages/cli/src/config/extensions/update.test.ts
+++ b/packages/cli/src/config/extensions/update.test.ts
@@ -109,7 +109,7 @@ describe('update tests', () => {
       workspaceDir: tempWorkspaceDir,
       requestConsent: mockRequestConsent,
       requestSetting: mockPromptForSettings,
-      loadedSettings: loadSettings(tempWorkspaceDir),
+      settings: loadSettings(tempWorkspaceDir).merged,
     });
   });
 
@@ -145,7 +145,9 @@ describe('update tests', () => {
         );
       });
       mockGit.getRemotes.mockResolvedValue([{ name: 'origin' }]);
-      const extension = extensionManager.loadExtension(targetExtDir)!;
+      const extension = extensionManager
+        .loadExtensions()
+        .find((e) => e.name === extensionName)!;
       const updateInfo = await updateExtension(
         extension,
         extensionManager,
@@ -170,7 +172,7 @@ describe('update tests', () => {
 
     it('should call setExtensionUpdateState with UPDATING and then UPDATED_NEEDS_RESTART on success', async () => {
       const extensionName = 'test-extension';
-      const extensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: extensionName,
         version: '1.0.0',
@@ -192,7 +194,10 @@ describe('update tests', () => {
       mockGit.getRemotes.mockResolvedValue([{ name: 'origin' }]);
 
       const dispatch = vi.fn();
-      const extension = extensionManager.loadExtension(extensionDir)!;
+
+      const extension = extensionManager
+        .loadExtensions()
+        .find((e) => e.name === extensionName)!;
       await updateExtension(
         extension,
         extensionManager,
@@ -218,7 +223,7 @@ describe('update tests', () => {
 
     it('should call setExtensionUpdateState with ERROR on failure', async () => {
       const extensionName = 'test-extension';
-      const extensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: extensionName,
         version: '1.0.0',
@@ -232,7 +237,9 @@ describe('update tests', () => {
       mockGit.getRemotes.mockResolvedValue([{ name: 'origin' }]);
 
       const dispatch = vi.fn();
-      const extension = extensionManager.loadExtension(extensionDir)!;
+      const extension = extensionManager
+        .loadExtensions()
+        .find((e) => e.name === extensionName)!;
       await expect(
         updateExtension(
           extension,
@@ -261,7 +268,7 @@ describe('update tests', () => {
 
   describe('checkForAllExtensionUpdates', () => {
     it('should return UpdateAvailable for a git extension with updates', async () => {
-      const extensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: 'test-extension',
         version: '1.0.0',
@@ -270,7 +277,6 @@ describe('update tests', () => {
           type: 'git',
         },
       });
-      const extension = extensionManager.loadExtension(extensionDir)!;
 
       mockGit.getRemotes.mockResolvedValue([
         { name: 'origin', refs: { fetch: 'https://some.git/repo' } },
@@ -280,7 +286,7 @@ describe('update tests', () => {
 
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        [extension],
+        extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -294,7 +300,7 @@ describe('update tests', () => {
     });
 
     it('should return UpToDate for a git extension with no updates', async () => {
-      const extensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: 'test-extension',
         version: '1.0.0',
@@ -303,7 +309,6 @@ describe('update tests', () => {
           type: 'git',
         },
       });
-      const extension = extensionManager.loadExtension(extensionDir)!;
 
       mockGit.getRemotes.mockResolvedValue([
         { name: 'origin', refs: { fetch: 'https://some.git/repo' } },
@@ -313,7 +318,7 @@ describe('update tests', () => {
 
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        [extension],
+        extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -334,16 +339,15 @@ describe('update tests', () => {
         version: '1.0.0',
       });
 
-      const installedExtensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: 'local-extension',
         version: '1.0.0',
         installMetadata: { source: sourceExtensionDir, type: 'local' },
       });
-      const extension = extensionManager.loadExtension(installedExtensionDir)!;
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        [extension],
+        extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -360,20 +364,19 @@ describe('update tests', () => {
       const localExtensionSourcePath = path.join(tempHomeDir, 'local-source');
       const sourceExtensionDir = createExtension({
         extensionsDir: localExtensionSourcePath,
-        name: 'my-local-ext',
+        name: 'local-extension',
         version: '1.1.0',
       });
 
-      const installedExtensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: 'local-extension',
         version: '1.0.0',
         installMetadata: { source: sourceExtensionDir, type: 'local' },
       });
-      const extension = extensionManager.loadExtension(installedExtensionDir)!;
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        [extension],
+        extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -387,7 +390,7 @@ describe('update tests', () => {
     });
 
     it('should return Error when git check fails', async () => {
-      const extensionDir = createExtension({
+      createExtension({
         extensionsDir: userExtensionsDir,
         name: 'error-extension',
         version: '1.0.0',
@@ -396,13 +399,12 @@ describe('update tests', () => {
           type: 'git',
         },
       });
-      const extension = extensionManager.loadExtension(extensionDir)!;
 
       mockGit.getRemotes.mockRejectedValue(new Error('Git error'));
 
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        [extension],
+        extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
diff --git a/packages/cli/src/config/extensions/update.ts b/packages/cli/src/config/extensions/update.ts
index 141ace88d8..40f1330bc7 100644
--- a/packages/cli/src/config/extensions/update.ts
+++ b/packages/cli/src/config/extensions/update.ts
@@ -61,20 +61,20 @@ export async function updateExtension(
     const previousExtensionConfig = await extensionManager.loadExtensionConfig(
       extension.path,
     );
-    await extensionManager.installOrUpdateExtension(
-      installMetadata,
-      previousExtensionConfig,
-    );
-    const updatedExtensionStorage = new ExtensionStorage(extension.name);
-    const updatedExtension = extensionManager.loadExtension(
-      updatedExtensionStorage.getExtensionDir(),
-    );
-    if (!updatedExtension) {
+    let updatedExtension: GeminiCLIExtension;
+    try {
+      updatedExtension = await extensionManager.installOrUpdateExtension(
+        installMetadata,
+        previousExtensionConfig,
+      );
+    } catch (e) {
       dispatchExtensionStateUpdate({
         type: 'SET_STATE',
         payload: { name: extension.name, state: ExtensionUpdateState.ERROR },
       });
-      throw new Error('Updated extension not found after installation.');
+      throw new Error(
+        `Updated extension not found after installation, got error:\n${e}`,
+      );
     }
     const updatedVersion = updatedExtension.version;
     dispatchExtensionStateUpdate({
diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts
index a0e3b5196e..78e85041f2 100644
--- a/packages/cli/src/config/settings.test.ts
+++ b/packages/cli/src/config/settings.test.ts
@@ -2433,7 +2433,7 @@ describe('Settings Loading and Merging', () => {
       const loadedSettings = loadSettings(MOCK_WORKSPACE_DIR);
       const setValueSpy = vi.spyOn(loadedSettings, 'setValue');
       const extensionManager = new ExtensionManager({
-        loadedSettings,
+        settings: loadedSettings.merged,
         workspaceDir: MOCK_WORKSPACE_DIR,
         requestConsent: vi.fn(),
         requestSetting: vi.fn(),
@@ -2506,7 +2506,7 @@ describe('Settings Loading and Merging', () => {
       const loadedSettings = loadSettings(MOCK_WORKSPACE_DIR);
       const setValueSpy = vi.spyOn(loadedSettings, 'setValue');
       const extensionManager = new ExtensionManager({
-        loadedSettings,
+        settings: loadedSettings.merged,
         workspaceDir: MOCK_WORKSPACE_DIR,
         requestConsent: vi.fn(),
         requestSetting: vi.fn(),
diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx
index 05388524f3..8aa68e72c2 100644
--- a/packages/cli/src/gemini.tsx
+++ b/packages/cli/src/gemini.tsx
@@ -67,8 +67,8 @@ import {
 } from './utils/relaunch.js';
 import { loadSandboxConfig } from './config/sandboxConfig.js';
 import { ExtensionManager } from './config/extension-manager.js';
-import { requestConsentNonInteractive } from './config/extensions/consent.js';
 import { createPolicyUpdater } from './config/policy.js';
+import { requestConsentNonInteractive } from './config/extensions/consent.js';
 
 export function validateDnsResolutionOrder(
   order: string | undefined,
@@ -230,7 +230,7 @@ export async function main() {
     // Temporary extension manager only used during this non-interactive UI phase.
     new ExtensionManager({
       workspaceDir: process.cwd(),
-      loadedSettings: settings,
+      settings: settings.merged,
       enabledExtensionOverrides: [],
       requestConsent: requestConsentNonInteractive,
       requestSetting: null,
@@ -299,7 +299,6 @@ export async function main() {
     if (sandboxConfig) {
       const partialConfig = await loadCliConfig(
         settings.merged,
-        [],
         sessionId,
         argv,
       );
@@ -370,23 +369,7 @@ export async function main() {
   // to run Gemini CLI. It is now safe to perform expensive initialization that
   // may have side effects.
   {
-    // Eventually, `extensions` should move off of `config` entirely and into
-    // the UI state instead.
-    const extensionManager = new ExtensionManager({
-      loadedSettings: settings,
-      workspaceDir: process.cwd(),
-      // At this stage, we still don't have an interactive UI.
-      requestConsent: requestConsentNonInteractive,
-      requestSetting: null,
-      enabledExtensionOverrides: argv.extensions,
-    });
-    const extensions = extensionManager.loadExtensions();
-    const config = await loadCliConfig(
-      settings.merged,
-      extensions,
-      sessionId,
-      argv,
-    );
+    const config = await loadCliConfig(settings.merged, sessionId, argv);
 
     const policyEngine = config.getPolicyEngine();
     const messageBus = config.getMessageBus();
@@ -397,7 +380,7 @@ export async function main() {
 
     if (config.getListExtensions()) {
       debugLogger.log('Installed extensions:');
-      for (const extension of extensions) {
+      for (const extension of config.getExtensions()) {
         debugLogger.log(`- ${extension.name}`);
       }
       process.exit(0);
@@ -434,7 +417,7 @@ export async function main() {
     }
 
     if (config.getExperimentalZedIntegration()) {
-      return runZedIntegration(config, settings, extensions, argv);
+      return runZedIntegration(config, settings, argv);
     }
 
     let input = config.getQuestion();
diff --git a/packages/cli/src/ui/AppContainer.test.tsx b/packages/cli/src/ui/AppContainer.test.tsx
index 5864437880..0337a6bc1a 100644
--- a/packages/cli/src/ui/AppContainer.test.tsx
+++ b/packages/cli/src/ui/AppContainer.test.tsx
@@ -12,6 +12,7 @@ import {
   beforeEach,
   afterEach,
   type Mock,
+  type MockedObject,
 } from 'vitest';
 import { render, cleanup } from 'ink-testing-library';
 import { AppContainer } from './AppContainer.js';
@@ -131,11 +132,13 @@ import { useKeypress, type Key } from './hooks/useKeypress.js';
 import { measureElement } from 'ink';
 import { useTerminalSize } from './hooks/useTerminalSize.js';
 import { ShellExecutionService } from '@google/gemini-cli-core';
+import { type ExtensionManager } from '../config/extension-manager.js';
 
 describe('AppContainer State Management', () => {
   let mockConfig: Config;
   let mockSettings: LoadedSettings;
   let mockInitResult: InitializationResult;
+  let mockExtensionManager: MockedObject<ExtensionManager>;
 
   // Create typed mocks for all hooks
   const mockedUseQuotaAndFallback = useQuotaAndFallback as Mock;
@@ -282,6 +285,15 @@ describe('AppContainer State Management', () => {
     // Mock config's getTargetDir to return consistent workspace directory
     vi.spyOn(mockConfig, 'getTargetDir').mockReturnValue('/test/workspace');
 
+    mockExtensionManager = vi.mockObject({
+      getExtensions: vi.fn().mockReturnValue([]),
+      setRequestConsent: vi.fn(),
+      setRequestSetting: vi.fn(),
+    } as unknown as ExtensionManager);
+    vi.spyOn(mockConfig, 'getExtensionLoader').mockReturnValue(
+      mockExtensionManager,
+    );
+
     // Mock LoadedSettings
     mockSettings = {
       merged: {
diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index a6ff6c0eeb..a81ad63280 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -98,7 +98,7 @@ import {
   useExtensionUpdates,
 } from './hooks/useExtensionUpdates.js';
 import { ShellFocusContext } from './contexts/ShellFocusContext.js';
-import { ExtensionManager } from '../config/extension-manager.js';
+import { type ExtensionManager } from '../config/extension-manager.js';
 import { requestConsentInteractive } from '../config/extensions/consent.js';
 
 const CTRL_EXIT_PROMPT_DURATION_MS = 1000;
@@ -168,21 +168,12 @@ export const AppContainer = (props: AppContainerProps) => {
     null,
   );
 
-  const extensions = config.getExtensions();
-  const [extensionManager] = useState<ExtensionManager>(
-    new ExtensionManager({
-      enabledExtensionOverrides: config.getEnabledExtensions(),
-      workspaceDir: config.getWorkingDir(),
-      requestConsent: (description) =>
-        requestConsentInteractive(
-          description,
-          addConfirmUpdateExtensionRequest,
-        ),
-      // TODO: Support requesting settings in the interactive CLI
-      requestSetting: null,
-      loadedSettings: settings,
-    }),
+  const extensionManager = config.getExtensionLoader() as ExtensionManager;
+  // We are in the interactive CLI, update how we request consent and settings.
+  extensionManager.setRequestConsent((description) =>
+    requestConsentInteractive(description, addConfirmUpdateExtensionRequest),
   );
+  extensionManager.setRequestSetting();
 
   const { addConfirmUpdateExtensionRequest, confirmUpdateExtensionRequests } =
     useConfirmUpdateRequests();
@@ -190,7 +181,7 @@ export const AppContainer = (props: AppContainerProps) => {
     extensionsUpdateState,
     extensionsUpdateStateInternal,
     dispatchExtensionStateUpdate,
-  } = useExtensionUpdates(extensions, extensionManager, historyManager.addItem);
+  } = useExtensionUpdates(extensionManager, historyManager.addItem);
 
   const [isPermissionsDialogOpen, setPermissionsDialogOpen] = useState(false);
   const openPermissionsDialog = useCallback(
@@ -548,7 +539,7 @@ Logging in with Google... Please restart Gemini CLI to continue.
           config.getDebugMode(),
           config.getFileService(),
           settings.merged,
-          config.getExtensions(),
+          config.getExtensionLoader(),
           config.isTrustedFolder(),
           settings.merged.context?.importFormat || 'tree', // Use setting or default to 'tree'
           config.getFileFilteringOptions(),
diff --git a/packages/cli/src/ui/commands/directoryCommand.tsx b/packages/cli/src/ui/commands/directoryCommand.tsx
index b174b1d8d5..ee078356c5 100644
--- a/packages/cli/src/ui/commands/directoryCommand.tsx
+++ b/packages/cli/src/ui/commands/directoryCommand.tsx
@@ -103,7 +103,7 @@ export const directoryCommand: SlashCommand = {
                 ],
                 config.getDebugMode(),
                 config.getFileService(),
-                config.getExtensions(),
+                config.getExtensionLoader(),
                 config.getFolderTrust(),
                 context.services.settings.merged.context?.importFormat ||
                   'tree', // Use setting or default to 'tree'
diff --git a/packages/cli/src/ui/commands/memoryCommand.test.ts b/packages/cli/src/ui/commands/memoryCommand.test.ts
index b1f65a8a5f..523e0be0f1 100644
--- a/packages/cli/src/ui/commands/memoryCommand.test.ts
+++ b/packages/cli/src/ui/commands/memoryCommand.test.ts
@@ -13,6 +13,7 @@ import { MessageType } from '../types.js';
 import type { LoadedSettings } from '../../config/settings.js';
 import {
   getErrorMessage,
+  SimpleExtensionLoader,
   type FileDiscoveryService,
 } from '@google/gemini-cli-core';
 import type { LoadServerHierarchicalMemoryResponse } from '@google/gemini-cli-core/index.js';
@@ -72,6 +73,7 @@ describe('memoryCommand', () => {
           config: {
             getUserMemory: mockGetUserMemory,
             getGeminiMdFileCount: mockGetGeminiMdFileCount,
+            getExtensionLoader: () => new SimpleExtensionLoader([]),
           },
         },
       });
@@ -176,6 +178,7 @@ describe('memoryCommand', () => {
         getWorkingDir: () => '/test/dir',
         getDebugMode: () => false,
         getFileService: () => ({}) as FileDiscoveryService,
+        getExtensionLoader: () => new SimpleExtensionLoader([]),
         getExtensions: () => [],
         shouldLoadMemoryFromIncludeDirectories: () => false,
         getWorkspaceContext: () => ({
diff --git a/packages/cli/src/ui/commands/memoryCommand.ts b/packages/cli/src/ui/commands/memoryCommand.ts
index 988c611291..ffe04fbe08 100644
--- a/packages/cli/src/ui/commands/memoryCommand.ts
+++ b/packages/cli/src/ui/commands/memoryCommand.ts
@@ -91,7 +91,7 @@ export const memoryCommand: SlashCommand = {
                 config.getDebugMode(),
                 config.getFileService(),
                 settings.merged,
-                config.getExtensions(),
+                config.getExtensionLoader(),
                 config.isTrustedFolder(),
                 settings.merged.context?.importFormat || 'tree',
                 config.getFileFilteringOptions(),
diff --git a/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx b/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
index 7d17a57611..be1a415538 100644
--- a/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
+++ b/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
@@ -10,7 +10,7 @@ import * as os from 'node:os';
 import * as path from 'node:path';
 import { createExtension } from '../../test-utils/createExtension.js';
 import { useExtensionUpdates } from './useExtensionUpdates.js';
-import { GEMINI_DIR, type GeminiCLIExtension } from '@google/gemini-cli-core';
+import { GEMINI_DIR } from '@google/gemini-cli-core';
 import { render } from 'ink-testing-library';
 import { MessageType } from '../types.js';
 import {
@@ -57,7 +57,7 @@ describe('useExtensionUpdates', () => {
       workspaceDir: tempHomeDir,
       requestConsent: vi.fn(),
       requestSetting: vi.fn(),
-      loadedSettings: loadSettings(),
+      settings: loadSettings().merged,
     });
   });
 
@@ -66,11 +66,10 @@ describe('useExtensionUpdates', () => {
   });
 
   it('should check for updates and log a message if an update is available', async () => {
-    const extensions = [
+    vi.spyOn(extensionManager, 'getExtensions').mockReturnValue([
       {
         name: 'test-extension',
         id: 'test-extension-id',
-        type: 'git',
         version: '1.0.0',
         path: '/some/path',
         isActive: true,
@@ -81,7 +80,7 @@ describe('useExtensionUpdates', () => {
         },
         contextFiles: [],
       },
-    ];
+    ]);
     const addItem = vi.fn();
 
     vi.mocked(checkForAllExtensionUpdates).mockImplementation(
@@ -97,11 +96,7 @@ describe('useExtensionUpdates', () => {
     );
 
     function TestComponent() {
-      useExtensionUpdates(
-        extensions as GeminiCLIExtension[],
-        extensionManager,
-        addItem,
-      );
+      useExtensionUpdates(extensionManager, addItem);
       return null;
     }
 
@@ -119,7 +114,7 @@ describe('useExtensionUpdates', () => {
   });
 
   it('should check for updates and automatically update if autoUpdate is true', async () => {
-    const extensionDir = createExtension({
+    createExtension({
       extensionsDir: userExtensionsDir,
       name: 'test-extension',
       version: '1.0.0',
@@ -129,7 +124,6 @@ describe('useExtensionUpdates', () => {
         autoUpdate: true,
       },
     });
-    const extension = extensionManager.loadExtension(extensionDir)!;
 
     const addItem = vi.fn();
 
@@ -151,8 +145,9 @@ describe('useExtensionUpdates', () => {
       name: '',
     });
 
+    extensionManager.loadExtensions();
     function TestComponent() {
-      useExtensionUpdates([extension], extensionManager, addItem);
+      useExtensionUpdates(extensionManager, addItem);
       return null;
     }
 
@@ -173,7 +168,7 @@ describe('useExtensionUpdates', () => {
   });
 
   it('should batch update notifications for multiple extensions', async () => {
-    const extensionDir1 = createExtension({
+    createExtension({
       extensionsDir: userExtensionsDir,
       name: 'test-extension-1',
       version: '1.0.0',
@@ -183,7 +178,7 @@ describe('useExtensionUpdates', () => {
         autoUpdate: true,
       },
     });
-    const extensionDir2 = createExtension({
+    createExtension({
       extensionsDir: userExtensionsDir,
       name: 'test-extension-2',
       version: '2.0.0',
@@ -194,10 +189,7 @@ describe('useExtensionUpdates', () => {
       },
     });
 
-    const extensions = [
-      extensionManager.loadExtension(extensionDir1)!,
-      extensionManager.loadExtension(extensionDir2)!,
-    ];
+    extensionManager.loadExtensions();
 
     const addItem = vi.fn();
 
@@ -233,7 +225,7 @@ describe('useExtensionUpdates', () => {
       });
 
     function TestComponent() {
-      useExtensionUpdates(extensions, extensionManager, addItem);
+      useExtensionUpdates(extensionManager, addItem);
       return null;
     }
 
@@ -262,11 +254,10 @@ describe('useExtensionUpdates', () => {
   });
 
   it('should batch update notifications for multiple extensions with autoUpdate: false', async () => {
-    const extensions = [
+    vi.spyOn(extensionManager, 'getExtensions').mockReturnValue([
       {
         name: 'test-extension-1',
         id: 'test-extension-1-id',
-        type: 'git',
         version: '1.0.0',
         path: '/some/path1',
         isActive: true,
@@ -281,7 +272,6 @@ describe('useExtensionUpdates', () => {
         name: 'test-extension-2',
         id: 'test-extension-2-id',
 
-        type: 'git',
         version: '2.0.0',
         path: '/some/path2',
         isActive: true,
@@ -292,7 +282,7 @@ describe('useExtensionUpdates', () => {
         },
         contextFiles: [],
       },
-    ];
+    ]);
     const addItem = vi.fn();
 
     vi.mocked(checkForAllExtensionUpdates).mockImplementation(
@@ -318,11 +308,7 @@ describe('useExtensionUpdates', () => {
     );
 
     function TestComponent() {
-      useExtensionUpdates(
-        extensions as GeminiCLIExtension[],
-        extensionManager,
-        addItem,
-      );
+      useExtensionUpdates(extensionManager, addItem);
       return null;
     }
 
diff --git a/packages/cli/src/ui/hooks/useExtensionUpdates.ts b/packages/cli/src/ui/hooks/useExtensionUpdates.ts
index 3bad4f771b..43dc5f2e20 100644
--- a/packages/cli/src/ui/hooks/useExtensionUpdates.ts
+++ b/packages/cli/src/ui/hooks/useExtensionUpdates.ts
@@ -78,7 +78,6 @@ export const useConfirmUpdateRequests = () => {
 };
 
 export const useExtensionUpdates = (
-  extensions: GeminiCLIExtension[],
   extensionManager: ExtensionManager,
   addItem: UseHistoryManagerReturn['addItem'],
 ) => {
@@ -86,6 +85,7 @@ export const useExtensionUpdates = (
     extensionUpdatesReducer,
     initialExtensionUpdatesState,
   );
+  const extensions = extensionManager.getExtensions();
 
   useEffect(() => {
     const extensionsToCheck = extensions.filter((extension) => {
diff --git a/packages/cli/src/zed-integration/zedIntegration.ts b/packages/cli/src/zed-integration/zedIntegration.ts
index c320bbe3a9..50a20a3a05 100644
--- a/packages/cli/src/zed-integration/zedIntegration.ts
+++ b/packages/cli/src/zed-integration/zedIntegration.ts
@@ -11,7 +11,6 @@ import type {
   GeminiChat,
   ToolResult,
   ToolCallConfirmationDetails,
-  GeminiCLIExtension,
   FilterFilesOptions,
 } from '@google/gemini-cli-core';
 import {
@@ -63,7 +62,6 @@ export function resolveModel(model: string, isInFallbackMode: boolean): string {
 export async function runZedIntegration(
   config: Config,
   settings: LoadedSettings,
-  extensions: GeminiCLIExtension[],
   argv: CliArgs,
 ) {
   const stdout = Writable.toWeb(process.stdout) as WritableStream;
@@ -76,8 +74,7 @@ export async function runZedIntegration(
   console.debug = console.error;
 
   new acp.AgentSideConnection(
-    (client: acp.Client) =>
-      new GeminiAgent(config, settings, extensions, argv, client),
+    (client: acp.Client) => new GeminiAgent(config, settings, argv, client),
     stdout,
     stdin,
   );
@@ -90,7 +87,6 @@ class GeminiAgent {
   constructor(
     private config: Config,
     private settings: LoadedSettings,
-    private extensions: GeminiCLIExtension[],
     private argv: CliArgs,
     private client: acp.Client,
   ) {}
@@ -204,13 +200,7 @@ class GeminiAgent {
 
     const settings = { ...this.settings.merged, mcpServers: mergedMcpServers };
 
-    const config = await loadCliConfig(
-      settings,
-      this.extensions,
-      sessionId,
-      this.argv,
-      cwd,
-    );
+    const config = await loadCliConfig(settings, sessionId, this.argv, cwd);
 
     await config.initialize();
     return config;
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 37f5f85641..398914ae5a 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -154,6 +154,10 @@ import {
   DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
 } from './constants.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import {
+  type ExtensionLoader,
+  SimpleExtensionLoader,
+} from '../utils/extensionLoader.js';
 
 export type { FileFilteringOptions };
 export {
@@ -248,7 +252,7 @@ export interface ConfigParameters {
   maxSessionTurns?: number;
   experimentalZedIntegration?: boolean;
   listExtensions?: boolean;
-  extensions?: GeminiCLIExtension[];
+  extensionLoader?: ExtensionLoader;
   enabledExtensions?: string[];
   blockedMcpServers?: Array<{ name: string; extensionName: string }>;
   noBrowser?: boolean;
@@ -337,7 +341,7 @@ export class Config {
   private inFallbackMode = false;
   private readonly maxSessionTurns: number;
   private readonly listExtensions: boolean;
-  private readonly _extensions: GeminiCLIExtension[];
+  private readonly _extensionLoader: ExtensionLoader;
   private readonly _enabledExtensions: string[];
   private readonly _blockedMcpServers: Array<{
     name: string;
@@ -440,7 +444,8 @@ export class Config {
     this.experimentalZedIntegration =
       params.experimentalZedIntegration ?? false;
     this.listExtensions = params.listExtensions ?? false;
-    this._extensions = params.extensions ?? [];
+    this._extensionLoader =
+      params.extensionLoader ?? new SimpleExtensionLoader([]);
     this._enabledExtensions = params.enabledExtensions ?? [];
     this._blockedMcpServers = params.blockedMcpServers ?? [];
     this.noBrowser = params.noBrowser ?? false;
@@ -885,7 +890,11 @@ export class Config {
   }
 
   getExtensions(): GeminiCLIExtension[] {
-    return this._extensions;
+    return this._extensionLoader.getExtensions();
+  }
+
+  getExtensionLoader(): ExtensionLoader {
+    return this._extensionLoader;
   }
 
   // The list of explicitly enabled extensions, if any were given, may contain
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index bc2eab2147..8754c23bfe 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -66,6 +66,7 @@ export * from './utils/promptIdContext.js';
 export * from './utils/thoughtUtils.js';
 export * from './utils/debugLogger.js';
 export * from './utils/events.js';
+export * from './utils/extensionLoader.js';
 
 // Export services
 export * from './services/fileDiscoveryService.js';
diff --git a/packages/core/src/utils/extensionLoader.ts b/packages/core/src/utils/extensionLoader.ts
new file mode 100644
index 0000000000..d42fcf6084
--- /dev/null
+++ b/packages/core/src/utils/extensionLoader.ts
@@ -0,0 +1,48 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { EventEmitter } from 'node:events';
+import type { GeminiCLIExtension } from '../config/config.js';
+
+export interface ExtensionLoader { // what Config stores instead of a fixed list
+  getExtensions(): GeminiCLIExtension[]; // memory discovery filters this on isActive
+  /** Emitter typed by ExtensionEvents (event name -> listener arguments). */
+  extensionEvents(): EventEmitter<ExtensionEvents>;
+}
+
+export interface ExtensionEvents { // event name -> listener argument list
+  extensionEnabled: ExtensionEnableEvent[]; // NOTE(review): X[] is variadic; [X] would pin one payload — confirm intent
+  extensionDisabled: ExtensionDisableEvent[];
+  extensionLoaded: ExtensionLoadEvent[];
+  extensionUnloaded: ExtensionUnloadEvent[];
+  extensionInstalled: ExtensionInstallEvent[];
+  extensionUninstalled: ExtensionUninstallEvent[];
+  extensionUpdated: ExtensionUpdateEvent[];
+}
+
+interface BaseExtensionEvent { // payload shape shared by every event alias below
+  extension: GeminiCLIExtension; // presumably the extension the event concerns
+}
+export type ExtensionDisableEvent = BaseExtensionEvent; // aliases add no fields of their own
+export type ExtensionEnableEvent = BaseExtensionEvent;
+export type ExtensionInstallEvent = BaseExtensionEvent;
+export type ExtensionLoadEvent = BaseExtensionEvent;
+export type ExtensionUnloadEvent = BaseExtensionEvent;
+export type ExtensionUninstallEvent = BaseExtensionEvent;
+export type ExtensionUpdateEvent = BaseExtensionEvent;
+/** ExtensionLoader over a fixed array supplied at construction. */
+export class SimpleExtensionLoader implements ExtensionLoader {
+  private _eventEmitter = new EventEmitter<ExtensionEvents>();
+  constructor(private readonly extensions: GeminiCLIExtension[]) {}
+  /** Returns this instance's emitter; nothing in this class emits on it. */
+  extensionEvents(): EventEmitter<ExtensionEvents> {
+    return this._eventEmitter;
+  }
+  /** Returns the constructor's array itself rather than a copy. */
+  getExtensions(): GeminiCLIExtension[] {
+    return this.extensions;
+  }
+}
diff --git a/packages/core/src/utils/memoryDiscovery.test.ts b/packages/core/src/utils/memoryDiscovery.test.ts
index 6d7d4da971..5f8c4b534c 100644
--- a/packages/core/src/utils/memoryDiscovery.test.ts
+++ b/packages/core/src/utils/memoryDiscovery.test.ts
@@ -16,6 +16,7 @@ import {
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { GEMINI_DIR } from './paths.js';
 import type { GeminiCLIExtension } from '../config/config.js';
+import { SimpleExtensionLoader } from './extensionLoader.js';
 
 vi.mock('os', async (importOriginal) => {
   const actualOs = await importOriginal<typeof os>();
@@ -88,7 +89,7 @@ describe('loadServerHierarchicalMemory', () => {
         [],
         false,
         new FileDiscoveryService(projectRoot),
-        [], // extensions
+        new SimpleExtensionLoader([]),
         false, // untrusted
       );
 
@@ -117,7 +118,7 @@ describe('loadServerHierarchicalMemory', () => {
           [],
           false,
           new FileDiscoveryService(projectRoot),
-          [], // extensions
+          new SimpleExtensionLoader([]),
           false, // untrusted
         );
 
@@ -133,7 +134,7 @@ describe('loadServerHierarchicalMemory', () => {
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -155,7 +156,7 @@ describe('loadServerHierarchicalMemory', () => {
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -182,7 +183,7 @@ default context content
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -213,7 +214,7 @@ custom context content
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -248,7 +249,7 @@ cwd context content
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -280,7 +281,7 @@ Subdir custom memory
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -312,7 +313,7 @@ Src directory memory
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -356,7 +357,7 @@ Subdir memory
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -409,7 +410,7 @@ Subdir memory
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
       'tree',
       {
@@ -445,7 +446,7 @@ My code memory
       [],
       true,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
       'tree', // importFormat
       {
@@ -467,7 +468,7 @@ My code memory
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -489,12 +490,12 @@ My code memory
       [],
       false,
       new FileDiscoveryService(projectRoot),
-      [
+      new SimpleExtensionLoader([
         {
           contextFiles: [extensionFilePath],
           isActive: true,
         } as GeminiCLIExtension,
-      ], // extensions
+      ]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -521,7 +522,7 @@ Extension memory content
       [includedDir],
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -556,7 +557,7 @@ included directory memory
       createdFiles.map((f) => path.dirname(f)),
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
@@ -591,7 +592,7 @@ included directory memory
       [childDir, parentDir], // Deliberately include duplicates
       false,
       new FileDiscoveryService(projectRoot),
-      [], // extensions
+      new SimpleExtensionLoader([]),
       DEFAULT_FOLDER_TRUST,
     );
 
diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts
index 8d77f0bad4..212dd7f935 100644
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -15,7 +15,7 @@ import { processImports } from './memoryImportProcessor.js';
 import type { FileFilteringOptions } from '../config/constants.js';
 import { DEFAULT_MEMORY_FILE_FILTERING_OPTIONS } from '../config/constants.js';
 import { GEMINI_DIR } from './paths.js';
-import type { GeminiCLIExtension } from '../config/config.js';
+import type { ExtensionLoader } from './extensionLoader.js';
 import { debugLogger } from './debugLogger.js';
 
 // Simple console logger, similar to the one previously in CLI's config.ts
@@ -338,7 +338,7 @@ export async function loadServerHierarchicalMemory(
   includeDirectoriesToReadGemini: readonly string[],
   debugMode: boolean,
   fileService: FileDiscoveryService,
-  extensions: GeminiCLIExtension[],
+  extensionLoader: ExtensionLoader,
   folderTrust: boolean,
   importFormat: 'flat' | 'tree' = 'tree',
   fileFilteringOptions?: FileFilteringOptions,
@@ -365,7 +365,8 @@ export async function loadServerHierarchicalMemory(
 
   // Add extension file paths separately since they may be conditionally enabled.
   filePaths.push(
-    ...extensions
+    ...extensionLoader
+      .getExtensions()
       .filter((ext) => ext.isActive)
       .flatMap((ext) => ext.contextFiles),
   );

From 064edc52f528f61760e06919850a4055a872613f Mon Sep 17 00:00:00 2001
From: Allen Hutchison <adh@google.com>
Date: Tue, 28 Oct 2025 09:20:57 -0700
Subject: [PATCH 55/73] feat(policy): Introduce config-based policy engine with
 TOML configuration (#11992)

---
 packages/cli/src/config/config.ts             |   29 +-
 .../cli/src/config/policies/read-only.toml    |   56 +
 packages/cli/src/config/policies/write.toml   |   63 +
 packages/cli/src/config/policies/yolo.toml    |   31 +
 .../config/policy-engine.integration.test.ts  |  117 +-
 .../cli/src/config/policy-toml-loader.test.ts |  982 ++++++++++++
 packages/cli/src/config/policy-toml-loader.ts |  394 +++++
 packages/cli/src/config/policy.test.ts        | 1322 +++++++++++++++--
 packages/cli/src/config/policy.ts             |  288 ++--
 packages/cli/src/ui/AppContainer.tsx          |   15 +-
 packages/core/src/config/config.ts            |    5 +
 packages/core/src/config/storage.ts           |    4 +
 packages/core/src/policy/policy-engine.ts     |   11 +
 packages/core/src/tools/mcp-client.test.ts    |   10 +-
 packages/core/src/tools/mcp-client.ts         |   40 +-
 packages/core/src/tools/mcp-tool.ts           |   15 +-
 packages/core/src/tools/shell.ts              |    8 +-
 packages/core/src/tools/tool-registry.ts      |    9 +
 packages/core/src/tools/write-todos.ts        |   16 +-
 scripts/copy_files.js                         |    2 +-
 20 files changed, 3146 insertions(+), 271 deletions(-)
 create mode 100644 packages/cli/src/config/policies/read-only.toml
 create mode 100644 packages/cli/src/config/policies/write.toml
 create mode 100644 packages/cli/src/config/policies/yolo.toml
 create mode 100644 packages/cli/src/config/policy-toml-loader.test.ts
 create mode 100644 packages/cli/src/config/policy-toml-loader.ts

diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 76ee5e8a21..2a102f78bc 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -507,7 +507,31 @@ export async function loadCliConfig(
     throw err;
   }
 
-  const policyEngineConfig = createPolicyEngineConfig(settings, approvalMode);
+  const policyEngineConfig = await createPolicyEngineConfig(
+    settings,
+    approvalMode,
+  );
+
+  // Debug: Log the merged policy configuration
+  // Only log when message bus integration is enabled (when policies are active)
+  const enableMessageBusIntegration =
+    settings.tools?.enableMessageBusIntegration ?? false;
+  if (enableMessageBusIntegration) {
+    debugLogger.debug('=== Policy Engine Configuration ===');
+    debugLogger.debug(
+      `Default decision: ${policyEngineConfig.defaultDecision}`,
+    );
+    debugLogger.debug(`Total rules: ${policyEngineConfig.rules?.length || 0}`);
+    if (policyEngineConfig.rules && policyEngineConfig.rules.length > 0) {
+      debugLogger.debug('Rules (sorted by priority):');
+      policyEngineConfig.rules.forEach((rule, index) => {
+        debugLogger.debug(
+          `  [${index}] toolName: ${rule.toolName || '*'}, decision: ${rule.decision}, priority: ${rule.priority}, argsPattern: ${rule.argsPattern ? rule.argsPattern.source : 'none'}`,
+        );
+      });
+    }
+    debugLogger.debug('===================================');
+  }
 
   const allowedTools = argv.allowedTools || settings.tools?.allowed || [];
   const allowedToolsSet = new Set(allowedTools);
@@ -672,8 +696,7 @@ export async function loadCliConfig(
       format: (argv.outputFormat ?? settings.output?.format) as OutputFormat,
     },
     useModelRouter,
-    enableMessageBusIntegration:
-      settings.tools?.enableMessageBusIntegration ?? false,
+    enableMessageBusIntegration,
     codebaseInvestigatorSettings:
       settings.experimental?.codebaseInvestigatorSettings,
     fakeResponses: argv.fakeResponses,
diff --git a/packages/cli/src/config/policies/read-only.toml b/packages/cli/src/config/policies/read-only.toml
new file mode 100644
index 0000000000..0c36faf003
--- /dev/null
+++ b/packages/cli/src/config/policies/read-only.toml
@@ -0,0 +1,56 @@
+# Priority system for policy rules:
+# - Higher priority numbers win over lower priority numbers
+# - When multiple rules match, the highest priority rule is applied
+# - Rules are evaluated in order of priority (highest first)
+#
+# Priority bands (tiers):
+# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
+# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+#
+# This ensures Admin > User > Default hierarchy is always preserved,
+# while allowing user-specified priorities to work within each tier.
+#
+# Settings-based and dynamic rules (all in user tier 2.x):
+#   2.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   2.9:  MCP servers excluded list (security: persistent server blocks)
+#   2.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   2.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   2.2:  MCP servers with trust=true (persistent trusted servers)
+#   2.1:  MCP servers allowed list (persistent general server allows)
+#
+# TOML policy priorities (before transformation):
+#   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+#   15: Auto-edit tool override (becomes 1.015 in default tier)
+#   50: Read-only tools (becomes 1.050 in default tier)
+#   999: YOLO mode allow-all (becomes 1.999 in default tier)
+
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 50
+
+[[rule]]
+toolName = "search_file_content"
+decision = "allow"
+priority = 50
+
+[[rule]]
+toolName = "list_directory"
+decision = "allow"
+priority = 50
+
+[[rule]]
+toolName = "read_file"
+decision = "allow"
+priority = 50
+
+[[rule]]
+toolName = "read_many_files"
+decision = "allow"
+priority = 50
+
+[[rule]]
+toolName = "google_web_search"
+decision = "allow"
+priority = 50
diff --git a/packages/cli/src/config/policies/write.toml b/packages/cli/src/config/policies/write.toml
new file mode 100644
index 0000000000..8e4c1ae70e
--- /dev/null
+++ b/packages/cli/src/config/policies/write.toml
@@ -0,0 +1,63 @@
+# Priority system for policy rules:
+# - Higher priority numbers win over lower priority numbers
+# - When multiple rules match, the highest priority rule is applied
+# - Rules are evaluated in order of priority (highest first)
+#
+# Priority bands (tiers):
+# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
+# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+#
+# This ensures Admin > User > Default hierarchy is always preserved,
+# while allowing user-specified priorities to work within each tier.
+#
+# Settings-based and dynamic rules (all in user tier 2.x):
+#   2.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   2.9:  MCP servers excluded list (security: persistent server blocks)
+#   2.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   2.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   2.2:  MCP servers with trust=true (persistent trusted servers)
+#   2.1:  MCP servers allowed list (persistent general server allows)
+#
+# TOML policy priorities (before transformation):
+#   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+#   15: Auto-edit tool override (becomes 1.015 in default tier)
+#   50: Read-only tools (becomes 1.050 in default tier)
+#   999: YOLO mode allow-all (becomes 1.999 in default tier)
+
+[[rule]]
+toolName = "replace"
+decision = "ask_user"
+priority = 10
+
+[[rule]]
+toolName = "replace"
+decision = "allow"
+priority = 15
+modes = ["autoEdit"]
+
+[[rule]]
+toolName = "save_memory"
+decision = "ask_user"
+priority = 10
+
+[[rule]]
+toolName = "run_shell_command"
+decision = "ask_user"
+priority = 10
+
+[[rule]]
+toolName = "write_file"
+decision = "ask_user"
+priority = 10
+
+[[rule]]
+toolName = "write_file"
+decision = "allow"
+priority = 15
+modes = ["autoEdit"]
+
+[[rule]]
+toolName = "web_fetch"
+decision = "ask_user"
+priority = 10
diff --git a/packages/cli/src/config/policies/yolo.toml b/packages/cli/src/config/policies/yolo.toml
new file mode 100644
index 0000000000..0c5f9e9221
--- /dev/null
+++ b/packages/cli/src/config/policies/yolo.toml
@@ -0,0 +1,31 @@
+# Priority system for policy rules:
+# - Higher priority numbers win over lower priority numbers
+# - When multiple rules match, the highest priority rule is applied
+# - Rules are evaluated in order of priority (highest first)
+#
+# Priority bands (tiers):
+# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
+# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+#
+# This ensures Admin > User > Default hierarchy is always preserved,
+# while allowing user-specified priorities to work within each tier.
+#
+# Settings-based and dynamic rules (all in user tier 2.x):
+#   2.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   2.9:  MCP servers excluded list (security: persistent server blocks)
+#   2.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   2.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   2.2:  MCP servers with trust=true (persistent trusted servers)
+#   2.1:  MCP servers allowed list (persistent general server allows)
+#
+# TOML policy priorities (before transformation):
+#   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+#   15: Auto-edit tool override (becomes 1.015 in default tier)
+#   50: Read-only tools (becomes 1.050 in default tier)
+#   999: YOLO mode allow-all (becomes 1.999 in default tier)
+
+[[rule]]
+decision = "allow"
+priority = 999
+modes = ["yolo"]
diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts
index 3b19121d5f..9b8457bc33 100644
--- a/packages/cli/src/config/policy-engine.integration.test.ts
+++ b/packages/cli/src/config/policy-engine.integration.test.ts
@@ -15,7 +15,7 @@ import type { Settings } from './settings.js';
 
 describe('Policy Engine Integration Tests', () => {
   describe('Policy configuration produces valid PolicyEngine config', () => {
-    it('should create a working PolicyEngine from basic settings', () => {
+    it('should create a working PolicyEngine from basic settings', async () => {
       const settings: Settings = {
         tools: {
           allowed: ['run_shell_command'],
@@ -23,7 +23,10 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Allowed tool should be allowed
@@ -43,7 +46,7 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should handle MCP server wildcard patterns correctly', () => {
+    it('should handle MCP server wildcard patterns correctly', async () => {
       const settings: Settings = {
         mcp: {
           allowed: ['allowed-server'],
@@ -58,7 +61,10 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Tools from allowed server should be allowed
@@ -91,7 +97,7 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should correctly prioritize specific tool rules over MCP server wildcards', () => {
+    it('should correctly prioritize specific tool excludes over MCP server wildcards', async () => {
       const settings: Settings = {
         mcp: {
           allowed: ['my-server'],
@@ -101,19 +107,23 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
-      // Server is allowed, but specific tool is excluded
+      // Allowed MCP server (priority 2.1): general allow for all of its tools
       expect(engine.check({ name: 'my-server__safe-tool' })).toBe(
         PolicyDecision.ALLOW,
       );
+      // But the tool-specific exclude (priority 2.4) outranks the server allow
       expect(engine.check({ name: 'my-server__dangerous-tool' })).toBe(
         PolicyDecision.DENY,
       );
     });
 
-    it('should handle complex mixed configurations', () => {
+    it('should handle complex mixed configurations', async () => {
       const settings: Settings = {
         tools: {
           autoAccept: true, // Allows read-only tools
@@ -133,7 +143,10 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Read-only tools should be allowed (autoAccept)
@@ -171,14 +184,17 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should handle YOLO mode correctly', () => {
+    it('should handle YOLO mode correctly', async () => {
       const settings: Settings = {
         tools: {
           exclude: ['dangerous-tool'], // Even in YOLO, excludes should be respected
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.YOLO);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.YOLO,
+      );
       const engine = new PolicyEngine(config);
 
       // Most tools should be allowed in YOLO mode
@@ -194,25 +210,26 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should handle AUTO_EDIT mode correctly', () => {
+    it('should handle AUTO_EDIT mode correctly', async () => {
       const settings: Settings = {};
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.AUTO_EDIT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.AUTO_EDIT,
+      );
       const engine = new PolicyEngine(config);
 
-      // Edit tool should be allowed (EditTool.Name = 'replace')
+      // Edit tools should be allowed in AUTO_EDIT mode
       expect(engine.check({ name: 'replace' })).toBe(PolicyDecision.ALLOW);
+      expect(engine.check({ name: 'write_file' })).toBe(PolicyDecision.ALLOW);
 
       // Other tools should follow normal rules
       expect(engine.check({ name: 'run_shell_command' })).toBe(
         PolicyDecision.ASK_USER,
       );
-      expect(engine.check({ name: 'write_file' })).toBe(
-        PolicyDecision.ASK_USER,
-      );
     });
 
-    it('should verify priority ordering works correctly in practice', () => {
+    it('should verify priority ordering works correctly in practice', async () => {
       const settings: Settings = {
         tools: {
           autoAccept: true, // Priority 50
@@ -232,7 +249,10 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Test that priorities are applied correctly
@@ -240,28 +260,29 @@ describe('Policy Engine Integration Tests', () => {
 
       // Find rules and verify their priorities
       const blockedToolRule = rules.find((r) => r.toolName === 'blocked-tool');
-      expect(blockedToolRule?.priority).toBe(200);
+      expect(blockedToolRule?.priority).toBe(2.4); // Command line exclude
 
       const blockedServerRule = rules.find(
         (r) => r.toolName === 'blocked-server__*',
       );
-      expect(blockedServerRule?.priority).toBe(195);
+      expect(blockedServerRule?.priority).toBe(2.9); // MCP server exclude
 
       const specificToolRule = rules.find(
         (r) => r.toolName === 'specific-tool',
       );
-      expect(specificToolRule?.priority).toBe(100);
+      expect(specificToolRule?.priority).toBe(2.3); // Command line allow
 
       const trustedServerRule = rules.find(
         (r) => r.toolName === 'trusted-server__*',
       );
-      expect(trustedServerRule?.priority).toBe(90);
+      expect(trustedServerRule?.priority).toBe(2.2); // MCP trusted server
 
       const mcpServerRule = rules.find((r) => r.toolName === 'mcp-server__*');
-      expect(mcpServerRule?.priority).toBe(85);
+      expect(mcpServerRule?.priority).toBe(2.1); // MCP allowed server
 
       const readOnlyToolRule = rules.find((r) => r.toolName === 'glob');
-      expect(readOnlyToolRule?.priority).toBe(50);
+      // Priority 50 in default tier → 1.05
+      expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5);
 
       // Verify the engine applies these priorities correctly
       expect(engine.check({ name: 'blocked-tool' })).toBe(PolicyDecision.DENY);
@@ -280,7 +301,7 @@ describe('Policy Engine Integration Tests', () => {
       expect(engine.check({ name: 'glob' })).toBe(PolicyDecision.ALLOW);
     });
 
-    it('should handle edge case: MCP server with both trust and exclusion', () => {
+    it('should handle edge case: MCP server with both trust and exclusion', async () => {
       const settings: Settings = {
         mcpServers: {
           'conflicted-server': {
@@ -294,7 +315,10 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Exclusion (195) should win over trust (90)
@@ -303,7 +327,7 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should handle edge case: specific tool allowed but server excluded', () => {
+    it('should handle edge case: specific tool allowed but server excluded', async () => {
       const settings: Settings = {
         mcp: {
           excluded: ['my-server'], // Priority 195 - DENY
@@ -313,7 +337,10 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Server exclusion (195) wins over specific tool allow (100)
@@ -326,10 +353,13 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should verify non-interactive mode transformation', () => {
+    it('should verify non-interactive mode transformation', async () => {
       const settings: Settings = {};
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       // Enable non-interactive mode
       const engineConfig = { ...config, nonInteractive: true };
       const engine = new PolicyEngine(engineConfig);
@@ -341,10 +371,13 @@ describe('Policy Engine Integration Tests', () => {
       );
     });
 
-    it('should handle empty settings gracefully', () => {
+    it('should handle empty settings gracefully', async () => {
       const settings: Settings = {};
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const engine = new PolicyEngine(config);
 
       // Should have default rules for write tools
@@ -357,7 +390,7 @@ describe('Policy Engine Integration Tests', () => {
       expect(engine.check({ name: 'unknown' })).toBe(PolicyDecision.ASK_USER);
     });
 
-    it('should verify rules are created with correct priorities', () => {
+    it('should verify rules are created with correct priorities', async () => {
       const settings: Settings = {
         tools: {
           autoAccept: true,
@@ -370,24 +403,28 @@ describe('Policy Engine Integration Tests', () => {
         },
       };
 
-      const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.DEFAULT,
+      );
       const rules = config.rules || [];
 
       // Verify each rule has the expected priority
       const tool3Rule = rules.find((r) => r.toolName === 'tool3');
-      expect(tool3Rule?.priority).toBe(200); // Excluded tools
+      expect(tool3Rule?.priority).toBe(2.4); // Excluded tools (user tier)
 
       const server2Rule = rules.find((r) => r.toolName === 'server2__*');
-      expect(server2Rule?.priority).toBe(195); // Excluded servers
+      expect(server2Rule?.priority).toBe(2.9); // Excluded servers (user tier)
 
       const tool1Rule = rules.find((r) => r.toolName === 'tool1');
-      expect(tool1Rule?.priority).toBe(100); // Allowed tools
+      expect(tool1Rule?.priority).toBe(2.3); // Allowed tools (user tier)
 
       const server1Rule = rules.find((r) => r.toolName === 'server1__*');
-      expect(server1Rule?.priority).toBe(85); // Allowed servers
+      expect(server1Rule?.priority).toBe(2.1); // Allowed servers (user tier)
 
       const globRule = rules.find((r) => r.toolName === 'glob');
-      expect(globRule?.priority).toBe(50); // Auto-accept read-only
+      // Priority 50 in default tier → 1.05
+      expect(globRule?.priority).toBeCloseTo(1.05, 5); // Auto-accept read-only
 
       // The PolicyEngine will sort these by priority when it's created
       const engine = new PolicyEngine(config);
diff --git a/packages/cli/src/config/policy-toml-loader.test.ts b/packages/cli/src/config/policy-toml-loader.test.ts
new file mode 100644
index 0000000000..e05996b16c
--- /dev/null
+++ b/packages/cli/src/config/policy-toml-loader.test.ts
@@ -0,0 +1,982 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { ApprovalMode, PolicyDecision } from '@google/gemini-cli-core';
+import type { Dirent } from 'node:fs';
+import nodePath from 'node:path';
+
+describe('policy-toml-loader', () => {
+  beforeEach(() => {
+    vi.resetModules();
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+    vi.doUnmock('node:fs/promises');
+  });
+
+  describe('loadPoliciesFromToml', () => {
+    it('should load and parse a simple policy file', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'test.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'test.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      expect(result.rules).toHaveLength(1);
+      expect(result.rules[0]).toEqual({
+        toolName: 'glob',
+        decision: PolicyDecision.ALLOW,
+        priority: 1.1, // tier 1 + 100/1000
+      });
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should expand commandPrefix array to multiple rules', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'shell.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandPrefix = ["git status", "git log"]
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 2;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      expect(result.rules).toHaveLength(2);
+      expect(result.rules[0].toolName).toBe('run_shell_command');
+      expect(result.rules[1].toolName).toBe('run_shell_command');
+      expect(
+        result.rules[0].argsPattern?.test('{"command":"git status"}'),
+      ).toBe(true);
+      expect(result.rules[1].argsPattern?.test('{"command":"git log"}')).toBe(
+        true,
+      );
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should transform commandRegex to argsPattern', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'shell.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandRegex = "git (status|log).*"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 2;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      expect(result.rules).toHaveLength(1);
+      expect(
+        result.rules[0].argsPattern?.test('{"command":"git status"}'),
+      ).toBe(true);
+      expect(
+        result.rules[0].argsPattern?.test('{"command":"git log --all"}'),
+      ).toBe(true);
+      expect(
+        result.rules[0].argsPattern?.test('{"command":"git branch"}'),
+      ).toBe(false);
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should expand toolName array', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'tools.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'tools.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = ["glob", "grep", "read"]
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      expect(result.rules).toHaveLength(3);
+      expect(result.rules.map((r) => r.toolName)).toEqual([
+        'glob',
+        'grep',
+        'read',
+      ]);
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should transform mcpName to composite toolName', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'mcp.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'mcp.toml'))
+        ) {
+          return `
+[[rule]]
+mcpName = "google-workspace"
+toolName = ["calendar.list", "calendar.get"]
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 2;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      expect(result.rules).toHaveLength(2);
+      expect(result.rules[0].toolName).toBe('google-workspace__calendar.list');
+      expect(result.rules[1].toolName).toBe('google-workspace__calendar.get');
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should filter rules by mode', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'modes.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'modes.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 100
+modes = ["default", "yolo"]
+
+[[rule]]
+toolName = "grep"
+decision = "allow"
+priority = 100
+modes = ["yolo"]
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      // Only the first rule should be included (modes includes "default")
+      expect(result.rules).toHaveLength(1);
+      expect(result.rules[0].toolName).toBe('glob');
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should handle TOML parse errors', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]
+toolName = "glob"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+
+      expect(result.rules).toHaveLength(0);
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('toml_parse');
+      expect(result.errors[0].fileName).toBe('invalid.toml');
+    });
+
+    it('should handle schema validation errors', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, invalid.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // The rule below omits `decision`, which the loader's schema requires.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "glob"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // No rules load; the single error is a schema error naming `decision`.
+      expect(result.rules).toHaveLength(0);
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('schema_validation');
+      expect(result.errors[0].details).toContain('decision');
+    });
+
+    it('should reject commandPrefix without run_shell_command', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, invalid.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // The rule sets commandPrefix while toolName is "glob", not the shell tool.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "glob"
+commandPrefix = "git status"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // Exactly one rule_validation error, and it names the required tool.
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('rule_validation');
+      expect(result.errors[0].details).toContain('run_shell_command');
+    });
+
+    it('should reject commandPrefix + argsPattern combination', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, invalid.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // The shell-tool rule below sets both commandPrefix and argsPattern.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandPrefix = "git status"
+argsPattern = "test"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // Exactly one rule_validation error, saying the fields are exclusive.
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('rule_validation');
+      expect(result.errors[0].details).toContain('mutually exclusive');
+    });
+
+    it('should handle invalid regex patterns', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, invalid.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // commandRegex has an unclosed group, so it cannot compile to a RegExp.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandRegex = "git (status|branch"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // The rule is dropped and the error details echo the offending pattern.
+      expect(result.rules).toHaveLength(0);
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('regex_compilation');
+      expect(result.errors[0].details).toContain('git (status|branch');
+    });
+
+    it('should escape regex special characters in commandPrefix', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, shell.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'shell.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // The prefix contains "*" and ".", which are regex metacharacters.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandPrefix = "git log *.txt"
+decision = "allow"
+priority = 100
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // The rule loads, and its generated pattern is checked against JSON args.
+      expect(result.rules).toHaveLength(1);
+      // Should match literal asterisk, not wildcard
+      expect(
+        result.rules[0].argsPattern?.test('{"command":"git log *.txt"}'),
+      ).toBe(true);
+      expect(
+        result.rules[0].argsPattern?.test('{"command":"git log a.txt"}'),
+      ).toBe(false);
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should handle non-existent directory gracefully', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Every readdir call rejects with an error whose code is ENOENT.
+      const mockReaddir = vi.fn(async (_path: string): Promise<Dirent[]> => {
+        const error: NodeJS.ErrnoException = new Error('ENOENT');
+        error.code = 'ENOENT';
+        throw error;
+      });
+      // Only readdir is replaced; the rest of node:fs/promises stays real.
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readdir: mockReaddir },
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/non-existent'],
+        getPolicyTier,
+      );
+
+      // Should not error for missing directories
+      expect(result.rules).toHaveLength(0);
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('should reject priority >= 1000 with helpful error message', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, invalid.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // priority = 1000 is one above the schema maximum of 999.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 1000
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // No rules load; the details must explain the tier-overflow upper bound.
+      expect(result.rules).toHaveLength(0);
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('schema_validation');
+      expect(result.errors[0].details).toContain('priority');
+      expect(result.errors[0].details).toContain('tier overflow');
+      expect(result.errors[0].details).toContain(
+        'Priorities >= 1000 would jump to the next tier',
+      );
+      expect(result.errors[0].details).toContain('<= 999');
+    });
+
+    it('should reject negative priority with helpful error message', async () => {
+      const actualFs =
+        await vi.importActual<typeof import('node:fs/promises')>(
+          'node:fs/promises',
+        );
+      // Fake /policies as a directory containing a single file, invalid.toml.
+      const mockReaddir = vi.fn(
+        async (
+          path: string,
+          _options?: { withFileTypes: boolean },
+        ): Promise<Dirent[]> => {
+          if (nodePath.normalize(path) === nodePath.normalize('/policies')) {
+            return [
+              {
+                name: 'invalid.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              } as Dirent,
+            ];
+          }
+          return [];
+        },
+      );
+      // priority = -1 is one below the schema minimum of 0.
+      const mockReadFile = vi.fn(async (path: string): Promise<string> => {
+        if (
+          nodePath.normalize(path) ===
+          nodePath.normalize(nodePath.join('/policies', 'invalid.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = -1
+`;
+        }
+        throw new Error('File not found');
+      });
+
+      vi.doMock('node:fs/promises', () => ({
+        ...actualFs,
+        default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+        readFile: mockReadFile,
+        readdir: mockReaddir,
+      }));
+
+      const { loadPoliciesFromToml: load } = await import(
+        './policy-toml-loader.js'
+      );
+
+      const getPolicyTier = (_dir: string) => 1;
+      const result = await load(
+        ApprovalMode.DEFAULT,
+        ['/policies'],
+        getPolicyTier,
+      );
+      // No rules load; the details must state the lower bound on priority.
+      expect(result.rules).toHaveLength(0);
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].errorType).toBe('schema_validation');
+      expect(result.errors[0].details).toContain('priority');
+      expect(result.errors[0].details).toContain('>= 0');
+      expect(result.errors[0].details).toContain('must be >= 0');
+    });
+  });
+});
diff --git a/packages/cli/src/config/policy-toml-loader.ts b/packages/cli/src/config/policy-toml-loader.ts
new file mode 100644
index 0000000000..fb5a7d1253
--- /dev/null
+++ b/packages/cli/src/config/policy-toml-loader.ts
@@ -0,0 +1,394 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  type PolicyRule,
+  PolicyDecision,
+  type ApprovalMode,
+} from '@google/gemini-cli-core';
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import toml from '@iarna/toml';
+import { z, type ZodError } from 'zod';
+
+/**
+ * Schema for a single policy rule in the TOML file (before transformation).
+ * Only `decision` and `priority` are required; every other field is optional.
+ */
+const PolicyRuleSchema = z.object({
+  toolName: z.union([z.string(), z.array(z.string())]).optional(),
+  mcpName: z.string().optional(),
+  argsPattern: z.string().optional(),
+  commandPrefix: z.union([z.string(), z.array(z.string())]).optional(),
+  commandRegex: z.string().optional(),
+  decision: z.nativeEnum(PolicyDecision),
+  // Priority must be an integer in [0, 999] to prevent tier overflow.
+  // With tier transformation (tier + priority/1000), this ensures:
+  // - Tier 1 (default): range [1.000, 1.999]
+  // - Tier 2 (user): [2.000, 2.999]; Tier 3 (admin): [3.000, 3.999]
+  priority: z
+    .number({
+      required_error: 'priority is required',
+      invalid_type_error: 'priority must be a number',
+    })
+    .int({ message: 'priority must be an integer' })
+    .min(0, { message: 'priority must be >= 0' })
+    .max(999, {
+      message:
+        'priority must be <= 999 to prevent tier overflow. Priorities >= 1000 would jump to the next tier.',
+    }),
+  modes: z.array(z.string()).optional(),
+});
+
+/**
+ * Schema for an entire policy TOML file: a required `rule` array of rules.
+ */
+const PolicyFileSchema = z.object({
+  rule: z.array(PolicyRuleSchema),
+});
+
+/**
+ * Raw policy rule as validated by PolicyRuleSchema (before transformation).
+ */
+type PolicyRuleToml = z.infer<typeof PolicyRuleSchema>;
+
+/**
+ * Types of errors that can occur while loading policy files.
+ */
+export type PolicyFileErrorType =
+  | 'file_read' // a policy directory or file could not be read
+  | 'toml_parse' // the file content could not be parsed as TOML
+  | 'schema_validation' // the parsed content does not match PolicyFileSchema
+  | 'rule_validation' // a rule misuses commandPrefix/commandRegex
+  | 'regex_compilation'; // a rule's pattern is not a valid RegExp
+
+/**
+ * Detailed error information for policy file loading failures.
+ */
+export interface PolicyFileError {
+  filePath: string; // path of the file (or directory) that failed to load
+  fileName: string; // base name of that file or directory
+  tier: 'default' | 'user' | 'admin'; // tier name of the directory being loaded
+  ruleIndex?: number; // 0-based rule index; set for rule_validation errors
+  errorType: PolicyFileErrorType;
+  message: string; // short summary of the failure
+  details?: string; // underlying error text or a formatted explanation
+  suggestion?: string; // optional hint on how to fix the problem
+}
+
+/**
+ * Result of loading policies from TOML files.
+ */
+export interface PolicyLoadResult {
+  rules: PolicyRule[]; // rules that were parsed and transformed successfully
+  errors: PolicyFileError[]; // one entry per failure encountered while loading
+}
+
+/**
+ * Backslash-escapes regex metacharacters so a commandPrefix matches literally.
+ *
+ * @param str The string to escape
+ * @returns `str` with each metacharacter preceded by a backslash
+ */
+function escapeRegex(str: string): string {
+  const metacharacters = /[.*+?^${}()|[\]\\]/g;
+  return str.replace(metacharacters, '\\$&');
+}
+
+/**
+ * Maps a numeric policy tier to its human-readable name.
+ * Tier 2 is 'user' and tier 3 is 'admin'; every other value, including
+ * tier 1 and unrecognized tiers, is reported as 'default'.
+ */
+function getTierName(tier: number): 'default' | 'user' | 'admin' {
+  if (tier === 2) return 'user';
+  return tier === 3 ? 'admin' : 'default';
+}
+
+/**
+ * Renders a Zod validation error as a multi-line, human-readable message:
+ * a header naming the 1-based rule number, then one bullet per issue.
+ */
+function formatSchemaError(error: ZodError, ruleIndex: number): string {
+  const header = `Invalid policy rule (rule #${ruleIndex + 1}):`;
+  const bullets: string[] = [];
+  for (const { path: fieldPath, message } of error.issues) {
+    bullets.push(`  - Field "${fieldPath.join('.')}": ${message}`);
+  }
+  return `${header}\n${bullets.join('\n')}`;
+}
+
+/**
+ * Checks a rule's use of the commandPrefix/commandRegex convenience fields.
+ * Returns a message describing the first violation, or null if there is none.
+ */
+function validateShellCommandSyntax(
+  rule: PolicyRuleToml,
+  ruleIndex: number,
+): string | null {
+  const usesPrefix = rule.commandPrefix !== undefined;
+  const usesRegex = rule.commandRegex !== undefined;
+
+  // Nothing to check unless one of the convenience fields is present.
+  if (!usesPrefix && !usesRegex) {
+    return null;
+  }
+
+  // toolName must be the single string "run_shell_command"; arrays fail too.
+  if (rule.toolName !== 'run_shell_command') {
+    return (
+      `Rule #${ruleIndex + 1}: commandPrefix and commandRegex can only be used with toolName = "run_shell_command"\n` +
+      `  Found: toolName = ${JSON.stringify(rule.toolName)}\n` +
+      `  Fix: Set toolName = "run_shell_command" (not an array)`
+    );
+  }
+  // An explicit argsPattern cannot be combined with either field.
+  if (rule.argsPattern !== undefined) {
+    return (
+      `Rule #${ruleIndex + 1}: cannot use both commandPrefix/commandRegex and argsPattern\n` +
+      `  These fields are mutually exclusive\n` +
+      `  Fix: Use either commandPrefix/commandRegex OR argsPattern, not both`
+    );
+  }
+  // The two convenience fields cannot be combined with each other.
+  if (usesPrefix && usesRegex) {
+    return (
+      `Rule #${ruleIndex + 1}: cannot use both commandPrefix and commandRegex\n` +
+      `  These fields are mutually exclusive\n` +
+      `  Fix: Use either commandPrefix OR commandRegex, not both`
+    );
+  }
+
+  return null;
+}
+
+/**
+ * Maps a TOML priority into its tier's band using tier + priority/1000.
+ *
+ * @param priority The priority value from the TOML file
+ * @param tier The tier (1=default, 2=user, 3=admin)
+ * @returns The tier plus the priority scaled down by 1000
+ */
+function transformPriority(priority: number, tier: number): number {
+  const scaledPriority = priority / 1000;
+  return tier + scaledPriority;
+}
+
+/**
+ * Loads and parses policies from TOML files in the specified directories.
+ *
+ * This function:
+ * 1. Scans directories for .toml files
+ * 2. Parses and validates each file
+ * 3. Transforms rules (commandPrefix, arrays, mcpName, priorities)
+ * 4. Filters rules by approval mode
+ * 5. Reports detailed errors; rules that fail validation are not loaded
+ *
+ * @param approvalMode The current approval mode (for filtering rules by mode)
+ * @param policyDirs Array of directory paths to scan for policy files
+ * @param getPolicyTier Function to determine tier (1-3) for a directory
+ * @returns Object containing successfully parsed rules and any errors encountered
+ */
+export async function loadPoliciesFromToml(
+  approvalMode: ApprovalMode,
+  policyDirs: string[],
+  getPolicyTier: (dir: string) => number,
+): Promise<PolicyLoadResult> {
+  const rules: PolicyRule[] = [];
+  const errors: PolicyFileError[] = [];
+
+  for (const dir of policyDirs) {
+    const tier = getPolicyTier(dir);
+    const tierName = getTierName(tier);
+
+    // Scan directory for all .toml files
+    let filesToLoad: string[];
+    try {
+      const dirEntries = await fs.readdir(dir, { withFileTypes: true });
+      filesToLoad = dirEntries
+        .filter((entry) => entry.isFile() && entry.name.endsWith('.toml'))
+        .map((entry) => entry.name);
+    } catch (e) {
+      const error = e as NodeJS.ErrnoException;
+      if (error.code === 'ENOENT') {
+        // Directory doesn't exist, skip it (not an error)
+        continue;
+      }
+      errors.push({
+        filePath: dir,
+        fileName: path.basename(dir),
+        tier: tierName,
+        errorType: 'file_read',
+        message: `Failed to read policy directory`,
+        details: error.message,
+      });
+      continue;
+    }
+
+    for (const file of filesToLoad) {
+      const filePath = path.join(dir, file);
+
+      try {
+        // Read file
+        const fileContent = await fs.readFile(filePath, 'utf-8');
+
+        // Parse TOML
+        let parsed: unknown;
+        try {
+          parsed = toml.parse(fileContent);
+        } catch (e) {
+          const error = e as Error;
+          errors.push({
+            filePath,
+            fileName: file,
+            tier: tierName,
+            errorType: 'toml_parse',
+            message: 'TOML parsing failed',
+            details: error.message,
+            suggestion:
+              'Check for syntax errors like missing quotes, brackets, or commas',
+          });
+          continue;
+        }
+
+        // Validate schema
+        const validationResult = PolicyFileSchema.safeParse(parsed);
+        if (!validationResult.success) {
+          // Zod issue paths look like ['rule', <index>, <field>]; report the
+          // rule that actually failed instead of always blaming rule #1.
+          const [scope, index] = validationResult.error.issues[0]?.path ?? [];
+          const failedIndex =
+            scope === 'rule' && typeof index === 'number' ? index : 0;
+          errors.push({
+            filePath,
+            fileName: file,
+            tier: tierName,
+            errorType: 'schema_validation',
+            message: 'Schema validation failed',
+            details: formatSchemaError(validationResult.error, failedIndex),
+            suggestion:
+              'Ensure all required fields (decision, priority) are present with correct types',
+          });
+          continue;
+        }
+
+        // Validate shell command convenience syntax; invalid rules are skipped
+        const invalidRuleIndexes = new Set<number>();
+        validationResult.data.rule.forEach((rule, i) => {
+          const validationError = validateShellCommandSyntax(rule, i);
+          if (validationError) {
+            invalidRuleIndexes.add(i);
+            errors.push({
+              filePath,
+              fileName: file,
+              tier: tierName,
+              ruleIndex: i,
+              errorType: 'rule_validation',
+              message: 'Invalid shell command syntax',
+              details: validationError,
+            });
+          }
+        });
+
+        // Transform rules
+        const parsedRules: PolicyRule[] = validationResult.data.rule
+          .filter((rule, i) => {
+            // Drop rules that failed validation above, then filter by mode
+            if (invalidRuleIndexes.has(i)) return false;
+            if (!rule.modes || rule.modes.length === 0) return true;
+            return rule.modes.includes(approvalMode);
+          })
+          .flatMap((rule) => {
+            // Transform commandPrefix/commandRegex to argsPattern
+            let effectiveArgsPattern = rule.argsPattern;
+            const commandPrefixes: string[] = [];
+
+            if (rule.commandPrefix) {
+              const prefixes = Array.isArray(rule.commandPrefix)
+                ? rule.commandPrefix
+                : [rule.commandPrefix];
+              commandPrefixes.push(...prefixes);
+            } else if (rule.commandRegex) {
+              effectiveArgsPattern = `"command":"${rule.commandRegex}`;
+            }
+
+            // Expand command prefixes to multiple patterns
+            const argsPatterns: Array<string | undefined> =
+              commandPrefixes.length > 0
+                ? commandPrefixes.map(
+                    (prefix) => `"command":"${escapeRegex(prefix)}`,
+                  )
+                : [effectiveArgsPattern];
+
+            // For each argsPattern, expand toolName arrays
+            return argsPatterns.flatMap((argsPattern) => {
+              const toolNames: Array<string | undefined> = rule.toolName
+                ? Array.isArray(rule.toolName)
+                  ? rule.toolName
+                  : [rule.toolName]
+                : [undefined];
+
+              // Create a policy rule for each tool name
+              return toolNames.map((toolName) => {
+                // Transform mcpName field to composite toolName format
+                // (`||` on purpose: an empty toolName also selects all tools)
+                const effectiveToolName = rule.mcpName
+                  ? `${rule.mcpName}__${toolName || '*'}`
+                  : toolName;
+
+                const policyRule: PolicyRule = {
+                  toolName: effectiveToolName,
+                  decision: rule.decision,
+                  priority: transformPriority(rule.priority, tier),
+                };
+
+                // Compile regex pattern
+                if (argsPattern) {
+                  try {
+                    policyRule.argsPattern = new RegExp(argsPattern);
+                  } catch (e) {
+                    const error = e as Error;
+                    errors.push({
+                      filePath,
+                      fileName: file,
+                      tier: tierName,
+                      errorType: 'regex_compilation',
+                      message: 'Invalid regex pattern',
+                      details: `Pattern: ${argsPattern}\nError: ${error.message}`,
+                      suggestion:
+                        'Check regex syntax for errors like unmatched brackets or invalid escape sequences',
+                    });
+                    // Skip this rule if regex compilation fails
+                    return null;
+                  }
+                }
+
+                return policyRule;
+              });
+            });
+          })
+          .filter((rule): rule is PolicyRule => rule !== null);
+
+        rules.push(...parsedRules);
+      } catch (e) {
+        const error = e as NodeJS.ErrnoException;
+        // Catch-all for unexpected errors
+        if (error.code !== 'ENOENT') {
+          errors.push({
+            filePath,
+            fileName: file,
+            tier: tierName,
+            errorType: 'file_read',
+            message: 'Failed to read policy file',
+            details: error.message,
+          });
+        }
+      }
+    }
+  }
+
+  return { rules, errors };
+}
diff --git a/packages/cli/src/config/policy.test.ts b/packages/cli/src/config/policy.test.ts
index f6c442a9e6..8589165750 100644
--- a/packages/cli/src/config/policy.test.ts
+++ b/packages/cli/src/config/policy.test.ts
@@ -4,8 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect } from 'vitest';
-import { createPolicyEngineConfig } from './policy.js';
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import nodePath from 'node:path';
+
 import type { Settings } from './settings.js';
 import {
   ApprovalMode,
@@ -13,129 +14,191 @@ import {
   WEB_FETCH_TOOL_NAME,
 } from '@google/gemini-cli-core';
 
+afterEach(() => {
+  vi.clearAllMocks();
+});
+
 describe('createPolicyEngineConfig', () => {
-  it('should return ASK_USER for write tools and ALLOW for read-only tools by default', () => {
+  it('should return ASK_USER for write tools and ALLOW for read-only tools by default', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          // Return empty array for user policies
+          return [] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readdir: mockReaddir },
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
     const settings: Settings = {};
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
     expect(config.defaultDecision).toBe(PolicyDecision.ASK_USER);
     // The order of the rules is not guaranteed, so we sort them by tool name.
     config.rules?.sort((a, b) =>
       (a.toolName ?? '').localeCompare(b.toolName ?? ''),
     );
+    // Default policies are transformed to tier 1: 1 + priority/1000
     expect(config.rules).toEqual([
       {
         toolName: 'glob',
         decision: PolicyDecision.ALLOW,
-        priority: 50,
+        priority: 1.05, // 1 + 50/1000
       },
       {
         toolName: 'google_web_search',
         decision: PolicyDecision.ALLOW,
-        priority: 50,
+        priority: 1.05,
       },
       {
         toolName: 'list_directory',
         decision: PolicyDecision.ALLOW,
-        priority: 50,
+        priority: 1.05,
       },
       {
         toolName: 'read_file',
         decision: PolicyDecision.ALLOW,
-        priority: 50,
+        priority: 1.05,
       },
       {
         toolName: 'read_many_files',
         decision: PolicyDecision.ALLOW,
-        priority: 50,
+        priority: 1.05,
       },
       {
         toolName: 'replace',
         decision: PolicyDecision.ASK_USER,
-        priority: 10,
+        priority: 1.01, // 1 + 10/1000
       },
       {
         toolName: 'run_shell_command',
         decision: PolicyDecision.ASK_USER,
-        priority: 10,
+        priority: 1.01,
       },
       {
         toolName: 'save_memory',
         decision: PolicyDecision.ASK_USER,
-        priority: 10,
+        priority: 1.01,
       },
       {
         toolName: 'search_file_content',
         decision: PolicyDecision.ALLOW,
-        priority: 50,
+        priority: 1.05,
       },
       {
         toolName: 'web_fetch',
         decision: PolicyDecision.ASK_USER,
-        priority: 10,
+        priority: 1.01,
       },
       {
         toolName: 'write_file',
         decision: PolicyDecision.ASK_USER,
-        priority: 10,
+        priority: 1.01,
       },
     ]);
+
+    vi.doUnmock('node:fs/promises');
   });
 
-  it('should allow tools in tools.allowed', () => {
+  it('should allow tools in tools.allowed', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       tools: { allowed: ['run_shell_command'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
     const rule = config.rules?.find(
       (r) =>
         r.toolName === 'run_shell_command' &&
         r.decision === PolicyDecision.ALLOW,
     );
     expect(rule).toBeDefined();
-    expect(rule?.priority).toBe(100);
+    expect(rule?.priority).toBeCloseTo(2.3, 5); // Command line allow
   });
 
-  it('should deny tools in tools.exclude', () => {
+  it('should deny tools in tools.exclude', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       tools: { exclude: ['run_shell_command'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
     const rule = config.rules?.find(
       (r) =>
         r.toolName === 'run_shell_command' &&
         r.decision === PolicyDecision.DENY,
     );
     expect(rule).toBeDefined();
-    expect(rule?.priority).toBe(200);
+    expect(rule?.priority).toBeCloseTo(2.4, 5); // Command line exclude
   });
 
-  it('should allow tools from allowed MCP servers', () => {
+  it('should allow tools from allowed MCP servers', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcp: { allowed: ['my-server'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
     const rule = config.rules?.find(
       (r) =>
         r.toolName === 'my-server__*' && r.decision === PolicyDecision.ALLOW,
     );
     expect(rule).toBeDefined();
-    expect(rule?.priority).toBe(85);
+    expect(rule?.priority).toBe(2.1); // MCP allowed server
   });
 
-  it('should deny tools from excluded MCP servers', () => {
+  it('should deny tools from excluded MCP servers', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcp: { excluded: ['my-server'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
     const rule = config.rules?.find(
       (r) =>
         r.toolName === 'my-server__*' && r.decision === PolicyDecision.DENY,
     );
     expect(rule).toBeDefined();
-    expect(rule?.priority).toBe(195);
+    expect(rule?.priority).toBe(2.9); // MCP excluded server
   });
 
-  it('should allow tools from trusted MCP servers', () => {
+  it('should allow tools from trusted MCP servers', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcpServers: {
         'trusted-server': {
@@ -150,7 +213,10 @@ describe('createPolicyEngineConfig', () => {
         },
       },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     const trustedRule = config.rules?.find(
       (r) =>
@@ -158,7 +224,7 @@ describe('createPolicyEngineConfig', () => {
         r.decision === PolicyDecision.ALLOW,
     );
     expect(trustedRule).toBeDefined();
-    expect(trustedRule?.priority).toBe(90);
+    expect(trustedRule?.priority).toBe(2.2); // MCP trusted server
 
     // Untrusted server should not have an allow rule
     const untrustedRule = config.rules?.find(
@@ -169,7 +235,8 @@ describe('createPolicyEngineConfig', () => {
     expect(untrustedRule).toBeUndefined();
   });
 
-  it('should handle multiple MCP server configurations together', () => {
+  it('should handle multiple MCP server configurations together', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcp: {
         allowed: ['allowed-server'],
@@ -183,7 +250,10 @@ describe('createPolicyEngineConfig', () => {
         },
       },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     // Check allowed server
     const allowedRule = config.rules?.find(
@@ -192,7 +262,7 @@ describe('createPolicyEngineConfig', () => {
         r.decision === PolicyDecision.ALLOW,
     );
     expect(allowedRule).toBeDefined();
-    expect(allowedRule?.priority).toBe(85);
+    expect(allowedRule?.priority).toBe(2.1); // MCP allowed server
 
     // Check trusted server
     const trustedRule = config.rules?.find(
@@ -201,7 +271,7 @@ describe('createPolicyEngineConfig', () => {
         r.decision === PolicyDecision.ALLOW,
     );
     expect(trustedRule).toBeDefined();
-    expect(trustedRule?.priority).toBe(90);
+    expect(trustedRule?.priority).toBe(2.2); // MCP trusted server
 
     // Check excluded server
     const excludedRule = config.rules?.find(
@@ -210,33 +280,45 @@ describe('createPolicyEngineConfig', () => {
         r.decision === PolicyDecision.DENY,
     );
     expect(excludedRule).toBeDefined();
-    expect(excludedRule?.priority).toBe(195);
+    expect(excludedRule?.priority).toBe(2.9); // MCP excluded server
   });
 
-  it('should allow all tools in YOLO mode', () => {
+  it('should allow all tools in YOLO mode', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {};
-    const config = createPolicyEngineConfig(settings, ApprovalMode.YOLO);
+    const config = await createPolicyEngineConfig(settings, ApprovalMode.YOLO);
     const rule = config.rules?.find(
-      (r) => r.decision === PolicyDecision.ALLOW && r.priority === 0,
+      (r) => r.decision === PolicyDecision.ALLOW && !r.toolName,
     );
     expect(rule).toBeDefined();
+    // Priority 999 in default tier → 1.999
+    expect(rule?.priority).toBeCloseTo(1.999, 5);
   });
 
-  it('should allow edit tool in AUTO_EDIT mode', () => {
+  it('should allow edit tool in AUTO_EDIT mode', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {};
-    const config = createPolicyEngineConfig(settings, ApprovalMode.AUTO_EDIT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.AUTO_EDIT,
+    );
     const rule = config.rules?.find(
       (r) => r.toolName === 'replace' && r.decision === PolicyDecision.ALLOW,
     );
     expect(rule).toBeDefined();
-    expect(rule?.priority).toBe(15);
+    // Priority 15 in default tier → 1.015
+    expect(rule?.priority).toBeCloseTo(1.015, 5);
   });
 
-  it('should prioritize exclude over allow', () => {
+  it('should prioritize exclude over allow', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       tools: { allowed: ['run_shell_command'], exclude: ['run_shell_command'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
     const denyRule = config.rules?.find(
       (r) =>
         r.toolName === 'run_shell_command' &&
@@ -252,12 +334,16 @@ describe('createPolicyEngineConfig', () => {
     expect(denyRule!.priority).toBeGreaterThan(allowRule!.priority!);
   });
 
-  it('should prioritize specific tool allows over MCP server excludes', () => {
+  it('should prioritize specific tool allows over MCP server excludes', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcp: { excluded: ['my-server'] },
       tools: { allowed: ['my-server__specific-tool'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     const serverDenyRule = config.rules?.find(
       (r) =>
@@ -270,15 +356,16 @@ describe('createPolicyEngineConfig', () => {
     );
 
     expect(serverDenyRule).toBeDefined();
-    expect(serverDenyRule?.priority).toBe(195);
+    expect(serverDenyRule?.priority).toBe(2.9); // MCP excluded server
     expect(toolAllowRule).toBeDefined();
-    expect(toolAllowRule?.priority).toBe(100);
+    expect(toolAllowRule?.priority).toBeCloseTo(2.3, 5); // Command line allow
 
-    // Tool allow (100) has lower priority than server deny (195),
-    // so server deny wins - this might be counterintuitive
+    // Server deny (2.9) has higher priority than tool allow (2.3),
+    // so server deny wins (this is expected behavior - server-level blocks are security critical)
   });
 
-  it('should prioritize specific tool excludes over MCP server allows', () => {
+  it('should handle MCP server allows and tool excludes', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcp: { allowed: ['my-server'] },
       mcpServers: {
@@ -290,7 +377,10 @@ describe('createPolicyEngineConfig', () => {
       },
       tools: { exclude: ['my-server__dangerous-tool'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     const serverAllowRule = config.rules?.find(
       (r) =>
@@ -304,19 +394,22 @@ describe('createPolicyEngineConfig', () => {
 
     expect(serverAllowRule).toBeDefined();
     expect(toolDenyRule).toBeDefined();
+    // Command line exclude (2.4) has higher priority than MCP server trust (2.2)
+    // This is the correct behavior - specific exclusions should beat general server trust
     expect(toolDenyRule!.priority).toBeGreaterThan(serverAllowRule!.priority!);
   });
 
-  it('should handle complex priority scenarios correctly', () => {
+  it('should handle complex priority scenarios correctly', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       tools: {
-        autoAccept: true, // Priority 50 for read-only tools
-        allowed: ['my-server__tool1', 'other-tool'], // Priority 100
-        exclude: ['my-server__tool2', 'glob'], // Priority 200
+        autoAccept: true, // Not used in policy system (modes handle this)
+        allowed: ['my-server__tool1', 'other-tool'], // Priority 2.3
+        exclude: ['my-server__tool2', 'glob'], // Priority 2.4
       },
       mcp: {
-        allowed: ['allowed-server'], // Priority 85
-        excluded: ['excluded-server'], // Priority 195
+        allowed: ['allowed-server'], // Priority 2.1
+        excluded: ['excluded-server'], // Priority 2.9
       },
       mcpServers: {
         'trusted-server': {
@@ -326,7 +419,10 @@ describe('createPolicyEngineConfig', () => {
         },
       },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     // Verify glob is denied even though autoAccept would allow it
     const globDenyRule = config.rules?.find(
@@ -337,8 +433,10 @@ describe('createPolicyEngineConfig', () => {
     );
     expect(globDenyRule).toBeDefined();
     expect(globAllowRule).toBeDefined();
-    expect(globDenyRule!.priority).toBe(200);
-    expect(globAllowRule!.priority).toBe(50);
+    // Deny from settings (user tier)
+    expect(globDenyRule!.priority).toBeCloseTo(2.4, 5); // Command line exclude
+    // Allow from default TOML: 1 + 50/1000 = 1.05
+    expect(globAllowRule!.priority).toBeCloseTo(1.05, 5);
 
     // Verify all priority levels are correct
     const priorities = config.rules
@@ -349,16 +447,17 @@ describe('createPolicyEngineConfig', () => {
       }))
       .sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
 
-    // Check that the highest priority items are the excludes
+    // Check that the highest priority items are the excludes (user tier: 2.4)
     const highestPriorityExcludes = priorities?.filter(
-      (p) => p.priority === 200,
+      (p) => Math.abs(p.priority! - 2.4) < 0.01,
     );
     expect(
       highestPriorityExcludes?.every((p) => p.decision === PolicyDecision.DENY),
     ).toBe(true);
   });
 
-  it('should handle MCP servers with undefined trust property', () => {
+  it('should handle MCP servers with undefined trust property', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcpServers: {
         'no-trust-property': {
@@ -373,7 +472,10 @@ describe('createPolicyEngineConfig', () => {
         },
       },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     // Neither server should have an allow rule
     const noTrustRule = config.rules?.find(
@@ -391,20 +493,22 @@ describe('createPolicyEngineConfig', () => {
     expect(explicitFalseRule).toBeUndefined();
   });
 
-  it('should not add write tool rules in YOLO mode', () => {
+  it('should have YOLO allow-all rule beat write tool rules in YOLO mode', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       tools: { exclude: ['dangerous-tool'] },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.YOLO);
+    const config = await createPolicyEngineConfig(settings, ApprovalMode.YOLO);
 
-    // Should have the wildcard allow rule with priority 0
+    // Should have the wildcard allow rule
     const wildcardRule = config.rules?.find(
-      (r) =>
-        !r.toolName && r.decision === PolicyDecision.ALLOW && r.priority === 0,
+      (r) => !r.toolName && r.decision === PolicyDecision.ALLOW,
     );
     expect(wildcardRule).toBeDefined();
+    // Priority 999 in default tier → 1.999
+    expect(wildcardRule?.priority).toBeCloseTo(1.999, 5);
 
-    // Should NOT have any write tool rules (which would have priority 10)
+    // Write tool ASK_USER rules are present (no modes restriction now)
     const writeToolRules = config.rules?.filter(
       (r) =>
         [
@@ -415,18 +519,24 @@ describe('createPolicyEngineConfig', () => {
           WEB_FETCH_TOOL_NAME,
         ].includes(r.toolName || '') && r.decision === PolicyDecision.ASK_USER,
     );
-    expect(writeToolRules).toHaveLength(0);
+    expect(writeToolRules).toBeDefined();
 
-    // Should still have the exclude rule
+    // But YOLO allow-all rule has higher priority than all write tool rules
+    writeToolRules?.forEach((writeRule) => {
+      expect(wildcardRule!.priority).toBeGreaterThan(writeRule.priority!);
+    });
+
+    // Should still have the exclude rule (from settings, user tier)
     const excludeRule = config.rules?.find(
       (r) =>
         r.toolName === 'dangerous-tool' && r.decision === PolicyDecision.DENY,
     );
     expect(excludeRule).toBeDefined();
-    expect(excludeRule?.priority).toBe(200);
+    expect(excludeRule?.priority).toBeCloseTo(2.4, 5); // Command line exclude
   });
 
-  it('should handle combination of trusted server and excluded server for same name', () => {
+  it('should handle combination of trusted server and excluded server for same name', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {
       mcpServers: {
         'conflicted-server': {
@@ -439,7 +549,10 @@ describe('createPolicyEngineConfig', () => {
         excluded: ['conflicted-server'], // Priority 195
       },
     };
-    const config = createPolicyEngineConfig(settings, ApprovalMode.DEFAULT);
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
 
     // Both rules should exist
     const trustRule = config.rules?.find(
@@ -454,18 +567,19 @@ describe('createPolicyEngineConfig', () => {
     );
 
     expect(trustRule).toBeDefined();
-    expect(trustRule?.priority).toBe(90);
+    expect(trustRule?.priority).toBe(2.2); // MCP trusted server
     expect(excludeRule).toBeDefined();
-    expect(excludeRule?.priority).toBe(195);
+    expect(excludeRule?.priority).toBe(2.9); // MCP excluded server
 
     // Exclude (195) should win over trust (90) when evaluated
   });
 
-  it('should handle all approval modes correctly', () => {
+  it('should handle all approval modes correctly', async () => {
+    const { createPolicyEngineConfig } = await import('./policy.js');
     const settings: Settings = {};
 
     // Test DEFAULT mode
-    const defaultConfig = createPolicyEngineConfig(
+    const defaultConfig = await createPolicyEngineConfig(
       settings,
       ApprovalMode.DEFAULT,
     );
@@ -477,16 +591,20 @@ describe('createPolicyEngineConfig', () => {
     ).toBeUndefined();
 
     // Test YOLO mode
-    const yoloConfig = createPolicyEngineConfig(settings, ApprovalMode.YOLO);
+    const yoloConfig = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.YOLO,
+    );
     expect(yoloConfig.defaultDecision).toBe(PolicyDecision.ASK_USER);
     const yoloWildcard = yoloConfig.rules?.find(
       (r) => !r.toolName && r.decision === PolicyDecision.ALLOW,
     );
     expect(yoloWildcard).toBeDefined();
-    expect(yoloWildcard?.priority).toBe(0);
+    // Priority 999 in default tier → 1.999
+    expect(yoloWildcard?.priority).toBeCloseTo(1.999, 5);
 
     // Test AUTO_EDIT mode
-    const autoEditConfig = createPolicyEngineConfig(
+    const autoEditConfig = await createPolicyEngineConfig(
       settings,
       ApprovalMode.AUTO_EDIT,
     );
@@ -495,6 +613,1044 @@ describe('createPolicyEngineConfig', () => {
       (r) => r.toolName === 'replace' && r.decision === PolicyDecision.ALLOW,
     );
     expect(editRule).toBeDefined();
-    expect(editRule?.priority).toBe(15);
+    // Priority 15 in default tier → 1.015
+    expect(editRule?.priority).toBeCloseTo(1.015, 5);
+  });
+
+  it('should support argsPattern in policy rules', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'write.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/write.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+argsPattern = "\\"command\\":\\"git (status|diff|log)\\""
+decision = "allow"
+priority = 150
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const rule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+    expect(rule).toBeDefined();
+    // Priority 150 in user tier → 2.150
+    expect(rule?.priority).toBeCloseTo(2.15, 5);
+    expect(rule?.argsPattern).toBeInstanceOf(RegExp);
+    expect(rule?.argsPattern?.test('{"command":"git status"}')).toBe(true);
+    expect(rule?.argsPattern?.test('{"command":"git diff"}')).toBe(true);
+    expect(rule?.argsPattern?.test('{"command":"git log"}')).toBe(true);
+    expect(rule?.argsPattern?.test('{"command":"git commit"}')).toBe(false);
+    expect(rule?.argsPattern?.test('{"command":"git push"}')).toBe(false);
+
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should load and apply user-defined policies', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'write.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/write.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+decision = "allow"
+priority = 150
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const rule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+    expect(rule).toBeDefined();
+    // Priority 150 in user tier → 2.150
+    expect(rule?.priority).toBeCloseTo(2.15, 5);
+
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should load and apply admin policies over user and default policies', async () => {
+    process.env['GEMINI_CLI_SYSTEM_SETTINGS_PATH'] = '/tmp/admin/settings.json';
+
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (typeof path === 'string') {
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('/tmp/admin/policies'))
+          ) {
+            return [
+              {
+                name: 'write.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              },
+            ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+          }
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('.gemini/policies'))
+          ) {
+            return [
+              {
+                name: 'write.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              },
+            ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+          }
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          (nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('/tmp/admin/policies/write.toml')) ||
+            path.endsWith('tmp/admin/policies/write.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+decision = "deny"
+priority = 200
+`;
+        }
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/write.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+decision = "allow"
+priority = 150
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const denyRule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.DENY,
+    );
+    const allowRule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+
+    expect(denyRule).toBeDefined();
+    // Priority 200 in admin tier → 3.200
+    expect(denyRule?.priority).toBeCloseTo(3.2, 5);
+    expect(allowRule).toBeDefined();
+    // Priority 150 in user tier → 2.150
+    expect(allowRule?.priority).toBeCloseTo(2.15, 5);
+    expect(denyRule!.priority).toBeGreaterThan(allowRule!.priority!);
+
+    delete process.env['GEMINI_CLI_SYSTEM_SETTINGS_PATH'];
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should apply priority bands to ensure Admin > User > Default hierarchy', async () => {
+    process.env['GEMINI_CLI_SYSTEM_SETTINGS_PATH'] = '/tmp/admin/settings.json';
+
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (typeof path === 'string') {
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('/tmp/admin/policies'))
+          ) {
+            return [
+              {
+                name: 'admin-policy.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              },
+            ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+          }
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('.gemini/policies'))
+          ) {
+            return [
+              {
+                name: 'user-policy.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              },
+            ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+          }
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (typeof path === 'string') {
+          // Admin policy with low priority (100)
+          if (
+            nodePath
+              .normalize(path)
+              .includes(
+                nodePath.normalize('/tmp/admin/policies/admin-policy.toml'),
+              )
+          ) {
+            return `
+[[rule]]
+toolName = "run_shell_command"
+decision = "deny"
+priority = 100
+`;
+          }
+          // User policy with high priority (900)
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('.gemini/policies/user-policy.toml'))
+          ) {
+            return `
+[[rule]]
+toolName = "run_shell_command"
+decision = "allow"
+priority = 900
+`;
+          }
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const adminRule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.DENY,
+    );
+    const userRule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+
+    expect(adminRule).toBeDefined();
+    expect(userRule).toBeDefined();
+
+    // Admin priority should be 3.100 (tier 3 + 100/1000)
+    expect(adminRule?.priority).toBeCloseTo(3.1, 5);
+    // User priority should be 2.900 (tier 2 + 900/1000)
+    expect(userRule?.priority).toBeCloseTo(2.9, 5);
+
+    // Admin rule with low priority should still beat user rule with high priority
+    expect(adminRule!.priority).toBeGreaterThan(userRule!.priority!);
+
+    delete process.env['GEMINI_CLI_SYSTEM_SETTINGS_PATH'];
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should apply correct priority transformations for each tier', async () => {
+    process.env['GEMINI_CLI_SYSTEM_SETTINGS_PATH'] = '/tmp/admin/settings.json';
+
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (typeof path === 'string') {
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('/tmp/admin/policies'))
+          ) {
+            return [
+              {
+                name: 'admin.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              },
+            ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+          }
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('.gemini/policies'))
+          ) {
+            return [
+              {
+                name: 'user.toml',
+                isFile: () => true,
+                isDirectory: () => false,
+              },
+            ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+          }
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (typeof path === 'string') {
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('/tmp/admin/policies/admin.toml'))
+          ) {
+            return `
+[[rule]]
+toolName = "admin-tool"
+decision = "allow"
+priority = 500
+`;
+          }
+          if (
+            nodePath
+              .normalize(path)
+              .includes(nodePath.normalize('.gemini/policies/user.toml'))
+          ) {
+            return `
+[[rule]]
+toolName = "user-tool"
+decision = "allow"
+priority = 500
+`;
+          }
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const adminRule = config.rules?.find((r) => r.toolName === 'admin-tool');
+    const userRule = config.rules?.find((r) => r.toolName === 'user-tool');
+
+    expect(adminRule).toBeDefined();
+    expect(userRule).toBeDefined();
+
+    // Priority 500 in admin tier → 3.500
+    expect(adminRule?.priority).toBeCloseTo(3.5, 5);
+    // Priority 500 in user tier → 2.500
+    expect(userRule?.priority).toBeCloseTo(2.5, 5);
+
+    delete process.env['GEMINI_CLI_SYSTEM_SETTINGS_PATH'];
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should support array syntax for toolName in TOML policies', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'array-test.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/array-test.toml'))
+        ) {
+          return `
+# Test array syntax for toolName
+[[rule]]
+toolName = ["tool1", "tool2", "tool3"]
+decision = "allow"
+priority = 100
+
+# Test array syntax with mcpName
+[[rule]]
+mcpName = "google-workspace"
+toolName = ["calendar.findFreeTime", "calendar.getEvent", "calendar.list"]
+decision = "allow"
+priority = 150
+`;
+        }
+        return actualFs.readFile(
+          path,
+          options as Parameters<typeof actualFs.readFile>[1],
+        );
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    // Should create separate rules for each tool in the array
+    const tool1Rule = config.rules?.find((r) => r.toolName === 'tool1');
+    const tool2Rule = config.rules?.find((r) => r.toolName === 'tool2');
+    const tool3Rule = config.rules?.find((r) => r.toolName === 'tool3');
+
+    expect(tool1Rule).toBeDefined();
+    expect(tool2Rule).toBeDefined();
+    expect(tool3Rule).toBeDefined();
+
+    // All should have the same decision and priority
+    expect(tool1Rule?.decision).toBe(PolicyDecision.ALLOW);
+    expect(tool2Rule?.decision).toBe(PolicyDecision.ALLOW);
+    expect(tool3Rule?.decision).toBe(PolicyDecision.ALLOW);
+
+    // Priority 100 in user tier → 2.100
+    expect(tool1Rule?.priority).toBeCloseTo(2.1, 5);
+    expect(tool2Rule?.priority).toBeCloseTo(2.1, 5);
+    expect(tool3Rule?.priority).toBeCloseTo(2.1, 5);
+
+    // MCP tools should have composite names
+    const calendarFreeTime = config.rules?.find(
+      (r) => r.toolName === 'google-workspace__calendar.findFreeTime',
+    );
+    const calendarGetEvent = config.rules?.find(
+      (r) => r.toolName === 'google-workspace__calendar.getEvent',
+    );
+    const calendarList = config.rules?.find(
+      (r) => r.toolName === 'google-workspace__calendar.list',
+    );
+
+    expect(calendarFreeTime).toBeDefined();
+    expect(calendarGetEvent).toBeDefined();
+    expect(calendarList).toBeDefined();
+
+    // All should have the same decision and priority
+    expect(calendarFreeTime?.decision).toBe(PolicyDecision.ALLOW);
+    expect(calendarGetEvent?.decision).toBe(PolicyDecision.ALLOW);
+    expect(calendarList?.decision).toBe(PolicyDecision.ALLOW);
+
+    // Priority 150 in user tier → 2.150
+    expect(calendarFreeTime?.priority).toBeCloseTo(2.15, 5);
+    expect(calendarGetEvent?.priority).toBeCloseTo(2.15, 5);
+    expect(calendarList?.priority).toBeCloseTo(2.15, 5);
+
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should support commandPrefix syntax for shell commands', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'shell.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandPrefix = "git status"
+decision = "allow"
+priority = 100
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const rule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+    expect(rule).toBeDefined();
+    expect(rule?.priority).toBeCloseTo(2.1, 5);
+    expect(rule?.argsPattern).toBeInstanceOf(RegExp);
+    // Should match commands starting with "git status"
+    expect(rule?.argsPattern?.test('{"command":"git status"}')).toBe(true);
+    expect(rule?.argsPattern?.test('{"command":"git status --short"}')).toBe(
+      true,
+    );
+    // Should not match other commands
+    expect(rule?.argsPattern?.test('{"command":"git branch"}')).toBe(false);
+
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should support array syntax for commandPrefix', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'shell.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandPrefix = ["git status", "git branch", "git log"]
+decision = "allow"
+priority = 100
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const rules = config.rules?.filter(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+
+    // Should create 3 rules (one for each prefix)
+    expect(rules?.length).toBe(3);
+
+    // All rules should have the same priority and decision
+    rules?.forEach((rule) => {
+      expect(rule.priority).toBeCloseTo(2.1, 5);
+      expect(rule.decision).toBe(PolicyDecision.ALLOW);
+    });
+
+    // Test that each prefix pattern works
+    const patterns = rules?.map((r) => r.argsPattern);
+    expect(patterns?.some((p) => p?.test('{"command":"git status"}'))).toBe(
+      true,
+    );
+    expect(patterns?.some((p) => p?.test('{"command":"git branch"}'))).toBe(
+      true,
+    );
+    expect(patterns?.some((p) => p?.test('{"command":"git log"}'))).toBe(true);
+    // Should not match other commands
+    expect(patterns?.some((p) => p?.test('{"command":"git commit"}'))).toBe(
+      false,
+    );
+
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should support commandRegex syntax for shell commands', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'shell.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandRegex = "git (status|branch|log).*"
+decision = "allow"
+priority = 100
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const rule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+    expect(rule).toBeDefined();
+    expect(rule?.priority).toBeCloseTo(2.1, 5);
+    expect(rule?.argsPattern).toBeInstanceOf(RegExp);
+
+    // Should match commands matching the regex
+    expect(rule?.argsPattern?.test('{"command":"git status"}')).toBe(true);
+    expect(rule?.argsPattern?.test('{"command":"git status --short"}')).toBe(
+      true,
+    );
+    expect(rule?.argsPattern?.test('{"command":"git branch"}')).toBe(true);
+    expect(rule?.argsPattern?.test('{"command":"git log --all"}')).toBe(true);
+    // Should not match commands not in the regex
+    expect(rule?.argsPattern?.test('{"command":"git commit"}')).toBe(false);
+    expect(rule?.argsPattern?.test('{"command":"git push"}')).toBe(false);
+
+    vi.doUnmock('node:fs/promises');
+  });
+
+  it('should escape regex special characters in commandPrefix', async () => {
+    const actualFs =
+      await vi.importActual<typeof import('node:fs/promises')>(
+        'node:fs/promises',
+      );
+
+    const mockReaddir = vi.fn(
+      async (
+        path: string | Buffer | URL,
+        options?: Parameters<typeof actualFs.readdir>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies'))
+        ) {
+          return [
+            {
+              name: 'shell.toml',
+              isFile: () => true,
+              isDirectory: () => false,
+            },
+          ] as unknown as Awaited<ReturnType<typeof actualFs.readdir>>;
+        }
+        return actualFs.readdir(
+          path,
+          options as Parameters<typeof actualFs.readdir>[1],
+        );
+      },
+    );
+
+    const mockReadFile = vi.fn(
+      async (
+        path: Parameters<typeof actualFs.readFile>[0],
+        options: Parameters<typeof actualFs.readFile>[1],
+      ) => {
+        if (
+          typeof path === 'string' &&
+          nodePath
+            .normalize(path)
+            .includes(nodePath.normalize('.gemini/policies/shell.toml'))
+        ) {
+          return `
+[[rule]]
+toolName = "run_shell_command"
+commandPrefix = "git log *.txt"
+decision = "allow"
+priority = 100
+`;
+        }
+        return actualFs.readFile(path, options);
+      },
+    );
+
+    vi.doMock('node:fs/promises', () => ({
+      ...actualFs,
+      default: { ...actualFs, readFile: mockReadFile, readdir: mockReaddir },
+      readFile: mockReadFile,
+      readdir: mockReaddir,
+    }));
+
+    vi.resetModules();
+    const { createPolicyEngineConfig } = await import('./policy.js');
+
+    const settings: Settings = {};
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+    );
+
+    const rule = config.rules?.find(
+      (r) =>
+        r.toolName === 'run_shell_command' &&
+        r.decision === PolicyDecision.ALLOW,
+    );
+    expect(rule).toBeDefined();
+    // Should match the literal string "git log *.txt" (asterisk is escaped)
+    expect(rule?.argsPattern?.test('{"command":"git log *.txt"}')).toBe(true);
+    // Should not match "git log a.txt" because * is escaped to literal asterisk
+    expect(rule?.argsPattern?.test('{"command":"git log a.txt"}')).toBe(false);
+
+    vi.doUnmock('node:fs/promises');
   });
 });
diff --git a/packages/cli/src/config/policy.ts b/packages/cli/src/config/policy.ts
index 0ebe8f06e0..7714780c47 100644
--- a/packages/cli/src/config/policy.ts
+++ b/packages/cli/src/config/policy.ts
@@ -8,88 +8,172 @@ import {
   type PolicyEngineConfig,
   PolicyDecision,
   type PolicyRule,
-  ApprovalMode,
-  // Read-only tools
-  GREP_TOOL_NAME,
-  LS_TOOL_NAME,
-  READ_MANY_FILES_TOOL_NAME,
-  READ_FILE_TOOL_NAME,
-  // Write tools
-  SHELL_TOOL_NAME,
-  WRITE_FILE_TOOL_NAME,
-  WEB_FETCH_TOOL_NAME,
-  GLOB_TOOL_NAME,
-  EDIT_TOOL_NAME,
-  MEMORY_TOOL_NAME,
-  WEB_SEARCH_TOOL_NAME,
+  type ApprovalMode,
   type PolicyEngine,
   type MessageBus,
   MessageBusType,
   type UpdatePolicy,
+  Storage,
 } from '@google/gemini-cli-core';
-import { type Settings } from './settings.js';
+import { type Settings, getSystemSettingsPath } from './settings.js';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import {
+  loadPoliciesFromToml,
+  type PolicyFileError,
+} from './policy-toml-loader.js';
 
-// READ_ONLY_TOOLS is a list of built-in tools that do not modify the user's
-// files or system state.
-const READ_ONLY_TOOLS = new Set([
-  GLOB_TOOL_NAME,
-  GREP_TOOL_NAME,
-  LS_TOOL_NAME,
-  READ_FILE_TOOL_NAME,
-  READ_MANY_FILES_TOOL_NAME,
-  WEB_SEARCH_TOOL_NAME,
-]);
+// Get the directory name of the current module
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
 
-// WRITE_TOOLS is a list of built-in tools that can modify the user's files or
-// system state. These tools have a shouldConfirmExecute method.
-// We are keeping this here for visibility and to maintain backwards compatibility
-// with the existing tool permissions system. Eventually we'll remove this and
-// any tool that isn't read only will require a confirmation unless altered by
-// config and policy.
-const WRITE_TOOLS = new Set([
-  EDIT_TOOL_NAME,
-  MEMORY_TOOL_NAME,
-  SHELL_TOOL_NAME,
-  WRITE_FILE_TOOL_NAME,
-  WEB_FETCH_TOOL_NAME,
-]);
+// Store policy loading errors to be displayed after UI is ready
+let storedPolicyErrors: string[] = [];
 
-export function createPolicyEngineConfig(
+function getPolicyDirectories(): string[] {
+  const DEFAULT_POLICIES_DIR = path.resolve(__dirname, 'policies');
+  const USER_POLICIES_DIR = Storage.getUserPoliciesDir();
+  const systemSettingsPath = getSystemSettingsPath();
+  const ADMIN_POLICIES_DIR = path.join(
+    path.dirname(systemSettingsPath),
+    'policies',
+  );
+
+  return [
+    DEFAULT_POLICIES_DIR,
+    USER_POLICIES_DIR,
+    ADMIN_POLICIES_DIR,
+  ].reverse();
+}
+
+/**
+ * Determines the policy tier (1=default, 2=user, 3=admin) for a given directory.
+ * This is used by the TOML loader to assign priority bands.
+ */
+function getPolicyTier(dir: string): number {
+  const DEFAULT_POLICIES_DIR = path.resolve(__dirname, 'policies');
+  const USER_POLICIES_DIR = Storage.getUserPoliciesDir();
+  const systemSettingsPath = getSystemSettingsPath();
+  const ADMIN_POLICIES_DIR = path.join(
+    path.dirname(systemSettingsPath),
+    'policies',
+  );
+
+  // Normalize paths for comparison
+  const normalizedDir = path.resolve(dir);
+  const normalizedDefault = path.resolve(DEFAULT_POLICIES_DIR);
+  const normalizedUser = path.resolve(USER_POLICIES_DIR);
+  const normalizedAdmin = path.resolve(ADMIN_POLICIES_DIR);
+
+  if (normalizedDir === normalizedDefault) return 1;
+  if (normalizedDir === normalizedUser) return 2;
+  if (normalizedDir === normalizedAdmin) return 3;
+
+  // Default to tier 1 if unknown
+  return 1;
+}
+
+/**
+ * Formats a policy file error for console logging.
+ */
+function formatPolicyError(error: PolicyFileError): string {
+  const tierLabel = error.tier.toUpperCase();
+  let message = `[${tierLabel}] Policy file error in ${error.fileName}:\n`;
+  message += `  ${error.message}`;
+  if (error.details) {
+    message += `\n${error.details}`;
+  }
+  if (error.suggestion) {
+    message += `\n  Suggestion: ${error.suggestion}`;
+  }
+  return message;
+}
+
+export async function createPolicyEngineConfig(
   settings: Settings,
   approvalMode: ApprovalMode,
-): PolicyEngineConfig {
-  const rules: PolicyRule[] = [];
+): Promise<PolicyEngineConfig> {
+  const policyDirs = getPolicyDirectories();
+
+  // Load policies from TOML files
+  const { rules: tomlRules, errors } = await loadPoliciesFromToml(
+    approvalMode,
+    policyDirs,
+    getPolicyTier,
+  );
+
+  // Store any errors encountered during TOML loading
+  // The UI retrieves them via getPolicyErrorsForUI() once it is ready.
+  if (errors.length > 0) {
+    storedPolicyErrors = errors.map((error) => formatPolicyError(error));
+  }
+
+  const rules: PolicyRule[] = [...tomlRules];
 
   // Priority system for policy rules:
   // - Higher priority numbers win over lower priority numbers
   // - When multiple rules match, the highest priority rule is applied
   // - Rules are evaluated in order of priority (highest first)
   //
-  // Priority levels used in this configuration:
-  //   0: Default allow-all (YOLO mode only)
-  //   10: Write tools default to ASK_USER
-  //   50: Auto-accept read-only tools
-  //   85: MCP servers allowed list
-  //   90: MCP servers with trust=true
-  //   100: Explicitly allowed individual tools
-  //   195: Explicitly excluded MCP servers
-  //   199: Tools that the user has selected as "Always Allow" in the interactive UI.
-  //   200: Explicitly excluded individual tools (highest priority)
+  // Priority bands (tiers):
+  // - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
+  // - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+  // - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+  //
+  // This ensures Admin > User > Default hierarchy is always preserved,
+  // while allowing user-specified priorities to work within each tier.
+  //
+  // Settings-based and dynamic rules (all in user tier 2.x):
+  //   2.95: Tools that the user has selected as "Always Allow" in the interactive UI
+  //   2.9:  MCP servers excluded list (security: persistent server blocks)
+  //   2.4:  Command line flag --exclude-tools (explicit temporary blocks)
+  //   2.3:  Command line flag --allowed-tools (explicit temporary allows)
+  //   2.2:  MCP servers with trust=true (persistent trusted servers)
+  //   2.1:  MCP servers allowed list (persistent general server allows)
+  //
+  // TOML policy priorities (before transformation):
+  //   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+  //   15: Auto-edit tool override (becomes 1.015 in default tier)
+  //   50: Read-only tools (becomes 1.050 in default tier)
+  //   999: YOLO mode allow-all (becomes 1.999 in default tier)
 
-  // MCP servers that are explicitly allowed in settings.mcp.allowed
-  // Priority: 85 (lower than trusted servers)
-  if (settings.mcp?.allowed) {
-    for (const serverName of settings.mcp.allowed) {
+  // MCP servers that are explicitly excluded in settings.mcp.excluded
+  // Priority: 2.9 (highest settings-based priority - persistent server blocks)
+  if (settings.mcp?.excluded) {
+    for (const serverName of settings.mcp.excluded) {
       rules.push({
         toolName: `${serverName}__*`,
+        decision: PolicyDecision.DENY,
+        priority: 2.9,
+      });
+    }
+  }
+
+  // Tools that are explicitly excluded in the settings.
+  // Priority: 2.4 (user tier - explicit temporary blocks)
+  if (settings.tools?.exclude) {
+    for (const tool of settings.tools.exclude) {
+      rules.push({
+        toolName: tool,
+        decision: PolicyDecision.DENY,
+        priority: 2.4,
+      });
+    }
+  }
+
+  // Tools that are explicitly allowed in the settings.
+  // Priority: 2.3 (user tier - explicit temporary allows)
+  if (settings.tools?.allowed) {
+    for (const tool of settings.tools.allowed) {
+      rules.push({
+        toolName: tool,
         decision: PolicyDecision.ALLOW,
-        priority: 85,
+        priority: 2.3,
       });
     }
   }
 
   // MCP servers that are trusted in the settings.
-  // Priority: 90 (higher than general allowed servers but lower than explicit tool allows)
+  // Priority: 2.2 (user tier - persistent trusted servers)
   if (settings.mcpServers) {
     for (const [serverName, serverConfig] of Object.entries(
       settings.mcpServers,
@@ -100,83 +184,24 @@ export function createPolicyEngineConfig(
         rules.push({
           toolName: `${serverName}__*`,
           decision: PolicyDecision.ALLOW,
-          priority: 90,
+          priority: 2.2,
         });
       }
     }
   }
 
-  // Tools that are explicitly allowed in the settings.
-  // Priority: 100
-  if (settings.tools?.allowed) {
-    for (const tool of settings.tools.allowed) {
-      rules.push({
-        toolName: tool,
-        decision: PolicyDecision.ALLOW,
-        priority: 100,
-      });
-    }
-  }
-
-  // Tools that are explicitly excluded in the settings.
-  // Priority: 200
-  if (settings.tools?.exclude) {
-    for (const tool of settings.tools.exclude) {
-      rules.push({
-        toolName: tool,
-        decision: PolicyDecision.DENY,
-        priority: 200,
-      });
-    }
-  }
-
-  // MCP servers that are explicitly excluded in settings.mcp.excluded
-  // Priority: 195 (high priority to block servers)
-  if (settings.mcp?.excluded) {
-    for (const serverName of settings.mcp.excluded) {
+  // MCP servers that are explicitly allowed in settings.mcp.allowed
+  // Priority: 2.1 (user tier - persistent general server allows)
+  if (settings.mcp?.allowed) {
+    for (const serverName of settings.mcp.allowed) {
       rules.push({
         toolName: `${serverName}__*`,
-        decision: PolicyDecision.DENY,
-        priority: 195,
+        decision: PolicyDecision.ALLOW,
+        priority: 2.1,
       });
     }
   }
 
-  // Allow all read-only tools.
-  // Priority: 50
-  for (const tool of READ_ONLY_TOOLS) {
-    rules.push({
-      toolName: tool,
-      decision: PolicyDecision.ALLOW,
-      priority: 50,
-    });
-  }
-
-  // Only add write tool rules if not in YOLO mode
-  // In YOLO mode, the wildcard ALLOW rule handles everything
-  if (approvalMode !== ApprovalMode.YOLO) {
-    for (const tool of WRITE_TOOLS) {
-      rules.push({
-        toolName: tool,
-        decision: PolicyDecision.ASK_USER,
-        priority: 10,
-      });
-    }
-  }
-
-  if (approvalMode === ApprovalMode.YOLO) {
-    rules.push({
-      decision: PolicyDecision.ALLOW,
-      priority: 0, // Lowest priority - catches everything not explicitly configured
-    });
-  } else if (approvalMode === ApprovalMode.AUTO_EDIT) {
-    rules.push({
-      toolName: EDIT_TOOL_NAME,
-      decision: PolicyDecision.ALLOW,
-      priority: 15, // Higher than write tools (10) to override ASK_USER
-    });
-  }
-
   return {
     rules,
     defaultDecision: PolicyDecision.ASK_USER,
@@ -195,8 +220,23 @@ export function createPolicyUpdater(
       policyEngine.addRule({
         toolName,
         decision: PolicyDecision.ALLOW,
-        priority: 199, // High priority, but lower than explicit DENY (200)
+        // User tier (2) + high priority (950/1000) = 2.95
+        // This ensures user "always allow" selections outrank settings-based
+        // rules (2.1-2.9, even excludes) but lose to admin policies (3.xxx)
+        priority: 2.95,
       });
     },
   );
 }
+
+/**
+ * Gets and clears any policy errors that were stored during config loading.
+ * This should be called once the UI is ready to display errors.
+ *
+ * @returns Array of formatted error messages, or empty array if no errors
+ */
+export function getPolicyErrorsForUI(): string[] {
+  const errors = [...storedPolicyErrors];
+  storedPolicyErrors = []; // Clear after retrieving
+  return errors;
+}
diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index a81ad63280..eef68e4e03 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -50,6 +50,7 @@ import {
 } from '@google/gemini-cli-core';
 import { validateAuthMethod } from '../config/auth.js';
 import { loadHierarchicalGeminiMemory } from '../config/config.js';
+import { getPolicyErrorsForUI } from '../config/policy.js';
 import process from 'node:process';
 import { useHistory } from './hooks/useHistoryManager.js';
 import { useMemoryMonitor } from './hooks/useMemoryMonitor.js';
@@ -885,11 +886,23 @@ Logging in with Google... Please restart Gemini CLI to continue.
     };
     appEvents.on(AppEvent.LogError, logErrorHandler);
 
+    // Emit any policy errors that were stored during config loading
+    // Only show these when message bus integration is enabled, as policies
+    // are only active when the message bus is being used.
+    if (config.getEnableMessageBusIntegration()) {
+      const policyErrors = getPolicyErrorsForUI();
+      if (policyErrors.length > 0) {
+        for (const error of policyErrors) {
+          appEvents.emit(AppEvent.LogError, error);
+        }
+      }
+    }
+
     return () => {
       appEvents.off(AppEvent.OpenDebugConsole, openDebugConsole);
       appEvents.off(AppEvent.LogError, logErrorHandler);
     };
-  }, [handleNewMessage]);
+  }, [handleNewMessage, config]);
 
   useEffect(() => {
     if (ctrlCTimerRef.current) {
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 398914ae5a..969dc0a91e 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -1101,6 +1101,11 @@ export class Config {
   async createToolRegistry(): Promise<ToolRegistry> {
     const registry = new ToolRegistry(this, this.eventEmitter);
 
+    // Set message bus on tool registry before discovery so MCP tools can access it
+    if (this.getEnableMessageBusIntegration()) {
+      registry.setMessageBus(this.messageBus);
+    }
+
     // helper to create & register core tools that are enabled
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     const registerCoreTool = (ToolClass: any, ...args: unknown[]) => {
diff --git a/packages/core/src/config/storage.ts b/packages/core/src/config/storage.ts
index 18c0a4faee..75883698ac 100644
--- a/packages/core/src/config/storage.ts
+++ b/packages/core/src/config/storage.ts
@@ -54,6 +54,10 @@ export class Storage {
     return path.join(Storage.getGlobalGeminiDir(), 'memory.md');
   }
 
+  static getUserPoliciesDir(): string {
+    return path.join(Storage.getGlobalGeminiDir(), 'policies');
+  }
+
   static getGlobalTempDir(): string {
     return path.join(Storage.getGlobalGeminiDir(), TMP_DIR_NAME);
   }
diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts
index cf4388c09d..cc98cc5966 100644
--- a/packages/core/src/policy/policy-engine.ts
+++ b/packages/core/src/policy/policy-engine.ts
@@ -11,6 +11,7 @@ import {
   type PolicyRule,
 } from './types.js';
 import { stableStringify } from './stable-stringify.js';
+import { debugLogger } from '../utils/debugLogger.js';
 
 function ruleMatches(
   rule: PolicyRule,
@@ -71,14 +72,24 @@ export class PolicyEngine {
       stringifiedArgs = stableStringify(toolCall.args);
     }
 
+    debugLogger.debug(
+      `[PolicyEngine.check] toolCall.name: ${toolCall.name}, stringifiedArgs: ${stringifiedArgs}`,
+    );
+
     // Find the first matching rule (already sorted by priority)
     for (const rule of this.rules) {
       if (ruleMatches(rule, toolCall, stringifiedArgs)) {
+        debugLogger.debug(
+          `[PolicyEngine.check] MATCHED rule: toolName=${rule.toolName}, decision=${rule.decision}, priority=${rule.priority}, argsPattern=${rule.argsPattern?.source || 'none'}`,
+        );
         return this.applyNonInteractiveMode(rule.decision);
       }
     }
 
     // No matching rule found, use default decision
+    debugLogger.debug(
+      `[PolicyEngine.check] NO MATCH - using default decision: ${this.defaultDecision}`,
+    );
     return this.applyNonInteractiveMode(this.defaultDecision);
   }
 
diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts
index 23760e9914..5ff64eb0a8 100644
--- a/packages/core/src/tools/mcp-client.test.ts
+++ b/packages/core/src/tools/mcp-client.test.ts
@@ -89,6 +89,7 @@ describe('mcp-client', () => {
       } as unknown as GenAiLib.CallableTool);
       const mockedToolRegistry = {
         registerTool: vi.fn(),
+        getMessageBus: vi.fn().mockReturnValue(undefined),
       } as unknown as ToolRegistry;
       const client = new McpClient(
         'test-server',
@@ -152,6 +153,7 @@ describe('mcp-client', () => {
       } as unknown as GenAiLib.CallableTool);
       const mockedToolRegistry = {
         registerTool: vi.fn(),
+        getMessageBus: vi.fn().mockReturnValue(undefined),
       } as unknown as ToolRegistry;
       const client = new McpClient(
         'test-server',
@@ -190,12 +192,16 @@ describe('mcp-client', () => {
       vi.mocked(GenAiLib.mcpToTool).mockReturnValue({
         tool: () => Promise.resolve({ functionDeclarations: [] }),
       } as unknown as GenAiLib.CallableTool);
+      const mockedToolRegistry = {
+        registerTool: vi.fn(),
+        getMessageBus: vi.fn().mockReturnValue(undefined),
+      } as unknown as ToolRegistry;
       const client = new McpClient(
         'test-server',
         {
           command: 'test-command',
         },
-        {} as ToolRegistry,
+        mockedToolRegistry,
         {} as PromptRegistry,
         workspaceContext,
         false,
@@ -231,6 +237,7 @@ describe('mcp-client', () => {
       const mockedMcpToTool = vi.mocked(GenAiLib.mcpToTool);
       const mockedToolRegistry = {
         registerTool: vi.fn(),
+        getMessageBus: vi.fn().mockReturnValue(undefined),
       } as unknown as ToolRegistry;
       const client = new McpClient(
         'test-server',
@@ -279,6 +286,7 @@ describe('mcp-client', () => {
       } as unknown as GenAiLib.CallableTool);
       const mockedToolRegistry = {
         registerTool: vi.fn(),
+        getMessageBus: vi.fn().mockReturnValue(undefined),
       } as unknown as ToolRegistry;
       const client = new McpClient(
         'test-server',
diff --git a/packages/core/src/tools/mcp-client.ts b/packages/core/src/tools/mcp-client.ts
index a2afc04736..6457fc1cb3 100644
--- a/packages/core/src/tools/mcp-client.ts
+++ b/packages/core/src/tools/mcp-client.ts
@@ -42,6 +42,7 @@ import type {
 } from '../utils/workspaceContext.js';
 import type { ToolRegistry } from './tool-registry.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import type { MessageBus } from '../confirmation-bus/message-bus.js';
 import { coreEvents } from '../utils/events.js';
 
 export const MCP_DEFAULT_TIMEOUT_MSEC = 10 * 60 * 1000; // default to 10 minutes
@@ -198,6 +199,7 @@ export class McpClient {
       this.serverConfig,
       this.client!,
       cliConfig,
+      this.toolRegistry.getMessageBus(),
     );
   }
 
@@ -545,6 +547,7 @@ export async function connectAndDiscover(
       mcpServerConfig,
       mcpClient,
       cliConfig,
+      toolRegistry.getMessageBus(),
     );
 
     // If we have neither prompts nor tools, it's a failed discovery
@@ -582,6 +585,8 @@ export async function connectAndDiscover(
  * @param mcpServerName The name of the MCP server.
  * @param mcpServerConfig The configuration for the MCP server.
  * @param mcpClient The active MCP client instance.
+ * @param cliConfig The CLI configuration object.
+ * @param messageBus Optional message bus for policy engine integration.
  * @returns A promise that resolves to an array of discovered and enabled tools.
  * @throws An error if no enabled tools are found or if the server provides invalid function declarations.
  */
@@ -590,6 +595,7 @@ export async function discoverTools(
   mcpServerConfig: MCPServerConfig,
   mcpClient: Client,
   cliConfig: Config,
+  messageBus?: MessageBus,
 ): Promise<DiscoveredMCPTool[]> {
   try {
     // Only request tools if the server supports them.
@@ -612,19 +618,29 @@ export async function discoverTools(
           continue;
         }
 
-        discoveredTools.push(
-          new DiscoveredMCPTool(
-            mcpCallableTool,
-            mcpServerName,
-            funcDecl.name!,
-            funcDecl.description ?? '',
-            funcDecl.parametersJsonSchema ?? { type: 'object', properties: {} },
-            mcpServerConfig.trust,
-            undefined,
-            cliConfig,
-            mcpServerConfig.extension?.id,
-          ),
+        const tool = new DiscoveredMCPTool(
+          mcpCallableTool,
+          mcpServerName,
+          funcDecl.name!,
+          funcDecl.description ?? '',
+          funcDecl.parametersJsonSchema ?? { type: 'object', properties: {} },
+          mcpServerConfig.trust,
+          undefined,
+          cliConfig,
+          mcpServerConfig.extension?.id,
+          messageBus,
         );
+
+        if (
+          cliConfig.getDebugMode?.() &&
+          cliConfig.getEnableMessageBusIntegration?.()
+        ) {
+          debugLogger.log(
+            `[DEBUG] Discovered MCP tool '${funcDecl.name}' from server '${mcpServerName}' with messageBus: ${messageBus ? 'YES' : 'NO'}`,
+          );
+        }
+
+        discoveredTools.push(tool);
       } catch (error) {
         coreEvents.emitFeedback(
           'error',
diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts
index 822a41f24f..182366a6dc 100644
--- a/packages/core/src/tools/mcp-tool.ts
+++ b/packages/core/src/tools/mcp-tool.ts
@@ -72,11 +72,15 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation<
     readonly trust?: boolean,
     params: ToolParams = {},
     private readonly cliConfig?: Config,
+    messageBus?: MessageBus,
   ) {
-    super(params);
+    // Use composite format for policy checks: serverName__toolName
+    // This enables server wildcards (e.g., "google-workspace__*")
+    // while still allowing specific tool rules
+    super(params, messageBus, `${serverName}__${serverToolName}`, displayName);
   }
 
-  override async shouldConfirmExecute(
+  protected override async getConfirmationDetails(
     _abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
     const serverAllowListKey = this.serverName;
@@ -215,6 +219,7 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool<
     nameOverride?: string,
     private readonly cliConfig?: Config,
     override readonly extensionId?: string,
+    messageBus?: MessageBus,
   ) {
     super(
       nameOverride ?? generateValidName(serverToolName),
@@ -223,8 +228,8 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool<
       Kind.Other,
       parameterSchema,
       true, // isOutputMarkdown
-      false, // canUpdateOutput
-      undefined, // messageBus
+      false, // canUpdateOutput,
+      messageBus,
       extensionId,
     );
   }
@@ -240,6 +245,7 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool<
       `${this.serverName}__${this.serverToolName}`,
       this.cliConfig,
       this.extensionId,
+      this.messageBus,
     );
   }
 
@@ -257,6 +263,7 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool<
       this.trust,
       params,
       this.cliConfig,
+      _messageBus,
     );
   }
 }
diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts
index ba67c8adcf..11c028b7fc 100644
--- a/packages/core/src/tools/shell.ts
+++ b/packages/core/src/tools/shell.ts
@@ -60,8 +60,10 @@ export class ShellToolInvocation extends BaseToolInvocation<
     params: ShellToolParams,
     private readonly allowlist: Set<string>,
     messageBus?: MessageBus,
+    _toolName?: string,
+    _toolDisplayName?: string,
   ) {
-    super(params, messageBus);
+    super(params, messageBus, _toolName, _toolDisplayName);
   }
 
   getDescription(): string {
@@ -451,12 +453,16 @@ export class ShellTool extends BaseDeclarativeTool<
   protected createInvocation(
     params: ShellToolParams,
     messageBus?: MessageBus,
+    _toolName?: string,
+    _toolDisplayName?: string,
   ): ToolInvocation<ShellToolParams, ToolResult> {
     return new ShellToolInvocation(
       this.config,
       params,
       this.allowlist,
       messageBus,
+      _toolName,
+      _toolDisplayName,
     );
   }
 }
diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts
index 4a4a13c5f9..c7d8e35305 100644
--- a/packages/core/src/tools/tool-registry.ts
+++ b/packages/core/src/tools/tool-registry.ts
@@ -177,12 +177,21 @@ export class ToolRegistry {
   private tools: Map<string, AnyDeclarativeTool> = new Map();
   private config: Config;
   private mcpClientManager: McpClientManager;
+  private messageBus?: MessageBus;
 
   constructor(config: Config, eventEmitter?: EventEmitter) {
     this.config = config;
     this.mcpClientManager = new McpClientManager(this, eventEmitter);
   }
 
+  setMessageBus(messageBus: MessageBus): void {
+    this.messageBus = messageBus;
+  }
+
+  getMessageBus(): MessageBus | undefined {
+    return this.messageBus;
+  }
+
   /**
    * Registers a tool definition.
    * @param tool - The tool object containing schema and execution logic.
diff --git a/packages/core/src/tools/write-todos.ts b/packages/core/src/tools/write-todos.ts
index 8f80904c85..73377b8255 100644
--- a/packages/core/src/tools/write-todos.ts
+++ b/packages/core/src/tools/write-todos.ts
@@ -99,6 +99,15 @@ class WriteTodosToolInvocation extends BaseToolInvocation<
   WriteTodosToolParams,
   ToolResult
 > {
+  constructor(
+    params: WriteTodosToolParams,
+    messageBus?: MessageBus,
+    _toolName?: string,
+    _toolDisplayName?: string,
+  ) {
+    super(params, messageBus, _toolName, _toolDisplayName);
+  }
+
   getDescription(): string {
     const count = this.params.todos?.length ?? 0;
     if (count === 0) {
@@ -209,6 +218,11 @@ export class WriteTodosTool extends BaseDeclarativeTool<
     _toolName?: string,
     _displayName?: string,
   ): ToolInvocation<WriteTodosToolParams, ToolResult> {
-    return new WriteTodosToolInvocation(params);
+    return new WriteTodosToolInvocation(
+      params,
+      _messageBus,
+      _toolName,
+      _displayName,
+    );
   }
 }
diff --git a/scripts/copy_files.js b/scripts/copy_files.js
index ddf254641d..4e32e61e00 100644
--- a/scripts/copy_files.js
+++ b/scripts/copy_files.js
@@ -26,7 +26,7 @@ import path from 'node:path';
 const sourceDir = path.join('src');
 const targetDir = path.join('dist', 'src');
 
-const extensionsToCopy = ['.md', '.json', '.sb'];
+const extensionsToCopy = ['.md', '.json', '.sb', '.toml'];
 
 function copyFilesRecursive(source, target) {
   if (!fs.existsSync(target)) {

From 4e6eef588298fe608039fbef642c5805e64fa0c8 Mon Sep 17 00:00:00 2001
From: Allen Hutchison <adh@google.com>
Date: Tue, 28 Oct 2025 10:16:47 -0700
Subject: [PATCH 56/73] refactor: Migrate console.error to debugLogger.warn in
 atCommandProcessor.ts (#12134)

---
 packages/cli/src/ui/hooks/atCommandProcessor.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.ts b/packages/cli/src/ui/hooks/atCommandProcessor.ts
index 01d5907324..da2eb3eb56 100644
--- a/packages/cli/src/ui/hooks/atCommandProcessor.ts
+++ b/packages/cli/src/ui/hooks/atCommandProcessor.ts
@@ -281,7 +281,7 @@ export async function handleAtCommand({
                 );
               }
             } catch (globError) {
-              console.error(
+              debugLogger.warn(
                 `Error during glob search for ${pathName}: ${getErrorMessage(globError)}`,
               );
               onDebugMessage(
@@ -294,7 +294,7 @@ export async function handleAtCommand({
             );
           }
         } else {
-          console.error(
+          debugLogger.warn(
             `Error stating path ${pathName}: ${getErrorMessage(error)}`,
           );
           onDebugMessage(

From 5d61adf804e607af974117601d6f1e5b5900fa60 Mon Sep 17 00:00:00 2001
From: Allen Hutchison <adh@google.com>
Date: Tue, 28 Oct 2025 10:18:34 -0700
Subject: [PATCH 57/73] feat: Add message bus setting guard for tool
 confirmation (#12169)

---
 packages/core/src/tools/tools.ts | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts
index a69856cd72..35dac2c12d 100644
--- a/packages/core/src/tools/tools.ts
+++ b/packages/core/src/tools/tools.ts
@@ -114,13 +114,31 @@ export abstract class BaseToolInvocation<
   /**
    * Subclasses should override this method to provide custom confirmation UI
    * when the policy engine's decision is 'ASK_USER'.
-   * The base implementation returns false (no confirmation needed).
-   * Only tools that need confirmation (e.g., write, execute tools) should override this.
+   * The base implementation provides a generic confirmation prompt.
    */
   protected async getConfirmationDetails(
     _abortSignal: AbortSignal,
   ): Promise<ToolCallConfirmationDetails | false> {
-    return false;
+    if (!this.messageBus) {
+      return false;
+    }
+
+    const confirmationDetails: ToolCallConfirmationDetails = {
+      type: 'info',
+      title: `Confirm: ${this._toolDisplayName || this._toolName}`,
+      prompt: this.getDescription(),
+      onConfirm: async (outcome: ToolConfirmationOutcome) => {
+        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
+          if (this.messageBus && this._toolName) {
+            this.messageBus.publish({
+              type: MessageBusType.UPDATE_POLICY,
+              toolName: this._toolName,
+            });
+          }
+        }
+      },
+    };
+    return confirmationDetails;
   }
 
   protected getMessageBusDecision(

From 13aa0148e7a533750c82c293e98f02984629d45b Mon Sep 17 00:00:00 2001
From: Jacob Richman <jacob314@gmail.com>
Date: Tue, 28 Oct 2025 10:32:15 -0700
Subject: [PATCH 58/73] Migrate tests to avoid jsdom (#12118)

---
 package-lock.json                             |  46 ++--
 packages/cli/src/config/extension.test.ts     |   6 +-
 .../cli/src/config/extensions/update.test.ts  |  12 +-
 packages/cli/src/test-utils/render.test.tsx   |  66 ++++++
 packages/cli/src/test-utils/render.tsx        |  57 +++++
 .../ui/components/FolderTrustDialog.test.tsx  |  10 +-
 packages/cli/src/ui/components/Help.test.tsx  |   2 -
 .../src/ui/components/ModelDialog.test.tsx    |  16 +-
 .../PermissionsModifyTrustDialog.test.tsx     |  26 +--
 .../src/ui/components/SettingsDialog.test.tsx |  27 +--
 .../shared/BaseSelectionList.test.tsx         |  25 +-
 .../ui/components/shared/text-buffer.test.ts  |   5 +-
 .../src/ui/contexts/KeypressContext.test.tsx  |  13 +-
 .../src/ui/contexts/SessionContext.test.tsx   |  57 +++--
 .../cli/src/ui/hooks/useAtCompletion.test.ts  |  60 +++--
 .../ui/hooks/useAutoAcceptIndicator.test.ts   |   5 +-
 .../src/ui/hooks/useFlickerDetector.test.ts   |  17 +-
 .../cli/src/ui/hooks/useFolderTrust.test.ts   |  85 ++++---
 .../cli/src/ui/hooks/useGeminiStream.test.tsx |  75 +++---
 .../src/ui/hooks/useHistoryManager.test.ts    |   5 +-
 .../cli/src/ui/hooks/useInputHistory.test.ts  |   5 +-
 .../src/ui/hooks/useInputHistoryStore.test.ts |   5 +-
 .../hooks/usePermissionsModifyTrust.test.ts   |   7 +-
 .../cli/src/ui/hooks/usePhraseCycler.test.ts  | 210 -----------------
 .../cli/src/ui/hooks/usePhraseCycler.test.tsx | 216 ++++++++++++++++++
 .../src/ui/hooks/useQuotaAndFallback.test.ts  |   5 +-
 .../ui/hooks/useReactToolScheduler.test.ts    |   4 +-
 .../hooks/useReverseSearchCompletion.test.tsx |   5 +-
 .../cli/src/ui/hooks/useShellHistory.test.ts  |  60 +++--
 .../src/ui/hooks/useSlashCompletion.test.ts   | 116 +++++-----
 .../cli/src/ui/hooks/useToolScheduler.test.ts |  96 ++++----
 31 files changed, 765 insertions(+), 579 deletions(-)
 create mode 100644 packages/cli/src/test-utils/render.test.tsx
 delete mode 100644 packages/cli/src/ui/hooks/usePhraseCycler.test.ts
 create mode 100644 packages/cli/src/ui/hooks/usePhraseCycler.test.tsx

diff --git a/package-lock.json b/package-lock.json
index 69fb107bc6..a0e554676c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -598,7 +598,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18"
       },
@@ -622,7 +621,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18"
       }
@@ -2428,7 +2426,6 @@
       "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@octokit/auth-token": "^6.0.0",
         "@octokit/graphql": "^9.0.2",
@@ -2609,7 +2606,6 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
       "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
       "license": "Apache-2.0",
-      "peer": true,
       "engines": {
         "node": ">=8.0.0"
       }
@@ -2643,7 +2639,6 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.0.1.tgz",
       "integrity": "sha512-MaZk9SJIDgo1peKevlbhP6+IwIiNPNmswNL4AF0WaQJLbHXjr9SrZMgS12+iqr9ToV4ZVosCcc0f8Rg67LXjxw==",
       "license": "Apache-2.0",
-      "peer": true,
       "dependencies": {
         "@opentelemetry/semantic-conventions": "^1.29.0"
       },
@@ -3012,7 +3007,6 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.0.1.tgz",
       "integrity": "sha512-dZOB3R6zvBwDKnHDTB4X1xtMArB/d324VsbiPkX/Yu0Q8T2xceRthoIVFhJdvgVM2QhGVUyX9tzwiNxGtoBJUw==",
       "license": "Apache-2.0",
-      "peer": true,
       "dependencies": {
         "@opentelemetry/core": "2.0.1",
         "@opentelemetry/semantic-conventions": "^1.29.0"
@@ -3046,7 +3040,6 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.0.1.tgz",
       "integrity": "sha512-wf8OaJoSnujMAHWR3g+/hGvNcsC16rf9s1So4JlMiFaFHiE4HpIA3oUh+uWZQ7CNuK8gVW/pQSkgoa5HkkOl0g==",
       "license": "Apache-2.0",
-      "peer": true,
       "dependencies": {
         "@opentelemetry/core": "2.0.1",
         "@opentelemetry/resources": "2.0.1"
@@ -3099,7 +3092,6 @@
       "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.0.1.tgz",
       "integrity": "sha512-xYLlvk/xdScGx1aEqvxLwf6sXQLXCjk3/1SQT9X9AoN5rXRhkdvIFShuNNmtTEPRBqcsMbS4p/gJLNI2wXaDuQ==",
       "license": "Apache-2.0",
-      "peer": true,
       "dependencies": {
         "@opentelemetry/core": "2.0.1",
         "@opentelemetry/resources": "2.0.1",
@@ -3815,7 +3807,6 @@
       "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
       "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.10.4",
         "@babel/runtime": "^7.12.5",
@@ -4348,7 +4339,6 @@
       "integrity": "sha512-AwAfQ2Wa5bCx9WP8nZL2uMZWod7J7/JSplxbTmBQ5ms6QpqNYm672H0Vu9ZVKVngQ+ii4R/byguVEUZQyeg44g==",
       "devOptional": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "csstype": "^3.0.2"
       }
@@ -4359,7 +4349,6 @@
       "integrity": "sha512-4hOiT/dwO8Ko0gV1m/TJZYk3y0KBnY9vzDh7W+DH17b2HFSOGgdj33dhihPeuy3l0q23+4e+hoXHV6hCC4dCXw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "peerDependencies": {
         "@types/react": "^19.0.0"
       }
@@ -4637,7 +4626,6 @@
       "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.35.0",
         "@typescript-eslint/types": "8.35.0",
@@ -5405,7 +5393,6 @@
       "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -5769,7 +5756,8 @@
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
       "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/array-includes": {
       "version": "3.1.9",
@@ -7015,6 +7003,7 @@
       "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
       "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "safe-buffer": "5.2.1"
       },
@@ -8062,7 +8051,6 @@
       "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.2.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -8652,6 +8640,7 @@
       "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz",
       "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">= 0.6"
       }
@@ -8661,6 +8650,7 @@
       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
       "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "ms": "2.0.0"
       }
@@ -8670,6 +8660,7 @@
       "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
       "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">= 0.8"
       }
@@ -8899,6 +8890,7 @@
       "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz",
       "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "debug": "2.6.9",
         "encodeurl": "~2.0.0",
@@ -8917,6 +8909,7 @@
       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
       "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "ms": "2.0.0"
       }
@@ -8925,13 +8918,15 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
       "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/finalhandler/node_modules/statuses": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
       "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">= 0.8"
       }
@@ -10148,7 +10143,6 @@
       "resolved": "https://registry.npmjs.org/ink/-/ink-6.2.3.tgz",
       "integrity": "sha512-fQkfEJjKbLXIcVWEE3MvpYSnwtbbmRsmeNDNz1pIuOFlwE+UF2gsy228J36OXKZGWJWZJKUigphBSqCNMcARtg==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@alcalzone/ansi-tokenize": "^0.2.0",
         "ansi-escapes": "^7.0.0",
@@ -13285,7 +13279,8 @@
       "version": "0.1.12",
       "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
       "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/path-type": {
       "version": "3.0.0",
@@ -13819,7 +13814,6 @@
       "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
       "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -13830,7 +13824,6 @@
       "integrity": "sha512-cq/o30z9W2Wb4rzBefjv5fBalHU0rJGZCHAkf/RHSBWSSYwh8PlQTqqOJmgIIbBtpj27T6FIPXeomIjZtCNVqA==",
       "devOptional": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "shell-quote": "^1.6.1",
         "ws": "^7"
@@ -13864,7 +13857,6 @@
       "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "scheduler": "^0.26.0"
       },
@@ -15928,7 +15920,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -16139,8 +16130,7 @@
       "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
       "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
       "dev": true,
-      "license": "0BSD",
-      "peer": true
+      "license": "0BSD"
     },
     "node_modules/tsx": {
       "version": "4.20.3",
@@ -16148,7 +16138,6 @@
       "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "~0.25.0",
         "get-tsconfig": "^4.7.5"
@@ -16333,7 +16322,6 @@
       "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -16495,6 +16483,7 @@
       "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
       "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">= 0.4.0"
       }
@@ -16550,7 +16539,6 @@
       "integrity": "sha512-4nVGliEpxmhCL8DslSAUdxlB6+SMrhB0a1v5ijlh1xB1nEPuy1mxaHxysVucLHuWryAxLWg6a5ei+U4TLn/rFg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.25.0",
         "fdir": "^6.5.0",
@@ -16667,7 +16655,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -16681,7 +16668,6 @@
       "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@types/chai": "^5.2.2",
         "@vitest/expect": "3.2.4",
@@ -17433,7 +17419,6 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
       "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
       "license": "MIT",
-      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
@@ -17975,7 +17960,6 @@
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index 3ce23405f3..e4fa0364ac 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { vi, type MockedFunction } from 'vitest';
 import * as fs from 'node:fs';
 import * as os from 'node:os';
@@ -61,11 +59,13 @@ vi.mock('simple-git', () => ({
   }),
 }));
 
+const mockHomedir = vi.hoisted(() => vi.fn(() => '/tmp/mock-home'));
+
 vi.mock('os', async (importOriginal) => {
   const mockedOs = await importOriginal<typeof os>();
   return {
     ...mockedOs,
-    homedir: vi.fn(),
+    homedir: mockHomedir,
   };
 });
 
diff --git a/packages/cli/src/config/extensions/update.test.ts b/packages/cli/src/config/extensions/update.test.ts
index 8c02168164..8dfe841d74 100644
--- a/packages/cli/src/config/extensions/update.test.ts
+++ b/packages/cli/src/config/extensions/update.test.ts
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { vi, type MockedFunction } from 'vitest';
 import * as fs from 'node:fs';
 import * as os from 'node:os';
@@ -50,13 +48,9 @@ vi.mock('os', async (importOriginal) => {
   };
 });
 
-vi.mock('../trustedFolders.js', async (importOriginal) => {
-  const actual = await importOriginal<typeof import('../trustedFolders.js')>();
-  return {
-    ...actual,
-    isWorkspaceTrusted: vi.fn(),
-  };
-});
+vi.mock('../trustedFolders.js', () => ({
+  isWorkspaceTrusted: vi.fn(),
+}));
 
 const mockLogExtensionInstallEvent = vi.hoisted(() => vi.fn());
 const mockLogExtensionUninstall = vi.hoisted(() => vi.fn());
diff --git a/packages/cli/src/test-utils/render.test.tsx b/packages/cli/src/test-utils/render.test.tsx
new file mode 100644
index 0000000000..b705c2a5e1
--- /dev/null
+++ b/packages/cli/src/test-utils/render.test.tsx
@@ -0,0 +1,66 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+import { useState, useEffect } from 'react';
+import { renderHook } from './render.js';
+
+describe('renderHook', () => {
+  it('should rerender with previous props when called without arguments', async () => {
+    const useTestHook = ({ value }: { value: number }) => {
+      const [count, setCount] = useState(0);
+      useEffect(() => {
+        setCount((c) => c + 1);
+      }, [value]);
+      return { count, value };
+    };
+
+    const { result, rerender } = renderHook(useTestHook, {
+      initialProps: { value: 1 },
+    });
+
+    expect(result.current.value).toBe(1);
+    await vi.waitFor(() => expect(result.current.count).toBe(1));
+
+    // Rerender with new props
+    rerender({ value: 2 });
+    expect(result.current.value).toBe(2);
+    await vi.waitFor(() => expect(result.current.count).toBe(2));
+
+    // Rerender without arguments should use previous props (value: 2)
+    // This would previously crash or pass undefined if not fixed
+    rerender();
+    expect(result.current.value).toBe(2);
+    // Count should not increase because value didn't change
+    await vi.waitFor(() => expect(result.current.count).toBe(2));
+  });
+
+  it('should handle initial render without props', () => {
+    const useTestHook = () => {
+      const [count, setCount] = useState(0);
+      return { count, increment: () => setCount((c) => c + 1) };
+    };
+
+    const { result, rerender } = renderHook(useTestHook);
+
+    expect(result.current.count).toBe(0);
+
+    rerender();
+    expect(result.current.count).toBe(0);
+  });
+
+  it('should update props if undefined is passed explicitly', () => {
+    const useTestHook = (val: string | undefined) => val;
+    const { result, rerender } = renderHook(useTestHook, {
+      initialProps: 'initial',
+    });
+
+    expect(result.current).toBe('initial');
+
+    rerender(undefined);
+    expect(result.current).toBeUndefined();
+  });
+});
diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx
index 3eba2ff964..1eb00406c5 100644
--- a/packages/cli/src/test-utils/render.tsx
+++ b/packages/cli/src/test-utils/render.tsx
@@ -6,6 +6,7 @@
 
 import { render } from 'ink-testing-library';
 import type React from 'react';
+import { act } from 'react';
 import { LoadedSettings, type Settings } from '../config/settings.js';
 import { KeypressProvider } from '../ui/contexts/KeypressContext.js';
 import { SettingsContext } from '../ui/contexts/SettingsContext.js';
@@ -128,3 +129,59 @@ export const renderWithProviders = (
     </ConfigContext.Provider>,
   );
 };
+/** Renders a hook via ink-testing-library and tracks its latest result. */
+export function renderHook<Result, Props>(
+  renderCallback: (props: Props) => Result,
+  options?: {
+    initialProps?: Props;
+    wrapper?: React.ComponentType<{ children: React.ReactNode }>;
+  },
+): {
+  result: { current: Result }; // latest value returned by renderCallback
+  rerender: (props?: Props) => void; // re-render, optionally with new props
+  unmount: () => void; // unmount function of the ink-testing-library render
+} {
+  const result = { current: undefined as unknown as Result };
+  let currentProps = options?.initialProps as Props;
+  // Helper component: runs the hook on each render and records its result.
+  function TestComponent({
+    renderCallback,
+    props,
+  }: {
+    renderCallback: (props: Props) => Result;
+    props: Props;
+  }) {
+    result.current = renderCallback(props);
+    return null;
+  }
+
+  const Wrapper = options?.wrapper || (({ children }) => <>{children}</>);
+  // No-op placeholders; the act() callback below swaps in the real functions.
+  let inkRerender: (tree: React.ReactElement) => void = () => {};
+  let unmount: () => void = () => {};
+  // Initial render with the initial props (undefined when none were given).
+  act(() => {
+    const renderResult = render(
+      <Wrapper>
+        <TestComponent renderCallback={renderCallback} props={currentProps} />
+      </Wrapper>,
+    );
+    inkRerender = renderResult.rerender;
+    unmount = renderResult.unmount;
+  });
+  // rerender() keeps the previous props; any explicit argument replaces them.
+  function rerender(props?: Props) {
+    if (arguments.length > 0) {
+      currentProps = props as Props;
+    }
+    act(() => {
+      inkRerender(
+        <Wrapper>
+          <TestComponent renderCallback={renderCallback} props={currentProps} />
+        </Wrapper>,
+      );
+    });
+  }
+
+  return { result, rerender, unmount };
+}
diff --git a/packages/cli/src/ui/components/FolderTrustDialog.test.tsx b/packages/cli/src/ui/components/FolderTrustDialog.test.tsx
index 77280be320..588f39653e 100644
--- a/packages/cli/src/ui/components/FolderTrustDialog.test.tsx
+++ b/packages/cli/src/ui/components/FolderTrustDialog.test.tsx
@@ -4,10 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { renderWithProviders } from '../../test-utils/render.js';
-import { waitFor, act } from '@testing-library/react';
+import { act } from 'react';
 import { vi } from 'vitest';
 import { FolderTrustDialog } from './FolderTrustDialog.js';
 import * as processUtils from '../../utils/processUtils.js';
@@ -56,12 +54,12 @@ describe('FolderTrustDialog', () => {
       stdin.write('\u001b[27u'); // Press kitty escape key
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(lastFrame()).toContain(
         'A folder trust level must be selected to continue. Exiting since escape was pressed.',
       );
     });
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockedExit).toHaveBeenCalledWith(1);
     });
     expect(onSelect).not.toHaveBeenCalled();
@@ -95,7 +93,7 @@ describe('FolderTrustDialog', () => {
       stdin.write('r');
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockedExit).not.toHaveBeenCalled();
     });
   });
diff --git a/packages/cli/src/ui/components/Help.test.tsx b/packages/cli/src/ui/components/Help.test.tsx
index ff749643ba..27f072c8eb 100644
--- a/packages/cli/src/ui/components/Help.test.tsx
+++ b/packages/cli/src/ui/components/Help.test.tsx
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { render } from 'ink-testing-library';
 import { describe, it, expect } from 'vitest';
 import { Help } from './Help.js';
diff --git a/packages/cli/src/ui/components/ModelDialog.test.tsx b/packages/cli/src/ui/components/ModelDialog.test.tsx
index 0080a03b3d..1bcfb5c75f 100644
--- a/packages/cli/src/ui/components/ModelDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.test.tsx
@@ -4,9 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { render, cleanup } from '@testing-library/react';
+import { render, cleanup } from 'ink-testing-library';
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import {
   DEFAULT_GEMINI_FLASH_LITE_MODEL,
@@ -82,12 +80,12 @@ describe('<ModelDialog />', () => {
   });
 
   it('renders the title and help text', () => {
-    const { getByText } = renderComponent();
-    expect(getByText('Select Model')).toBeDefined();
-    expect(getByText('(Press Esc to close)')).toBeDefined();
-    expect(
-      getByText('> To use a specific Gemini model, use the --model flag.'),
-    ).toBeDefined();
+    const { lastFrame } = renderComponent();
+    expect(lastFrame()).toContain('Select Model');
+    expect(lastFrame()).toContain('(Press Esc to close)');
+    expect(lastFrame()).toContain(
+      '> To use a specific Gemini model, use the --model flag.',
+    );
   });
 
   it('passes all model options to DescriptiveRadioButtonSelect', () => {
diff --git a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx b/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx
index ed2740c580..4cf24614fa 100644
--- a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx
+++ b/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx
@@ -4,16 +4,12 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-/// <reference types="vitest/globals" />
-
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import type { Mock } from 'vitest';
 import { renderWithProviders } from '../../test-utils/render.js';
 import { PermissionsModifyTrustDialog } from './PermissionsModifyTrustDialog.js';
 import { TrustLevel } from '../../config/trustedFolders.js';
-import { waitFor, act } from '@testing-library/react';
+import { act } from 'react';
 import * as processUtils from '../../utils/processUtils.js';
 import { usePermissionsModifyTrust } from '../hooks/usePermissionsModifyTrust.js';
 
@@ -72,7 +68,7 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={vi.fn()} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(lastFrame()).toContain('Modify Trust Level');
       expect(lastFrame()).toContain('Folder: /test/dir');
       expect(lastFrame()).toContain('Current Level: DO_NOT_TRUST');
@@ -94,7 +90,7 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={vi.fn()} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(lastFrame()).toContain(
         'Note: This folder behaves as a trusted folder because one of the parent folders is trusted.',
       );
@@ -116,7 +112,7 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={vi.fn()} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(lastFrame()).toContain(
         'Note: This folder behaves as a trusted folder because the connected IDE workspace is trusted.',
       );
@@ -128,7 +124,7 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={vi.fn()} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(lastFrame()).toContain('Trust this folder (dir)');
       expect(lastFrame()).toContain('Trust parent folder (test)');
     });
@@ -140,13 +136,13 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={onExit} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => expect(lastFrame()).not.toContain('Loading...'));
+    await vi.waitFor(() => expect(lastFrame()).not.toContain('Loading...'));
 
     act(() => {
       stdin.write('\u001b[27u'); // Kitty escape key
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(onExit).toHaveBeenCalled();
     });
   });
@@ -171,11 +167,11 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={onExit} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => expect(lastFrame()).not.toContain('Loading...'));
+    await vi.waitFor(() => expect(lastFrame()).not.toContain('Loading...'));
 
     act(() => stdin.write('r')); // Press 'r' to restart
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockCommitTrustLevelChange).toHaveBeenCalled();
       expect(mockRelaunchApp).toHaveBeenCalled();
       expect(onExit).toHaveBeenCalled();
@@ -201,11 +197,11 @@ describe('PermissionsModifyTrustDialog', () => {
       <PermissionsModifyTrustDialog onExit={onExit} addItem={vi.fn()} />,
     );
 
-    await waitFor(() => expect(lastFrame()).not.toContain('Loading...'));
+    await vi.waitFor(() => expect(lastFrame()).not.toContain('Loading...'));
 
     act(() => stdin.write('\u001b[27u')); // Press kitty escape key
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockCommitTrustLevelChange).not.toHaveBeenCalled();
       expect(onExit).toHaveBeenCalled();
     });
diff --git a/packages/cli/src/ui/components/SettingsDialog.test.tsx b/packages/cli/src/ui/components/SettingsDialog.test.tsx
index 50d32c1871..f8577e6bb7 100644
--- a/packages/cli/src/ui/components/SettingsDialog.test.tsx
+++ b/packages/cli/src/ui/components/SettingsDialog.test.tsx
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 /**
  *
  *
@@ -30,7 +28,6 @@ import { LoadedSettings, SettingScope } from '../../config/settings.js';
 import { VimModeProvider } from '../contexts/VimModeContext.js';
 import { KeypressProvider } from '../contexts/KeypressContext.js';
 import { act } from 'react';
-import { waitFor } from '@testing-library/react';
 import { saveModifiedSettings, TEST_ONLY } from '../../utils/settingsUtils.js';
 import {
   getSettingsSchema,
@@ -408,7 +405,7 @@ describe('SettingsDialog', () => {
       const { stdin, unmount, lastFrame } = render(component);
 
       // Wait for initial render and verify we're on Vim Mode (first setting)
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(lastFrame()).toContain('● Vim Mode');
       });
 
@@ -416,7 +413,7 @@ describe('SettingsDialog', () => {
       act(() => {
         stdin.write(TerminalKeys.DOWN_ARROW as string);
       });
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(lastFrame()).toContain('● Disable Auto Update');
       });
 
@@ -425,14 +422,14 @@ describe('SettingsDialog', () => {
         stdin.write(TerminalKeys.ENTER as string);
       });
       // Wait for the setting change to be processed
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(
           vi.mocked(saveModifiedSettings).mock.calls.length,
         ).toBeGreaterThan(0);
       });
 
       // Wait for the mock to be called
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(vi.mocked(saveModifiedSettings)).toHaveBeenCalled();
       });
 
@@ -470,7 +467,7 @@ describe('SettingsDialog', () => {
         await wait();
         stdin.write(TerminalKeys.ENTER as string);
         await wait();
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(vi.mocked(saveModifiedSettings)).toHaveBeenCalled();
         });
 
@@ -507,7 +504,7 @@ describe('SettingsDialog', () => {
         await wait();
         stdin.write(TerminalKeys.ENTER as string);
         await wait();
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(vi.mocked(saveModifiedSettings)).toHaveBeenCalled();
         });
 
@@ -596,7 +593,7 @@ describe('SettingsDialog', () => {
       );
 
       // Wait for initial render
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(lastFrame()).toContain('Vim Mode');
       });
 
@@ -668,7 +665,7 @@ describe('SettingsDialog', () => {
       );
 
       // Wait for initial render
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(lastFrame()).toContain('Hide Window Title');
       });
 
@@ -964,7 +961,7 @@ describe('SettingsDialog', () => {
         await wait(50);
       }
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(
           vi.mocked(saveModifiedSettings).mock.calls.length,
         ).toBeGreaterThan(0);
@@ -1024,7 +1021,7 @@ describe('SettingsDialog', () => {
         await wait(30);
       }
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(
           vi.mocked(saveModifiedSettings).mock.calls.length,
         ).toBeGreaterThan(0);
@@ -1141,7 +1138,7 @@ describe('SettingsDialog', () => {
       );
 
       // Wait for initial render
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(lastFrame()).toContain('Vim Mode');
       });
 
@@ -1203,7 +1200,7 @@ describe('SettingsDialog', () => {
       );
 
       // Wait for initial render
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(lastFrame()).toContain('Vim Mode');
       });
 
diff --git a/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx b/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx
index bc2fd37db3..9f9b9e60de 100644
--- a/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx
+++ b/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx
@@ -4,10 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { waitFor } from '@testing-library/react';
 import { renderWithProviders } from '../../../test-utils/render.js';
 import {
   BaseSelectionList,
@@ -301,7 +298,7 @@ describe('BaseSelectionList', () => {
 
         rerender(<BaseSelectionList {...componentProps} />);
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(lastFrame()).toBeTruthy();
         });
       };
@@ -325,7 +322,7 @@ describe('BaseSelectionList', () => {
       // New visible window should be Items 2, 3, 4 (scroll offset 1).
       await updateActiveIndex(3);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         expect(output).not.toContain('Item 1');
         expect(output).toContain('Item 2');
@@ -339,7 +336,7 @@ describe('BaseSelectionList', () => {
 
       await updateActiveIndex(4);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         expect(output).toContain('Item 3'); // Should see items 3, 4, 5
         expect(output).toContain('Item 5');
@@ -350,7 +347,7 @@ describe('BaseSelectionList', () => {
       // This should trigger scroll up to show items 2, 3, 4
       await updateActiveIndex(1);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         expect(output).toContain('Item 2');
         expect(output).toContain('Item 4');
@@ -364,7 +361,7 @@ describe('BaseSelectionList', () => {
       // Visible items: 8, 9, 10.
       const { lastFrame } = renderScrollableList(9);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         expect(output).toContain('Item 10');
         expect(output).toContain('Item 8');
@@ -383,14 +380,14 @@ describe('BaseSelectionList', () => {
       expect(lastFrame()).toContain('Item 1');
 
       await updateActiveIndex(3); // Should trigger scroll
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         expect(output).toContain('Item 2');
         expect(output).toContain('Item 4');
         expect(output).not.toContain('Item 1');
       });
       await updateActiveIndex(5); // Scroll further
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         expect(output).toContain('Item 4');
         expect(output).toContain('Item 6');
@@ -417,7 +414,7 @@ describe('BaseSelectionList', () => {
     it('should correctly identify the selected item when scrolled (high index)', async () => {
       renderScrollableList(5);
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         // Item 6 (index 5) should be selected
         expect(mockRenderItem).toHaveBeenCalledWith(
           expect.objectContaining({ value: 'Item 6' }),
@@ -475,7 +472,7 @@ describe('BaseSelectionList', () => {
         0,
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         // At the top, should show first 3 items
         expect(output).toContain('Item 1');
@@ -493,7 +490,7 @@ describe('BaseSelectionList', () => {
         5,
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         // After scrolling to middle, should see items around index 5
         expect(output).toContain('Item 4');
@@ -512,7 +509,7 @@ describe('BaseSelectionList', () => {
         9,
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         const output = lastFrame();
         // At the end, should show last 3 items
         expect(output).toContain('Item 8');
diff --git a/packages/cli/src/ui/components/shared/text-buffer.test.ts b/packages/cli/src/ui/components/shared/text-buffer.test.ts
index 77013f27b5..fa68800f87 100644
--- a/packages/cli/src/ui/components/shared/text-buffer.test.ts
+++ b/packages/cli/src/ui/components/shared/text-buffer.test.ts
@@ -4,11 +4,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { describe, it, expect, beforeEach } from 'vitest';
 import stripAnsi from 'strip-ansi';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../../test-utils/render.js';
 import type {
   Viewport,
   TextBuffer,
diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx
index 4f1aa42e69..3d11de50b7 100644
--- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx
+++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx
@@ -4,10 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import type React from 'react';
-import { renderHook, act, waitFor } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import type { Mock } from 'vitest';
 import { vi } from 'vitest';
 import type { Key } from './KeypressContext.js';
@@ -370,7 +369,7 @@ describe('KeypressContext - Kitty Protocol', () => {
         stdin.write(PASTE_END);
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         // Expect the handler to be called exactly once for the entire paste
         expect(keyHandler).toHaveBeenCalledTimes(1);
       });
@@ -399,7 +398,7 @@ describe('KeypressContext - Kitty Protocol', () => {
         stdin.write(PASTE_END);
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(keyHandler).toHaveBeenCalledTimes(1);
       });
 
@@ -427,7 +426,7 @@ describe('KeypressContext - Kitty Protocol', () => {
         stdin.write(PASTE_END.slice(3));
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(keyHandler).toHaveBeenCalledTimes(1);
       });
 
@@ -1193,7 +1192,7 @@ describe('Kitty Sequence Parsing', () => {
     }
 
     // Should parse once complete
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(keyHandler).toHaveBeenCalledWith(
         expect.objectContaining({
           name: 'escape',
diff --git a/packages/cli/src/ui/contexts/SessionContext.test.tsx b/packages/cli/src/ui/contexts/SessionContext.test.tsx
index 45833ae5ee..b2602e3925 100644
--- a/packages/cli/src/ui/contexts/SessionContext.test.tsx
+++ b/packages/cli/src/ui/contexts/SessionContext.test.tsx
@@ -4,17 +4,40 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { type MutableRefObject } from 'react';
+import { type MutableRefObject, Component, type ReactNode } from 'react';
 import { render } from 'ink-testing-library';
-import { renderHook } from '@testing-library/react';
-import { act } from 'react-dom/test-utils';
+
+import { act } from 'react';
 import type { SessionMetrics } from './SessionContext.js';
 import { SessionStatsProvider, useSessionStats } from './SessionContext.js';
 import { describe, it, expect, vi } from 'vitest';
 import { uiTelemetryService } from '@google/gemini-cli-core';
 
+class ErrorBoundary extends Component<
+  { children: ReactNode; onError: (error: Error) => void },
+  { hasError: boolean }
+> { // Minimal error boundary used to capture errors thrown during render.
+  constructor(props: { children: ReactNode; onError: (error: Error) => void }) {
+    super(props);
+    this.state = { hasError: false };
+  }
+  // Switches to the fallback state when a descendant throws while rendering.
+  static getDerivedStateFromError(_error: Error) {
+    return { hasError: true };
+  }
+  // Hands the caught error to onError so the test can assert on it.
+  override componentDidCatch(error: Error) {
+    this.props.onError(error);
+  }
+  // Fallback UI is nothing at all; without an error the children render as-is.
+  override render() {
+    if (this.state.hasError) {
+      return null;
+    }
+    return this.props.children;
+  }
+}
+
 /**
  * A test harness component that uses the hook and exposes the context value
  * via a mutable ref. This allows us to interact with the context's functions
@@ -208,16 +231,25 @@ describe('SessionStatsContext', () => {
   });
 
   it('should throw an error when useSessionStats is used outside of a provider', () => {
-    // Suppress console.error for this test since we expect an error
+    const onError = vi.fn();
+    // Suppress console.error from React for this test
     const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
 
     try {
-      // Expect renderHook itself to throw when the hook is used outside a provider
-      expect(() => {
-        renderHook(() => useSessionStats());
-      }).toThrow('useSessionStats must be used within a SessionStatsProvider');
+      // Render and assert inside try so the spy is restored even on failure.
+      render(
+        <ErrorBoundary onError={onError}>
+          <TestHarness contextRef={{ current: undefined }} />
+        </ErrorBoundary>,
+      );
+
+      expect(onError).toHaveBeenCalledWith(
+        expect.objectContaining({
+          message: 'useSessionStats must be used within a SessionStatsProvider',
+        }),
+      );
     } finally {
       consoleSpy.mockRestore();
     }
   });
 });
diff --git a/packages/cli/src/ui/hooks/useAtCompletion.test.ts b/packages/cli/src/ui/hooks/useAtCompletion.test.ts
index 5b4687f02c..42c63ae62b 100644
--- a/packages/cli/src/ui/hooks/useAtCompletion.test.ts
+++ b/packages/cli/src/ui/hooks/useAtCompletion.test.ts
@@ -4,16 +4,14 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
-import { renderHook, waitFor, act } from '@testing-library/react';
+import { act, useState } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useAtCompletion } from './useAtCompletion.js';
 import type { Config, FileSearch } from '@google/gemini-cli-core';
 import { FileSearchFactory } from '@google/gemini-cli-core';
 import type { FileSystemStructure } from '@google/gemini-cli-test-utils';
 import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
-import { useState } from 'react';
 import type { Suggestion } from '../components/SuggestionsDisplay.js';
 
 // Test harness to capture the state from the hook's callbacks.
@@ -76,7 +74,7 @@ describe('useAtCompletion', () => {
         useTestHarnessForAtCompletion(true, '', mockConfig, testRootDir),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBeGreaterThan(0);
       });
 
@@ -106,7 +104,7 @@ describe('useAtCompletion', () => {
         useTestHarnessForAtCompletion(true, 'src/', mockConfig, testRootDir),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBeGreaterThan(0);
       });
 
@@ -129,7 +127,7 @@ describe('useAtCompletion', () => {
         useTestHarnessForAtCompletion(true, '', mockConfig, testRootDir),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBeGreaterThan(0);
       });
 
@@ -166,7 +164,7 @@ describe('useAtCompletion', () => {
       );
 
       // The hook should find 'cRaZycAsE.txt' even though the pattern is 'CrAzYCaSe'.
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'cRaZycAsE.txt',
         ]);
@@ -177,15 +175,29 @@ describe('useAtCompletion', () => {
   describe('UI State and Loading Behavior', () => {
     it('should be in a loading state during initial file system crawl', async () => {
       testRootDir = await createTmpDir({});
+
+      // Mock FileSearch to be slow to catch the loading state
+      const mockFileSearch = {
+        initialize: vi.fn().mockImplementation(async () => {
+          await new Promise((resolve) => setTimeout(resolve, 50));
+        }),
+        search: vi.fn().mockResolvedValue([]),
+      };
+      vi.spyOn(FileSearchFactory, 'create').mockReturnValue(
+        mockFileSearch as unknown as FileSearch,
+      );
+
       const { result } = renderHook(() =>
         useTestHarnessForAtCompletion(true, '', mockConfig, testRootDir),
       );
 
       // It's initially true because the effect runs synchronously.
-      expect(result.current.isLoadingSuggestions).toBe(true);
+      await vi.waitFor(() => {
+        expect(result.current.isLoadingSuggestions).toBe(true);
+      });
 
       // Wait for the loading to complete.
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.isLoadingSuggestions).toBe(false);
       });
     });
@@ -200,7 +212,7 @@ describe('useAtCompletion', () => {
         { initialProps: { pattern: 'a' } },
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'a.txt',
         ]);
@@ -210,7 +222,7 @@ describe('useAtCompletion', () => {
       rerender({ pattern: 'b' });
 
       // Wait for the final result
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'b.txt',
         ]);
@@ -253,7 +265,7 @@ describe('useAtCompletion', () => {
       );
 
       // Wait for the initial search to complete (using real timers)
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'a.txt',
         ]);
@@ -283,7 +295,7 @@ describe('useAtCompletion', () => {
       vi.useRealTimers();
 
       // Wait for the search results to be processed
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'b.txt',
         ]);
@@ -314,7 +326,7 @@ describe('useAtCompletion', () => {
       );
 
       // Wait for the hook to be ready (initialization is complete)
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockFileSearch.search).toHaveBeenCalledWith(
           'a',
           expect.any(Object),
@@ -330,7 +342,7 @@ describe('useAtCompletion', () => {
       expect(abortSpy).toHaveBeenCalledTimes(1);
 
       // Wait for the final result, which should be from the second, faster search.
-      await waitFor(
+      await vi.waitFor(
         () => {
           expect(result.current.suggestions.map((s) => s.value)).toEqual(['b']);
         },
@@ -357,7 +369,7 @@ describe('useAtCompletion', () => {
       );
 
       // Wait for the hook to be ready and have suggestions
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'a.txt',
         ]);
@@ -389,7 +401,7 @@ describe('useAtCompletion', () => {
       );
 
       // Wait for the hook to enter the error state
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.isLoadingSuggestions).toBe(false);
       });
       expect(result.current.suggestions).toEqual([]); // No suggestions on error
@@ -420,7 +432,7 @@ describe('useAtCompletion', () => {
         useTestHarnessForAtCompletion(true, '', mockConfig, testRootDir),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBeGreaterThan(0);
       });
 
@@ -441,7 +453,7 @@ describe('useAtCompletion', () => {
         useTestHarnessForAtCompletion(true, '', undefined, testRootDir),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBeGreaterThan(0);
       });
 
@@ -469,7 +481,7 @@ describe('useAtCompletion', () => {
       );
 
       // Wait for initial suggestions from the first directory
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'file1.txt',
         ]);
@@ -481,13 +493,13 @@ describe('useAtCompletion', () => {
       });
 
       // After CWD changes, suggestions should be cleared and it should load again.
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.isLoadingSuggestions).toBe(true);
         expect(result.current.suggestions).toEqual([]);
       });
 
       // Wait for the new suggestions from the second directory
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.map((s) => s.value)).toEqual([
           'file2.txt',
         ]);
@@ -525,7 +537,7 @@ describe('useAtCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions.length).toBeGreaterThan(0);
       });
 
diff --git a/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts b/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts
index 25b515de6b..910c0960c0 100644
--- a/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts
+++ b/packages/cli/src/ui/hooks/useAutoAcceptIndicator.test.ts
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import {
   describe,
   it,
@@ -15,7 +13,8 @@ import {
   type MockedFunction,
   type Mock,
 } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useAutoAcceptIndicator } from './useAutoAcceptIndicator.js';
 
 import { Config, ApprovalMode } from '@google/gemini-cli-core';
diff --git a/packages/cli/src/ui/hooks/useFlickerDetector.test.ts b/packages/cli/src/ui/hooks/useFlickerDetector.test.ts
index aa60378648..cbe5e4f14e 100644
--- a/packages/cli/src/ui/hooks/useFlickerDetector.test.ts
+++ b/packages/cli/src/ui/hooks/useFlickerDetector.test.ts
@@ -4,9 +4,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { renderHook } from '@testing-library/react';
+import { renderHook } from '../../test-utils/render.js';
 import { vi, type Mock } from 'vitest';
 import { useFlickerDetector } from './useFlickerDetector.js';
 import { useConfig } from '../contexts/ConfigContext.js';
@@ -19,10 +17,15 @@ import { appEvents, AppEvent } from '../../utils/events.js';
 // Mock dependencies
 vi.mock('../contexts/ConfigContext.js');
 vi.mock('../contexts/UIStateContext.js');
-vi.mock('@google/gemini-cli-core', () => ({
-  recordFlickerFrame: vi.fn(),
-  GEMINI_DIR: '.gemini',
-}));
+vi.mock('@google/gemini-cli-core', async (importOriginal) => {
+  const actual =
+    await importOriginal<typeof import('@google/gemini-cli-core')>();
+  return {
+    ...actual, // real module exports; the entries below override them
+    recordFlickerFrame: vi.fn(),
+    GEMINI_DIR: '.gemini',
+  };
+});
 vi.mock('ink', async (importOriginal) => {
   const original = await importOriginal<typeof import('ink')>();
   return {
diff --git a/packages/cli/src/ui/hooks/useFolderTrust.test.ts b/packages/cli/src/ui/hooks/useFolderTrust.test.ts
index cc663a11d9..25609ab65c 100644
--- a/packages/cli/src/ui/hooks/useFolderTrust.test.ts
+++ b/packages/cli/src/ui/hooks/useFolderTrust.test.ts
@@ -4,10 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { vi, type Mock, type MockInstance } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useFolderTrust } from './useFolderTrust.js';
 import type { LoadedSettings } from '../../config/settings.js';
 import { FolderTrustChoice } from '../components/FolderTrustDialog.js';
@@ -30,7 +29,6 @@ vi.mock('node:process', async () => {
 describe('useFolderTrust', () => {
   let mockSettings: LoadedSettings;
   let mockTrustedFolders: LoadedTrustedFolders;
-  let loadTrustedFoldersSpy: MockInstance;
   let isWorkspaceTrustedSpy: MockInstance;
   let onTrustChange: (isTrusted: boolean | undefined) => void;
   let addItem: Mock;
@@ -51,9 +49,9 @@ describe('useFolderTrust', () => {
       setValue: vi.fn(),
     } as unknown as LoadedTrustedFolders;
 
-    loadTrustedFoldersSpy = vi
-      .spyOn(trustedFolders, 'loadTrustedFolders')
-      .mockReturnValue(mockTrustedFolders);
+    vi.spyOn(trustedFolders, 'loadTrustedFolders').mockReturnValue(
+      mockTrustedFolders,
+    );
     isWorkspaceTrustedSpy = vi.spyOn(trustedFolders, 'isWorkspaceTrusted');
     mockedCwd.mockReturnValue('/test/path');
     onTrustChange = vi.fn();
@@ -82,7 +80,7 @@ describe('useFolderTrust', () => {
     expect(onTrustChange).toHaveBeenCalledWith(false);
   });
 
-  it('should open dialog when folder trust is undefined', () => {
+  it('should open dialog when folder trust is undefined', async () => {
     isWorkspaceTrustedSpy.mockReturnValue({
       isTrusted: undefined,
       source: undefined,
@@ -90,7 +88,9 @@ describe('useFolderTrust', () => {
     const { result } = renderHook(() =>
       useFolderTrust(mockSettings, onTrustChange, addItem),
     );
-    expect(result.current.isFolderTrustDialogOpen).toBe(true);
+    await vi.waitFor(() => {
+      expect(result.current.isFolderTrustDialogOpen).toBe(true);
+    });
     expect(onTrustChange).toHaveBeenCalledWith(undefined);
   });
 
@@ -112,26 +112,41 @@ describe('useFolderTrust', () => {
     expect(addItem).not.toHaveBeenCalled();
   });
 
-  it('should handle TRUST_FOLDER choice', () => {
+  it('should handle TRUST_FOLDER choice', async () => {
     isWorkspaceTrustedSpy.mockReturnValue({
       isTrusted: undefined,
       source: undefined,
     });
+
+    (mockTrustedFolders.setValue as Mock).mockImplementation(() => {
+      isWorkspaceTrustedSpy.mockReturnValue({
+        isTrusted: true,
+        source: 'file',
+      });
+    });
+
     const { result } = renderHook(() =>
       useFolderTrust(mockSettings, onTrustChange, addItem),
     );
 
-    act(() => {
-      result.current.handleFolderTrustSelect(FolderTrustChoice.TRUST_FOLDER);
+    await vi.waitFor(() => {
+      expect(result.current.isTrusted).toBeUndefined();
     });
 
-    expect(loadTrustedFoldersSpy).toHaveBeenCalled();
-    expect(mockTrustedFolders.setValue).toHaveBeenCalledWith(
-      '/test/path',
-      TrustLevel.TRUST_FOLDER,
-    );
-    expect(result.current.isFolderTrustDialogOpen).toBe(false);
-    expect(onTrustChange).toHaveBeenLastCalledWith(true);
+    await act(async () => {
+      await result.current.handleFolderTrustSelect(
+        FolderTrustChoice.TRUST_FOLDER,
+      );
+    });
+
+    await vi.waitFor(() => {
+      expect(mockTrustedFolders.setValue).toHaveBeenCalledWith(
+        '/test/path',
+        TrustLevel.TRUST_FOLDER,
+      );
+      expect(result.current.isFolderTrustDialogOpen).toBe(false);
+      expect(onTrustChange).toHaveBeenLastCalledWith(true);
+    });
   });
 
   it('should handle TRUST_PARENT choice', () => {
@@ -177,7 +192,7 @@ describe('useFolderTrust', () => {
     expect(result.current.isFolderTrustDialogOpen).toBe(true);
   });
 
-  it('should do nothing for default choice', () => {
+  it('should do nothing for default choice', async () => {
     isWorkspaceTrustedSpy.mockReturnValue({
       isTrusted: undefined,
       source: undefined,
@@ -192,24 +207,40 @@ describe('useFolderTrust', () => {
       );
     });
 
-    expect(mockTrustedFolders.setValue).not.toHaveBeenCalled();
-    expect(mockSettings.setValue).not.toHaveBeenCalled();
-    expect(result.current.isFolderTrustDialogOpen).toBe(true);
-    expect(onTrustChange).toHaveBeenCalledWith(undefined);
+    await vi.waitFor(() => {
+      expect(mockTrustedFolders.setValue).not.toHaveBeenCalled();
+      expect(mockSettings.setValue).not.toHaveBeenCalled();
+      expect(result.current.isFolderTrustDialogOpen).toBe(true);
+      expect(onTrustChange).toHaveBeenCalledWith(undefined);
+    });
   });
 
-  it('should set isRestarting to true when trust status changes from false to true', () => {
+  it('should set isRestarting to true when trust status changes from false to true', async () => {
     isWorkspaceTrustedSpy.mockReturnValue({ isTrusted: false, source: 'file' }); // Initially untrusted
+
+    (mockTrustedFolders.setValue as Mock).mockImplementation(() => {
+      isWorkspaceTrustedSpy.mockReturnValue({
+        isTrusted: true,
+        source: 'file',
+      });
+    });
+
     const { result } = renderHook(() =>
       useFolderTrust(mockSettings, onTrustChange, addItem),
     );
 
+    await vi.waitFor(() => {
+      expect(result.current.isTrusted).toBe(false);
+    });
+
     act(() => {
       result.current.handleFolderTrustSelect(FolderTrustChoice.TRUST_FOLDER);
     });
 
-    expect(result.current.isRestarting).toBe(true);
-    expect(result.current.isFolderTrustDialogOpen).toBe(true); // Dialog should stay open
+    await vi.waitFor(() => {
+      expect(result.current.isRestarting).toBe(true);
+      expect(result.current.isFolderTrustDialogOpen).toBe(true); // Dialog should stay open
+    });
   });
 
   it('should not set isRestarting to true when trust status does not change', () => {
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 37698a09b9..ae3566feba 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -4,12 +4,11 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import type { Mock, MockInstance } from 'vitest';
 import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { renderHook, act, waitFor } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useGeminiStream } from './useGeminiStream.js';
 import { useKeypress } from './useKeypress.js';
 import * as atCommandProcessor from './atCommandProcessor.js';
@@ -507,7 +506,7 @@ describe('useGeminiStream', () => {
       }
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockMarkToolsAsSubmitted).toHaveBeenCalledTimes(1);
       expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
     });
@@ -590,7 +589,7 @@ describe('useGeminiStream', () => {
       }
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith(['1']);
       expect(client.addHistory).toHaveBeenCalledWith({
         role: 'user',
@@ -702,7 +701,7 @@ describe('useGeminiStream', () => {
       }
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       // The tools should be marked as submitted locally
       expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith([
         'cancel-1',
@@ -840,7 +839,7 @@ describe('useGeminiStream', () => {
     });
 
     // 5. Wait for submitQuery to be called
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockSendMessageStream).toHaveBeenCalledWith(
         toolCallResponseParts,
         expect.any(AbortSignal),
@@ -889,7 +888,7 @@ describe('useGeminiStream', () => {
       });
 
       // Wait for the first part of the response
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.streamingState).toBe(StreamingState.Responding);
       });
 
@@ -897,7 +896,7 @@ describe('useGeminiStream', () => {
       simulateEscapeKeyPress();
 
       // Verify cancellation message is added
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           {
             type: MessageType.INFO,
@@ -1030,7 +1029,7 @@ describe('useGeminiStream', () => {
         result.current.submitQuery('long running query');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.streamingState).toBe(StreamingState.Responding);
       });
 
@@ -1138,7 +1137,7 @@ describe('useGeminiStream', () => {
       expect(mockCancelAllToolCalls).toHaveBeenCalled();
 
       // A cancellation message should be added to history
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           expect.objectContaining({
             text: 'Request cancelled.',
@@ -1167,7 +1166,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('/memory add "test fact"');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockScheduleToolCalls).toHaveBeenCalledWith(
           [
             expect.objectContaining({
@@ -1194,7 +1193,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('/help');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockHandleSlashCommand).toHaveBeenCalledWith('/help');
         expect(mockScheduleToolCalls).not.toHaveBeenCalled();
         expect(mockSendMessageStream).not.toHaveBeenCalled(); // No LLM call made
@@ -1215,7 +1214,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('/my-custom-command');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockHandleSlashCommand).toHaveBeenCalledWith(
           '/my-custom-command',
         );
@@ -1250,7 +1249,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('/emptycmd');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockHandleSlashCommand).toHaveBeenCalledWith('/emptycmd');
         expect(localMockSendMessageStream).toHaveBeenCalledWith(
           '',
@@ -1268,7 +1267,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('// This is a line comment');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockHandleSlashCommand).not.toHaveBeenCalled();
         expect(localMockSendMessageStream).toHaveBeenCalledWith(
           '// This is a line comment',
@@ -1286,7 +1285,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('/* This is a block comment */');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockHandleSlashCommand).not.toHaveBeenCalled();
         expect(localMockSendMessageStream).toHaveBeenCalledWith(
           '/* This is a block comment */',
@@ -1324,7 +1323,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('/about');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockHandleSlashCommand).not.toHaveBeenCalled();
       });
     });
@@ -1401,7 +1400,7 @@ describe('useGeminiStream', () => {
         }
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockPerformMemoryRefresh).toHaveBeenCalledTimes(1);
       });
     });
@@ -1457,7 +1456,7 @@ describe('useGeminiStream', () => {
       });
 
       // 3. Assertion
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockParseAndFormatApiError).toHaveBeenCalledWith(
           'Rate limit exceeded',
           mockAuthType,
@@ -1990,7 +1989,7 @@ describe('useGeminiStream', () => {
       });
 
       // Check that the info message was added
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           {
             type: 'info',
@@ -2050,7 +2049,7 @@ describe('useGeminiStream', () => {
         });
 
         // Check that the message was added without suggestion
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(mockAddItem).toHaveBeenCalledWith(
             {
               type: 'info',
@@ -2105,7 +2104,7 @@ describe('useGeminiStream', () => {
         });
 
         // Check that the message was added with suggestion
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(mockAddItem).toHaveBeenCalledWith(
             {
               type: 'info',
@@ -2161,7 +2160,7 @@ describe('useGeminiStream', () => {
       });
 
       // Check that onCancelSubmit was called
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(onCancelSubmitSpy).toHaveBeenCalled();
       });
     });
@@ -2360,7 +2359,7 @@ describe('useGeminiStream', () => {
           await result.current.submitQuery(`Test ${reason}`);
         });
 
-        await waitFor(() => {
+        await vi.waitFor(() => {
           expect(mockAddItem).toHaveBeenCalledWith(
             {
               type: 'info',
@@ -2487,7 +2486,7 @@ describe('useGeminiStream', () => {
       });
 
       // Wait for the first response to complete
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           expect.objectContaining({
             type: 'gemini',
@@ -2520,7 +2519,7 @@ describe('useGeminiStream', () => {
       // We can verify this by checking that the LoadingIndicator would not show the previous thought
       // The actual thought state is internal to the hook, but we can verify the behavior
       // by ensuring the second response doesn't show the previous thought
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           expect.objectContaining({
             type: 'gemini',
@@ -2638,7 +2637,7 @@ describe('useGeminiStream', () => {
       });
 
       // Verify cancellation message was added
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           expect.objectContaining({
             type: 'info',
@@ -2696,7 +2695,7 @@ describe('useGeminiStream', () => {
       });
 
       // Verify error message was added
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           expect.objectContaining({
             type: 'error',
@@ -2747,7 +2746,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('test query');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
         expect(
           typeof result.current.loopDetectionConfirmationRequest?.onComplete,
@@ -2795,7 +2794,7 @@ describe('useGeminiStream', () => {
       });
 
       // Wait for confirmation request to be set
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
       });
 
@@ -2824,7 +2823,7 @@ describe('useGeminiStream', () => {
       );
 
       // Verify that the request was retried
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
         expect(mockSendMessageStream).toHaveBeenNthCalledWith(
           2,
@@ -2860,7 +2859,7 @@ describe('useGeminiStream', () => {
       });
 
       // Wait for confirmation request to be set
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
       });
 
@@ -2907,7 +2906,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('first query');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
       });
 
@@ -2957,7 +2956,7 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('second query');
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
       });
 
@@ -2980,7 +2979,7 @@ describe('useGeminiStream', () => {
       );
 
       // Verify that the request was retried
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockSendMessageStream).toHaveBeenCalledTimes(3); // 1st query, 2nd query, retry of 2nd query
         expect(mockSendMessageStream).toHaveBeenNthCalledWith(
           3,
@@ -3011,7 +3010,7 @@ describe('useGeminiStream', () => {
       });
 
       // Verify that the content was added to history before the loop detection dialog
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockAddItem).toHaveBeenCalledWith(
           expect.objectContaining({
             type: 'gemini',
@@ -3022,7 +3021,7 @@ describe('useGeminiStream', () => {
       });
 
       // Then verify loop detection confirmation request was set
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.loopDetectionConfirmationRequest).not.toBeNull();
       });
     });
diff --git a/packages/cli/src/ui/hooks/useHistoryManager.test.ts b/packages/cli/src/ui/hooks/useHistoryManager.test.ts
index d813379ac2..cff7ef69bf 100644
--- a/packages/cli/src/ui/hooks/useHistoryManager.test.ts
+++ b/packages/cli/src/ui/hooks/useHistoryManager.test.ts
@@ -4,10 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { describe, it, expect } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useHistory } from './useHistoryManager.js';
 import type { HistoryItem } from '../types.js';
 
diff --git a/packages/cli/src/ui/hooks/useInputHistory.test.ts b/packages/cli/src/ui/hooks/useInputHistory.test.ts
index 55e0b63182..6d0d7fad2f 100644
--- a/packages/cli/src/ui/hooks/useInputHistory.test.ts
+++ b/packages/cli/src/ui/hooks/useInputHistory.test.ts
@@ -4,9 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { act, renderHook } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useInputHistory } from './useInputHistory.js';
 
 describe('useInputHistory', () => {
diff --git a/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts b/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts
index 6953ce1b37..ee7aa7d86d 100644
--- a/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts
+++ b/packages/cli/src/ui/hooks/useInputHistoryStore.test.ts
@@ -4,9 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { act, renderHook } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { vi, describe, it, expect, beforeEach } from 'vitest';
 import { useInputHistoryStore } from './useInputHistoryStore.js';
 
diff --git a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts b/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts
index 9549274160..d317170c18 100644
--- a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts
+++ b/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts
@@ -4,10 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-/// <reference types="vitest/globals" />
-
 import {
   describe,
   it,
@@ -17,7 +13,8 @@ import {
   afterEach,
   type Mock,
 } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { usePermissionsModifyTrust } from './usePermissionsModifyTrust.js';
 import { TrustLevel } from '../../config/trustedFolders.js';
 import type { LoadedSettings } from '../../config/settings.js';
diff --git a/packages/cli/src/ui/hooks/usePhraseCycler.test.ts b/packages/cli/src/ui/hooks/usePhraseCycler.test.ts
deleted file mode 100644
index bfa53ff8c8..0000000000
--- a/packages/cli/src/ui/hooks/usePhraseCycler.test.ts
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * @license
- * Copyright 2025 Google LLC
- * SPDX-License-Identifier: Apache-2.0
- */
-
-/** @vitest-environment jsdom */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
-import {
-  usePhraseCycler,
-  WITTY_LOADING_PHRASES,
-  PHRASE_CHANGE_INTERVAL_MS,
-} from './usePhraseCycler.js';
-
-describe('usePhraseCycler', () => {
-  beforeEach(() => {
-    vi.useFakeTimers();
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  it('should initialize with a witty phrase when not active and not waiting', () => {
-    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result } = renderHook(() => usePhraseCycler(false, false));
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-  });
-
-  it('should show "Waiting for user confirmation..." when isWaiting is true', () => {
-    const { result, rerender } = renderHook(
-      ({ isActive, isWaiting }) => usePhraseCycler(isActive, isWaiting),
-      { initialProps: { isActive: true, isWaiting: false } },
-    );
-    rerender({ isActive: true, isWaiting: true });
-    expect(result.current).toBe('Waiting for user confirmation...');
-  });
-
-  it('should not cycle phrases if isActive is false and not waiting', () => {
-    const { result } = renderHook(() => usePhraseCycler(false, false));
-    const initialPhrase = result.current;
-    act(() => {
-      vi.advanceTimersByTime(PHRASE_CHANGE_INTERVAL_MS * 2);
-    });
-    expect(result.current).toBe(initialPhrase);
-  });
-
-  it('should cycle through witty phrases when isActive is true and not waiting', () => {
-    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result } = renderHook(() => usePhraseCycler(true, false));
-    // Initial phrase should be one of the witty phrases
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-
-    act(() => {
-      vi.advanceTimersByTime(PHRASE_CHANGE_INTERVAL_MS);
-    });
-    // Phrase should change and be one of the witty phrases
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-
-    act(() => {
-      vi.advanceTimersByTime(PHRASE_CHANGE_INTERVAL_MS);
-    });
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-  });
-
-  it('should reset to a witty phrase when isActive becomes true after being false (and not waiting)', () => {
-    // Ensure there are at least two phrases for this test to be meaningful.
-    if (WITTY_LOADING_PHRASES.length < 2) {
-      return;
-    }
-
-    // Mock Math.random to make the test deterministic.
-    const mockRandomValues = [
-      0.5, // -> witty
-      0, // -> index 0
-      0.5, // -> witty
-      1 / WITTY_LOADING_PHRASES.length, // -> index 1
-      0.5, // -> witty
-      0, // -> index 0
-    ];
-    let randomCallCount = 0;
-    vi.spyOn(Math, 'random').mockImplementation(() => {
-      const val = mockRandomValues[randomCallCount % mockRandomValues.length];
-      randomCallCount++;
-      return val;
-    });
-
-    const { result, rerender } = renderHook(
-      ({ isActive, isWaiting }) => usePhraseCycler(isActive, isWaiting),
-      { initialProps: { isActive: false, isWaiting: false } },
-    );
-
-    // Activate
-    rerender({ isActive: true, isWaiting: false });
-    const firstActivePhrase = result.current;
-    expect(WITTY_LOADING_PHRASES).toContain(firstActivePhrase);
-    // With our mock, this should be the first phrase.
-    expect(firstActivePhrase).toBe(WITTY_LOADING_PHRASES[0]);
-
-    act(() => {
-      vi.advanceTimersByTime(PHRASE_CHANGE_INTERVAL_MS);
-    });
-
-    // Phrase should change to the second phrase.
-    expect(result.current).not.toBe(firstActivePhrase);
-    expect(result.current).toBe(WITTY_LOADING_PHRASES[1]);
-
-    // Set to inactive - should reset to the default initial phrase
-    rerender({ isActive: false, isWaiting: false });
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-
-    // Set back to active - should pick a random witty phrase (which our mock controls)
-    act(() => {
-      rerender({ isActive: true, isWaiting: false });
-    });
-    // The random mock will now return 0, so it should be the first phrase again.
-    expect(result.current).toBe(WITTY_LOADING_PHRASES[0]);
-  });
-
-  it('should clear phrase interval on unmount when active', () => {
-    const { unmount } = renderHook(() => usePhraseCycler(true, false));
-    const clearIntervalSpy = vi.spyOn(global, 'clearInterval');
-    unmount();
-    expect(clearIntervalSpy).toHaveBeenCalledOnce();
-  });
-
-  it('should use custom phrases when provided', () => {
-    const customPhrases = ['Custom Phrase 1', 'Custom Phrase 2'];
-    let callCount = 0;
-    const randomMock = vi.spyOn(Math, 'random').mockImplementation(() => {
-      const val = callCount % 2;
-      callCount++;
-      return val / customPhrases.length;
-    });
-
-    const { result, rerender } = renderHook(
-      ({ isActive, isWaiting, customPhrases: phrases }) =>
-        usePhraseCycler(isActive, isWaiting, phrases),
-      {
-        initialProps: {
-          isActive: true,
-          isWaiting: false,
-          customPhrases,
-        },
-      },
-    );
-
-    expect(result.current).toBe(customPhrases[0]);
-
-    act(() => {
-      vi.advanceTimersByTime(PHRASE_CHANGE_INTERVAL_MS);
-    });
-
-    expect(result.current).toBe(customPhrases[1]);
-
-    // Test fallback to default phrases.
-    randomMock.mockRestore();
-    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-
-    rerender({ isActive: true, isWaiting: false, customPhrases: [] });
-
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-  });
-
-  it('should fall back to witty phrases if custom phrases are an empty array', () => {
-    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result } = renderHook(
-      ({ isActive, isWaiting, customPhrases: phrases }) =>
-        usePhraseCycler(isActive, isWaiting, phrases),
-      {
-        initialProps: {
-          isActive: true,
-          isWaiting: false,
-          customPhrases: [],
-        },
-      },
-    );
-
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-  });
-
-  it('should reset to a witty phrase when transitioning from waiting to active', () => {
-    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
-    const { result, rerender } = renderHook(
-      ({ isActive, isWaiting }) => usePhraseCycler(isActive, isWaiting),
-      { initialProps: { isActive: true, isWaiting: false } },
-    );
-
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-
-    // Cycle to a different phrase (potentially)
-    act(() => {
-      vi.advanceTimersByTime(PHRASE_CHANGE_INTERVAL_MS);
-    });
-    if (WITTY_LOADING_PHRASES.length > 1) {
-      // This check is probabilistic with random selection
-    }
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-
-    // Go to waiting state
-    rerender({ isActive: false, isWaiting: true });
-    expect(result.current).toBe('Waiting for user confirmation...');
-
-    // Go back to active cycling - should pick a random witty phrase
-    rerender({ isActive: true, isWaiting: false });
-    expect(WITTY_LOADING_PHRASES).toContain(result.current);
-  });
-});
diff --git a/packages/cli/src/ui/hooks/usePhraseCycler.test.tsx b/packages/cli/src/ui/hooks/usePhraseCycler.test.tsx
new file mode 100644
index 0000000000..3e83b97536
--- /dev/null
+++ b/packages/cli/src/ui/hooks/usePhraseCycler.test.tsx
@@ -0,0 +1,216 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { act } from 'react';
+import { render } from 'ink-testing-library';
+import { Text } from 'ink';
+import {
+  usePhraseCycler,
+  WITTY_LOADING_PHRASES,
+  PHRASE_CHANGE_INTERVAL_MS,
+} from './usePhraseCycler.js';
+
+// Test harness: renders the hook's current phrase as <Text> so tests can read it via lastFrame()
+const TestComponent = ({
+  isActive,
+  isWaiting,
+  customPhrases,
+}: {
+  isActive: boolean;
+  isWaiting: boolean;
+  customPhrases?: string[];
+}) => {
+  const phrase = usePhraseCycler(isActive, isWaiting, customPhrases);
+  return <Text>{phrase}</Text>;
+};
+
+describe('usePhraseCycler', () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('should initialize with a witty phrase when not active and not waiting', () => {
+    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
+    const { lastFrame } = render(
+      <TestComponent isActive={false} isWaiting={false} />,
+    );
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+  });
+
+  it('should show "Waiting for user confirmation..." when isWaiting is true', async () => {
+    const { lastFrame, rerender } = render(
+      <TestComponent isActive={true} isWaiting={false} />,
+    );
+    rerender(<TestComponent isActive={true} isWaiting={true} />);
+    await vi.advanceTimersByTimeAsync(0);
+    expect(lastFrame()).toBe('Waiting for user confirmation...');
+  });
+
+  it('should not cycle phrases if isActive is false and not waiting', async () => {
+    const { lastFrame } = render(
+      <TestComponent isActive={false} isWaiting={false} />,
+    );
+    const initialPhrase = lastFrame();
+    await vi.advanceTimersByTimeAsync(PHRASE_CHANGE_INTERVAL_MS * 2);
+    expect(lastFrame()).toBe(initialPhrase);
+  });
+
+  it('should cycle through witty phrases when isActive is true and not waiting', async () => {
+    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
+    const { lastFrame } = render(
+      <TestComponent isActive={true} isWaiting={false} />,
+    );
+    // Flush pending timers first; the initial phrase should then be one of the witty phrases
+    await vi.advanceTimersByTimeAsync(0);
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+
+    await act(async () => {
+      await vi.advanceTimersByTimeAsync(PHRASE_CHANGE_INTERVAL_MS + 100);
+    });
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+
+    await vi.advanceTimersByTimeAsync(PHRASE_CHANGE_INTERVAL_MS);
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+  });
+
+  it('should reset to a phrase when isActive becomes true after being false', async () => {
+    const customPhrases = ['Phrase A', 'Phrase B'];
+    let callCount = 0;
+    vi.spyOn(Math, 'random').mockImplementation(() => {
+      // For custom phrases, only 1 Math.random call is made per update.
+      // 0 -> index 0 ('Phrase A')
+      // 0.99 -> index 1 ('Phrase B')
+      const val = callCount % 2 === 0 ? 0 : 0.99;
+      callCount++;
+      return val;
+    });
+
+    const { lastFrame, rerender } = render(
+      <TestComponent
+        isActive={false}
+        isWaiting={false}
+        customPhrases={customPhrases}
+      />,
+    );
+
+    // Activate -> callCount 0 -> returns 0 -> 'Phrase A'
+    rerender(
+      <TestComponent
+        isActive={true}
+        isWaiting={false}
+        customPhrases={customPhrases}
+      />,
+    );
+    await vi.advanceTimersByTimeAsync(0);
+    expect(lastFrame()).toBe('Phrase A');
+
+    // Interval -> callCount 1 -> returns 0.99 -> 'Phrase B'
+    await vi.advanceTimersByTimeAsync(PHRASE_CHANGE_INTERVAL_MS);
+    expect(lastFrame()).toBe('Phrase B');
+
+    // Deactivate -> resets to customPhrases[0] -> 'Phrase A'
+    rerender(
+      <TestComponent
+        isActive={false}
+        isWaiting={false}
+        customPhrases={customPhrases}
+      />,
+    );
+    await vi.advanceTimersByTimeAsync(0);
+    expect(lastFrame()).toBe('Phrase A');
+
+    // Activate again -> callCount 2 -> returns 0 -> 'Phrase A'
+    rerender(
+      <TestComponent
+        isActive={true}
+        isWaiting={false}
+        customPhrases={customPhrases}
+      />,
+    );
+    await vi.advanceTimersByTimeAsync(0);
+    expect(lastFrame()).toBe('Phrase A');
+  });
+
+  it('should clear phrase interval on unmount when active', () => {
+    const { unmount } = render(
+      <TestComponent isActive={true} isWaiting={false} />,
+    );
+    const clearIntervalSpy = vi.spyOn(global, 'clearInterval');
+    unmount();
+    expect(clearIntervalSpy).toHaveBeenCalledOnce();
+  });
+
+  it('should use custom phrases when provided', async () => {
+    const customPhrases = ['Custom Phrase 1', 'Custom Phrase 2'];
+    const randomMock = vi.spyOn(Math, 'random');
+    randomMock.mockReturnValue(0);
+
+    const { lastFrame, rerender } = render(
+      <TestComponent
+        isActive={true}
+        isWaiting={false}
+        customPhrases={customPhrases}
+      />,
+    );
+
+    expect(lastFrame()).toBe('Custom Phrase 1');
+
+    randomMock.mockReturnValue(0.99);
+    await vi.advanceTimersByTimeAsync(PHRASE_CHANGE_INTERVAL_MS + 100);
+
+    expect(lastFrame()).toBe('Custom Phrase 2');
+
+    // Test fallback to default phrases.
+    randomMock.mockRestore();
+    vi.spyOn(Math, 'random').mockReturnValue(0.5); // Always witty
+
+    rerender(
+      <TestComponent isActive={true} isWaiting={false} customPhrases={[]} />,
+    );
+    await vi.advanceTimersByTimeAsync(0);
+
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+  });
+
+  it('should fall back to witty phrases if custom phrases are an empty array', async () => {
+    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
+    const { lastFrame } = render(
+      <TestComponent isActive={true} isWaiting={false} customPhrases={[]} />,
+    );
+    await vi.advanceTimersByTimeAsync(0);
+
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+  });
+
+  it('should reset to a witty phrase when transitioning from waiting to active', async () => {
+    vi.spyOn(Math, 'random').mockImplementation(() => 0.5); // Always witty
+    const { lastFrame, rerender } = render(
+      <TestComponent isActive={true} isWaiting={false} />,
+    );
+    await vi.advanceTimersByTimeAsync(0);
+
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+
+    // Advance one interval; Math.random is pinned, so only membership (not a change) is asserted
+    await vi.advanceTimersByTimeAsync(PHRASE_CHANGE_INTERVAL_MS);
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+
+    // Go to waiting state
+    rerender(<TestComponent isActive={false} isWaiting={true} />);
+    await vi.advanceTimersByTimeAsync(0);
+    expect(lastFrame()).toBe('Waiting for user confirmation...');
+
+    // Go back to active cycling - should pick a random witty phrase
+    rerender(<TestComponent isActive={true} isWaiting={false} />);
+    await vi.advanceTimersByTimeAsync(0);
+    expect(WITTY_LOADING_PHRASES).toContain(lastFrame());
+  });
+});
diff --git a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
index e3a86009dd..edadbbacfc 100644
--- a/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
+++ b/packages/cli/src/ui/hooks/useQuotaAndFallback.test.ts
@@ -4,8 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import {
   vi,
   describe,
@@ -15,7 +13,8 @@ import {
   afterEach,
   type Mock,
 } from 'vitest';
-import { act, renderHook } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import {
   type Config,
   type FallbackModelHandler,
diff --git a/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts b/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts
index ac38b5d1e4..84d948b64d 100644
--- a/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts
+++ b/packages/cli/src/ui/hooks/useReactToolScheduler.test.ts
@@ -4,11 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { CoreToolScheduler } from '@google/gemini-cli-core';
 import type { Config } from '@google/gemini-cli-core';
-import { renderHook } from '@testing-library/react';
+import { renderHook } from '../../test-utils/render.js';
 import { vi, describe, it, expect, beforeEach } from 'vitest';
 import { useReactToolScheduler } from './useReactToolScheduler.js';
 
diff --git a/packages/cli/src/ui/hooks/useReverseSearchCompletion.test.tsx b/packages/cli/src/ui/hooks/useReverseSearchCompletion.test.tsx
index 373696ce4c..0b41c69441 100644
--- a/packages/cli/src/ui/hooks/useReverseSearchCompletion.test.tsx
+++ b/packages/cli/src/ui/hooks/useReverseSearchCompletion.test.tsx
@@ -4,10 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { describe, it, expect } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useReverseSearchCompletion } from './useReverseSearchCompletion.js';
 import { useTextBuffer } from '../components/shared/text-buffer.js';
 
diff --git a/packages/cli/src/ui/hooks/useShellHistory.test.ts b/packages/cli/src/ui/hooks/useShellHistory.test.ts
index 865bc7cf3f..a682d0acb7 100644
--- a/packages/cli/src/ui/hooks/useShellHistory.test.ts
+++ b/packages/cli/src/ui/hooks/useShellHistory.test.ts
@@ -4,13 +4,12 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
-import { renderHook, act, waitFor } from '@testing-library/react';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import { useShellHistory } from './useShellHistory.js';
 import * as fs from 'node:fs/promises';
 import * as path from 'node:path';
-import * as os from 'node:os';
 import * as crypto from 'node:crypto';
 import { GEMINI_DIR } from '@google/gemini-cli-core';
 
@@ -19,7 +18,14 @@ vi.mock('node:fs/promises', () => ({
   writeFile: vi.fn(),
   mkdir: vi.fn(),
 }));
-vi.mock('node:os');
+const mockHomedir = vi.hoisted(() => vi.fn(() => '/tmp/mock-home'));
+vi.mock('node:os', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('node:os')>();
+  return {
+    ...actual,
+    homedir: mockHomedir,
+  };
+});
 vi.mock('node:crypto');
 vi.mock('node:fs', async (importOriginal) => {
   const actualFs = await importOriginal<typeof import('node:fs')>();
@@ -33,6 +39,9 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
     await importOriginal<typeof import('@google/gemini-cli-core')>();
   const path = await import('node:path');
   class Storage {
+    static getGlobalSettingsPath(): string {
+      return '/test/home/.gemini/settings.json';
+    }
     getProjectTempDir(): string {
       return path.join('/test/home/', actual.GEMINI_DIR, 'tmp', 'mocked_hash');
     }
@@ -68,7 +77,6 @@ const MOCKED_HISTORY_FILE = path.join(MOCKED_HISTORY_DIR, 'shell_history');
 
 describe('useShellHistory', () => {
   const mockedFs = vi.mocked(fs);
-  const mockedOs = vi.mocked(os);
   const mockedCrypto = vi.mocked(crypto);
 
   beforeEach(() => {
@@ -77,7 +85,7 @@ describe('useShellHistory', () => {
     mockedFs.readFile.mockResolvedValue('');
     mockedFs.writeFile.mockResolvedValue(undefined);
     mockedFs.mkdir.mockResolvedValue(undefined);
-    mockedOs.homedir.mockReturnValue(MOCKED_HOME_DIR);
+    mockHomedir.mockReturnValue(MOCKED_HOME_DIR);
 
     const hashMock = {
       update: vi.fn().mockReturnThis(),
@@ -90,7 +98,7 @@ describe('useShellHistory', () => {
     mockedFs.readFile.mockResolvedValue('cmd1\ncmd2');
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockedFs.readFile).toHaveBeenCalledWith(
         MOCKED_HISTORY_FILE,
         'utf-8',
@@ -113,7 +121,7 @@ describe('useShellHistory', () => {
 
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockedFs.readFile).toHaveBeenCalled();
     });
 
@@ -128,13 +136,15 @@ describe('useShellHistory', () => {
   it('should add a command and write to the history file', async () => {
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
 
-    await waitFor(() => expect(mockedFs.readFile).toHaveBeenCalled());
+    await vi.waitFor(() => {
+      expect(mockedFs.readFile).toHaveBeenCalled();
+    });
 
     act(() => {
       result.current.addCommandToHistory('new_command');
     });
 
-    await waitFor(() => {
+    await vi.waitFor(() => {
       expect(mockedFs.mkdir).toHaveBeenCalledWith(MOCKED_HISTORY_DIR, {
         recursive: true,
       });
@@ -156,7 +166,9 @@ describe('useShellHistory', () => {
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
 
     // Wait for history to be loaded: ['cmd3', 'cmd2', 'cmd1']
-    await waitFor(() => expect(mockedFs.readFile).toHaveBeenCalled());
+    await vi.waitFor(() => {
+      expect(mockedFs.readFile).toHaveBeenCalled();
+    });
 
     let command: string | null = null;
 
@@ -200,7 +212,10 @@ describe('useShellHistory', () => {
 
   it('should not add empty or whitespace-only commands to history', async () => {
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
-    await waitFor(() => expect(mockedFs.readFile).toHaveBeenCalled());
+
+    await vi.waitFor(() => {
+      expect(mockedFs.readFile).toHaveBeenCalled();
+    });
 
     act(() => {
       result.current.addCommandToHistory('   ');
@@ -214,14 +229,18 @@ describe('useShellHistory', () => {
     mockedFs.readFile.mockResolvedValue(oldCommands.join('\n'));
 
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
-    await waitFor(() => expect(mockedFs.readFile).toHaveBeenCalled());
+    await vi.waitFor(() => {
+      expect(mockedFs.readFile).toHaveBeenCalled();
+    });
 
     act(() => {
       result.current.addCommandToHistory('new_cmd');
     });
 
     // Wait for the async write to happen and then inspect the arguments.
-    await waitFor(() => expect(mockedFs.writeFile).toHaveBeenCalled());
+    await vi.waitFor(() => {
+      expect(mockedFs.writeFile).toHaveBeenCalled();
+    });
 
     // The hook stores history newest-first.
     // Initial state: ['old_cmd_119', ..., 'old_cmd_0']
@@ -240,15 +259,20 @@ describe('useShellHistory', () => {
     const { result } = renderHook(() => useShellHistory(MOCKED_PROJECT_ROOT));
 
     // Initial state: ['cmd3', 'cmd2', 'cmd1']
-    await waitFor(() => expect(mockedFs.readFile).toHaveBeenCalled());
+    await vi.waitFor(() => {
+      expect(mockedFs.readFile).toHaveBeenCalled();
+    });
 
     act(() => {
       result.current.addCommandToHistory('cmd1');
     });
 
     // After re-adding 'cmd1': ['cmd1', 'cmd3', 'cmd2']
-    // Written to file (reversed): ['cmd2', 'cmd3', 'cmd1']
-    await waitFor(() => expect(mockedFs.writeFile).toHaveBeenCalled());
+    expect(mockedFs.readFile).toHaveBeenCalled();
+
+    await vi.waitFor(() => {
+      expect(mockedFs.writeFile).toHaveBeenCalled();
+    });
 
     const writtenContent = mockedFs.writeFile.mock.calls[0][1] as string;
     const writtenLines = writtenContent.split('\n');
diff --git a/packages/cli/src/ui/hooks/useSlashCompletion.test.ts b/packages/cli/src/ui/hooks/useSlashCompletion.test.ts
index 371af516e7..e569e783be 100644
--- a/packages/cli/src/ui/hooks/useSlashCompletion.test.ts
+++ b/packages/cli/src/ui/hooks/useSlashCompletion.test.ts
@@ -4,10 +4,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 import { describe, it, expect, vi } from 'vitest';
-import { renderHook, waitFor } from '@testing-library/react';
+import { renderHook } from '../../test-utils/render.js';
 import { useSlashCompletion } from './useSlashCompletion.js';
 import type { CommandContext, SlashCommand } from '../commands/types.js';
 import { CommandKind } from '../commands/types.js';
@@ -205,10 +203,12 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      expect(result.current.suggestions.length).toBe(slashCommands.length);
-      expect(result.current.suggestions.map((s) => s.label)).toEqual(
-        expect.arrayContaining(['help', 'clear', 'memory', 'chat', 'stats']),
-      );
+      await vi.waitFor(() => {
+        expect(result.current.suggestions.length).toBe(slashCommands.length);
+        expect(result.current.suggestions.map((s) => s.label)).toEqual(
+          expect.arrayContaining(['help', 'clear', 'memory', 'chat', 'stats']),
+        );
+      });
     });
 
     it('should filter commands based on partial input', async () => {
@@ -224,7 +224,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toEqual([
           {
             label: 'memory',
@@ -253,7 +253,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toEqual([
           {
             label: 'stats',
@@ -369,8 +369,10 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      expect(result.current.suggestions.length).toBe(1);
-      expect(result.current.suggestions[0].label).toBe('visible');
+      await vi.waitFor(() => {
+        expect(result.current.suggestions.length).toBe(1);
+        expect(result.current.suggestions[0].label).toBe('visible');
+      });
     });
   });
 
@@ -390,29 +392,31 @@ describe('useSlashCompletion', () => {
       const { result } = renderHook(() =>
         useTestHarnessForSlashCompletion(
           true,
-          '/memory',
+          '/memory ',
           slashCommands,
           mockCommandContext,
         ),
       );
 
-      expect(result.current.suggestions).toHaveLength(2);
-      expect(result.current.suggestions).toEqual(
-        expect.arrayContaining([
-          {
-            label: 'show',
-            value: 'show',
-            description: 'Show memory',
-            commandKind: CommandKind.BUILT_IN,
-          },
-          {
-            label: 'add',
-            value: 'add',
-            description: 'Add to memory',
-            commandKind: CommandKind.BUILT_IN,
-          },
-        ]),
-      );
+      await vi.waitFor(() => {
+        expect(result.current.suggestions).toHaveLength(2);
+        expect(result.current.suggestions).toEqual(
+          expect.arrayContaining([
+            {
+              label: 'show',
+              value: 'show',
+              description: 'Show memory',
+              commandKind: CommandKind.BUILT_IN,
+            },
+            {
+              label: 'add',
+              value: 'add',
+              description: 'Add to memory',
+              commandKind: CommandKind.BUILT_IN,
+            },
+          ]),
+        );
+      });
     });
 
     it('should suggest all sub-commands when the query ends with the parent command and a space', async () => {
@@ -435,23 +439,25 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      expect(result.current.suggestions).toHaveLength(2);
-      expect(result.current.suggestions).toEqual(
-        expect.arrayContaining([
-          {
-            label: 'show',
-            value: 'show',
-            description: 'Show memory',
-            commandKind: CommandKind.BUILT_IN,
-          },
-          {
-            label: 'add',
-            value: 'add',
-            description: 'Add to memory',
-            commandKind: CommandKind.BUILT_IN,
-          },
-        ]),
-      );
+      await vi.waitFor(() => {
+        expect(result.current.suggestions).toHaveLength(2);
+        expect(result.current.suggestions).toEqual(
+          expect.arrayContaining([
+            {
+              label: 'show',
+              value: 'show',
+              description: 'Show memory',
+              commandKind: CommandKind.BUILT_IN,
+            },
+            {
+              label: 'add',
+              value: 'add',
+              description: 'Add to memory',
+              commandKind: CommandKind.BUILT_IN,
+            },
+          ]),
+        );
+      });
     });
 
     it('should filter sub-commands by prefix', async () => {
@@ -474,7 +480,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toEqual([
           {
             label: 'add',
@@ -547,7 +553,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockCompletionFn).toHaveBeenCalledWith(
           expect.objectContaining({
             invocation: {
@@ -560,7 +566,7 @@ describe('useSlashCompletion', () => {
         );
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toEqual([
           { label: 'my-chat-tag-1', value: 'my-chat-tag-1' },
           { label: 'my-chat-tag-2', value: 'my-chat-tag-2' },
@@ -596,7 +602,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(mockCompletionFn).toHaveBeenCalledWith(
           expect.objectContaining({
             invocation: {
@@ -609,7 +615,7 @@ describe('useSlashCompletion', () => {
         );
       });
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toHaveLength(3);
       });
     });
@@ -639,9 +645,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
-        expect(result.current.suggestions).toHaveLength(0);
-      });
+      expect(result.current.suggestions).toHaveLength(0);
     });
   });
 
@@ -714,7 +718,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toEqual([
           {
             label: 'summarize',
@@ -795,7 +799,7 @@ describe('useSlashCompletion', () => {
         ),
       );
 
-      await waitFor(() => {
+      await vi.waitFor(() => {
         expect(result.current.suggestions).toEqual([
           {
             label: 'custom-script',
diff --git a/packages/cli/src/ui/hooks/useToolScheduler.test.ts b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
index 11d1b7e7d8..59896ea487 100644
--- a/packages/cli/src/ui/hooks/useToolScheduler.test.ts
+++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
@@ -4,12 +4,11 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-/** @vitest-environment jsdom */
-
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import type { Mock } from 'vitest';
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { renderHook, act } from '@testing-library/react';
+import { act } from 'react';
+import { renderHook } from '../../test-utils/render.js';
 import {
   useReactToolScheduler,
   mapToDisplay,
@@ -38,7 +37,14 @@ import { ToolCallStatus } from '../types.js';
 
 // Mocks
 vi.mock('@google/gemini-cli-core', async () => {
-  const actual = await vi.importActual('@google/gemini-cli-core');
+  const actual = await vi.importActual<any>('@google/gemini-cli-core');
+  // Patch CoreToolScheduler to have cancelAll if it's missing in the test environment
+  if (
+    actual.CoreToolScheduler &&
+    !actual.CoreToolScheduler.prototype.cancelAll
+  ) {
+    actual.CoreToolScheduler.prototype.cancelAll = vi.fn();
+  }
   return {
     ...actual,
     ToolRegistry: vi.fn(),
@@ -153,13 +159,13 @@ describe('useReactToolScheduler in YOLO Mode', () => {
     });
 
     await act(async () => {
-      await vi.runAllTimersAsync(); // Process validation
+      await vi.advanceTimersByTimeAsync(0); // Process validation
     });
     await act(async () => {
-      await vi.runAllTimersAsync(); // Process scheduling
+      await vi.advanceTimersByTimeAsync(0); // Process scheduling
     });
     await act(async () => {
-      await vi.runAllTimersAsync(); // Process execution
+      await vi.advanceTimersByTimeAsync(0); // Process execution
     });
 
     // Check that execute WAS called
@@ -270,13 +276,13 @@ describe('useReactToolScheduler', () => {
     });
 
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(mockTool.execute).toHaveBeenCalledWith(request.args);
@@ -341,13 +347,13 @@ describe('useReactToolScheduler', () => {
 
     // Let the new call finish.
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     expect(onComplete).toHaveBeenCalled();
   });
@@ -375,11 +381,11 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     }); // validation
     await act(async () => {
-      await vi.runAllTimersAsync();
-    }); // scheduling
+      await vi.advanceTimersByTimeAsync(0); // Process scheduling
+    });
 
     // At this point, the tool is 'executing' and waiting on the promise.
     expect(result.current[0][0].status).toBe('executing');
@@ -390,7 +396,7 @@ describe('useReactToolScheduler', () => {
     });
 
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(onComplete).toHaveBeenCalledWith([
@@ -423,10 +429,10 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(completedToolCalls).toHaveLength(1);
@@ -462,10 +468,10 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(completedToolCalls).toHaveLength(1);
@@ -497,13 +503,13 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(completedToolCalls).toHaveLength(1);
@@ -532,7 +538,7 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     const waitingCall = result.current[0][0] as any;
@@ -545,13 +551,13 @@ describe('useReactToolScheduler', () => {
     });
 
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(mockOnUserConfirmForToolConfirmation).toHaveBeenCalledWith(
@@ -590,7 +596,7 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     const waitingCall = result.current[0][0] as any;
@@ -602,10 +608,10 @@ describe('useReactToolScheduler', () => {
       await capturedOnConfirmForTest?.(ToolConfirmationOutcome.Cancel);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(mockOnUserConfirmForToolConfirmation).toHaveBeenCalledWith(
@@ -665,7 +671,7 @@ describe('useReactToolScheduler', () => {
       schedule(request, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(liveUpdateFn).toBeDefined();
@@ -675,14 +681,14 @@ describe('useReactToolScheduler', () => {
       liveUpdateFn?.('Live output 1');
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     await act(async () => {
       liveUpdateFn?.('Live output 2');
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     act(() => {
@@ -692,10 +698,10 @@ describe('useReactToolScheduler', () => {
       } as ToolResult);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(onComplete).toHaveBeenCalledWith([
@@ -753,16 +759,16 @@ describe('useReactToolScheduler', () => {
       schedule(requests, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     expect(onComplete).toHaveBeenCalledTimes(1);
@@ -845,16 +851,16 @@ describe('useReactToolScheduler', () => {
       schedule(request1, new AbortController().signal);
     });
     await act(async () => {
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
     });
 
     schedule(request2, new AbortController().signal);
 
     await act(async () => {
       await vi.advanceTimersByTimeAsync(50);
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
       await act(async () => {
-        await vi.runAllTimersAsync();
+        await vi.advanceTimersByTimeAsync(0);
       });
     });
     expect(onComplete).toHaveBeenCalledWith([
@@ -867,9 +873,9 @@ describe('useReactToolScheduler', () => {
     // Wait for request2 to complete
     await act(async () => {
       await vi.advanceTimersByTimeAsync(50);
-      await vi.runAllTimersAsync();
+      await vi.advanceTimersByTimeAsync(0);
       await act(async () => {
-        await vi.runAllTimersAsync();
+        await vi.advanceTimersByTimeAsync(0);
       });
     });
     expect(onComplete).toHaveBeenCalledWith([

From c2d60d61ac7e2ad71603fd35d1d67f379cccfe6f Mon Sep 17 00:00:00 2001
From: JAYADITYA <96861162+JayadityaGit@users.noreply.github.com>
Date: Tue, 28 Oct 2025 23:46:26 +0530
Subject: [PATCH 59/73] feat: Add explore subcommand for extension (#11846)

Co-authored-by: christine betts <chrstn@uw.edu>
---
 .../src/ui/commands/extensionsCommand.test.ts | 98 ++++++++++++++++++-
 .../cli/src/ui/commands/extensionsCommand.ts  | 60 +++++++++++-
 2 files changed, 156 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/ui/commands/extensionsCommand.test.ts b/packages/cli/src/ui/commands/extensionsCommand.test.ts
index 562744a0de..c10b3896d1 100644
--- a/packages/cli/src/ui/commands/extensionsCommand.test.ts
+++ b/packages/cli/src/ui/commands/extensionsCommand.test.ts
@@ -9,9 +9,14 @@ import { createMockCommandContext } from '../../test-utils/mockCommandContext.js
 import { MessageType } from '../types.js';
 import { extensionsCommand } from './extensionsCommand.js';
 import { type CommandContext } from './types.js';
-import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { type ExtensionUpdateAction } from '../state/extensions.js';
 
+import open from 'open';
+vi.mock('open', () => ({
+  default: vi.fn(),
+}));
+
 vi.mock('../../config/extensions/update.js', () => ({
   updateExtension: vi.fn(),
   checkForAllExtensionUpdates: vi.fn(),
@@ -26,6 +31,7 @@ describe('extensionsCommand', () => {
   beforeEach(() => {
     vi.resetAllMocks();
     mockGetExtensions.mockReturnValue([]);
+    vi.mocked(open).mockClear();
     mockContext = createMockCommandContext({
       services: {
         config: {
@@ -39,6 +45,11 @@ describe('extensionsCommand', () => {
     });
   });
 
+  afterEach(() => {
+    // Restore any stubbed environment variables, similar to docsCommand.test.ts
+    vi.unstubAllEnvs();
+  });
+
   describe('list', () => {
     it('should add an EXTENSIONS_LIST item to the UI', async () => {
       if (!extensionsCommand.action) throw new Error('Action not defined');
@@ -302,4 +313,89 @@ describe('extensionsCommand', () => {
       });
     });
   });
+
+  describe('explore', () => {
+    const exploreAction = extensionsCommand.subCommands?.find(
+      (cmd) => cmd.name === 'explore',
+    )?.action;
+
+    if (!exploreAction) {
+      throw new Error('Explore action not found');
+    }
+
+    it("should add an info message and call 'open' in a non-sandbox environment", async () => {
+      // Ensure no special environment variables that would affect behavior
+      vi.stubEnv('NODE_ENV', '');
+      vi.stubEnv('SANDBOX', '');
+
+      await exploreAction(mockContext, '');
+
+      const extensionsUrl = 'https://geminicli.com/extensions/';
+      expect(mockContext.ui.addItem).toHaveBeenCalledWith(
+        {
+          type: MessageType.INFO,
+          text: `Opening extensions page in your browser: ${extensionsUrl}`,
+        },
+        expect.any(Number),
+      );
+
+      expect(open).toHaveBeenCalledWith(extensionsUrl);
+    });
+
+    it('should only add an info message in a sandbox environment', async () => {
+      // Simulate a sandbox environment
+      vi.stubEnv('NODE_ENV', '');
+      vi.stubEnv('SANDBOX', 'gemini-sandbox');
+      const extensionsUrl = 'https://geminicli.com/extensions/';
+
+      await exploreAction(mockContext, '');
+
+      expect(mockContext.ui.addItem).toHaveBeenCalledWith(
+        {
+          type: MessageType.INFO,
+          text: `View available extensions at ${extensionsUrl}`,
+        },
+        expect.any(Number),
+      );
+
+      // Ensure 'open' was not called in the sandbox
+      expect(open).not.toHaveBeenCalled();
+    });
+
+    it('should add an info message and not call open in NODE_ENV test environment', async () => {
+      vi.stubEnv('NODE_ENV', 'test');
+      vi.stubEnv('SANDBOX', '');
+      const extensionsUrl = 'https://geminicli.com/extensions/';
+
+      await exploreAction(mockContext, '');
+
+      expect(mockContext.ui.addItem).toHaveBeenCalledWith(
+        {
+          type: MessageType.INFO,
+          text: `Would open extensions page in your browser: ${extensionsUrl} (skipped in test environment)`,
+        },
+        expect.any(Number),
+      );
+
+      // Ensure 'open' was not called in test environment
+      expect(open).not.toHaveBeenCalled();
+    });
+
+    it('should handle errors when opening the browser', async () => {
+      // Force the non-test, non-sandbox branch so `open` is actually invoked.
+      vi.stubEnv('NODE_ENV', '');
+      vi.stubEnv('SANDBOX', '');
+      const extensionsUrl = 'https://geminicli.com/extensions/';
+      vi.mocked(open).mockRejectedValue(new Error('Failed to open browser'));
+      await exploreAction(mockContext, '');
+
+      expect(mockContext.ui.addItem).toHaveBeenCalledWith(
+        {
+          type: MessageType.ERROR,
+          text: `Failed to open browser. Check out the extensions gallery at ${extensionsUrl}`,
+        },
+        expect.any(Number),
+      );
+    });
+  });
 });
diff --git a/packages/cli/src/ui/commands/extensionsCommand.ts b/packages/cli/src/ui/commands/extensionsCommand.ts
index 612de23cc6..45ea3e47b6 100644
--- a/packages/cli/src/ui/commands/extensionsCommand.ts
+++ b/packages/cli/src/ui/commands/extensionsCommand.ts
@@ -13,6 +13,8 @@ import {
   type SlashCommand,
   CommandKind,
 } from './types.js';
+import open from 'open';
+import process from 'node:process';
 
 async function listAction(context: CommandContext) {
   const historyItem: HistoryItemExtensionsList = {
@@ -112,6 +114,51 @@ function updateAction(context: CommandContext, args: string): Promise<void> {
   return updateComplete.then((_) => {});
 }
 
+async function exploreAction(context: CommandContext) {
+  const extensionsUrl = 'https://geminicli.com/extensions/';
+
+  // Gate on an explicit NODE_ENV=test only; no test-framework detection here.
+  if (process.env['NODE_ENV'] === 'test') {
+    context.ui.addItem(
+      {
+        type: MessageType.INFO,
+        text: `Would open extensions page in your browser: ${extensionsUrl} (skipped in test environment)`,
+      },
+      Date.now(),
+    );
+  } else if (
+    process.env['SANDBOX'] &&
+    process.env['SANDBOX'] !== 'sandbox-exec'
+  ) {
+    context.ui.addItem(
+      {
+        type: MessageType.INFO,
+        text: `View available extensions at ${extensionsUrl}`,
+      },
+      Date.now(),
+    );
+  } else {
+    context.ui.addItem(
+      {
+        type: MessageType.INFO,
+        text: `Opening extensions page in your browser: ${extensionsUrl}`,
+      },
+      Date.now(),
+    );
+    try {
+      await open(extensionsUrl);
+    } catch (_error) {
+      context.ui.addItem(
+        {
+          type: MessageType.ERROR,
+          text: `Failed to open browser. Check out the extensions gallery at ${extensionsUrl}`,
+        },
+        Date.now(),
+      );
+    }
+  }
+}
+
 const listExtensionsCommand: SlashCommand = {
   name: 'list',
   description: 'List active extensions',
@@ -141,11 +188,22 @@ const updateExtensionsCommand: SlashCommand = {
   },
 };
 
+const exploreExtensionsCommand: SlashCommand = { // 'explore' subcommand
+  name: 'explore',
+  description: 'Open extensions page in your browser',
+  kind: CommandKind.BUILT_IN,
+  action: exploreAction,
+};
+
 export const extensionsCommand: SlashCommand = {
   name: 'extensions',
   description: 'Manage extensions',
   kind: CommandKind.BUILT_IN,
-  subCommands: [listExtensionsCommand, updateExtensionsCommand],
+  subCommands: [
+    listExtensionsCommand,
+    updateExtensionsCommand,
+    exploreExtensionsCommand,
+  ],
   action: (context, args) =>
     // Default to list if no subcommand is provided
     listExtensionsCommand.action!(context, args),

From 7a238bd938ac76aa84af3b81a31181ae566db42f Mon Sep 17 00:00:00 2001
From: shishu314 <shishu_1998@yahoo.com>
Date: Tue, 28 Oct 2025 14:30:05 -0400
Subject: [PATCH 60/73] fix(infra) - Continue workflow when merge queue skipper
 fails (#10509)

Co-authored-by: Shi Shu <shii@google.com>
---
 .github/workflows/e2e.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 40feb87df9..19787b675b 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -30,6 +30,7 @@ jobs:
   merge_queue_skipper:
     name: 'Merge Queue Skipper'
     runs-on: 'gemini-cli-ubuntu-16-core'
+    permissions: 'read-all'
     outputs:
       skip: '${{ steps.merge-queue-e2e-skipper.outputs.skip-check }}'
     steps:
@@ -37,12 +38,13 @@ jobs:
         uses: 'cariad-tech/merge-queue-ci-skipper@1032489e59437862c90a08a2c92809c903883772' # ratchet:cariad-tech/merge-queue-ci-skipper@main
         with:
           secret: '${{ secrets.GITHUB_TOKEN }}'
+    continue-on-error: true
 
   e2e_linux:
     name: 'E2E Test (Linux) - ${{ matrix.sandbox }}'
     needs: 'merge_queue_skipper'
     if: |
-      needs.merge_queue_skipper.outputs.skip == 'false' &&
+      (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip == 'false') &&
       (github.event_name == 'push' ||
       github.event_name == 'merge_group' ||
       github.event_name == 'workflow_dispatch' ||
@@ -104,7 +106,7 @@ jobs:
     name: 'E2E Test (macOS)'
     needs: 'merge_queue_skipper'
     if: |
-      needs.merge_queue_skipper.outputs.skip == 'false' &&
+      (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip == 'false') &&
       (github.event_name == 'push' ||
       github.event_name == 'merge_group' ||
       github.event_name == 'workflow_dispatch' ||
@@ -153,7 +155,7 @@ jobs:
     name: 'Slow E2E - Win'
     needs: 'merge_queue_skipper'
     if: |
-      needs.merge_queue_skipper.outputs.skip == 'false' &&
+      (needs.merge_queue_skipper.result !='success' || needs.merge_queue_skipper.outputs.skip == 'false') &&
       (github.event_name == 'push' ||
       github.event_name == 'merge_group' ||
       github.event_name == 'workflow_dispatch' ||

From 7e987113a21d3d15934fb9e76729c0d61e3663bd Mon Sep 17 00:00:00 2001
From: christine betts <chrstn@uw.edu>
Date: Tue, 28 Oct 2025 14:48:50 -0400
Subject: [PATCH 61/73] Add support for sensitive keychain-stored per-extension
 settings (#11953)

---
 docs/extensions/index.md                      |   4 +-
 .../cli/src/commands/extensions/disable.ts    |  11 +-
 .../cli/src/commands/extensions/enable.ts     |   4 +-
 .../cli/src/commands/extensions/install.ts    |   2 +-
 packages/cli/src/commands/extensions/link.ts  |   2 +-
 packages/cli/src/commands/extensions/list.ts  |   2 +-
 .../cli/src/commands/extensions/uninstall.ts  |   2 +-
 .../cli/src/commands/extensions/update.ts     |   2 +-
 packages/cli/src/commands/mcp/list.ts         |   2 +-
 packages/cli/src/config/config.ts             |   2 +-
 packages/cli/src/config/extension-manager.ts  |  38 ++-
 packages/cli/src/config/extension.test.ts     | 252 ++++++++++--------
 .../extensions/extensionSettings.test.ts      | 236 ++++++++++++++--
 .../config/extensions/extensionSettings.ts    | 140 ++++++++--
 .../cli/src/config/extensions/update.test.ts  |  73 +++--
 packages/cli/src/config/extensions/update.ts  |   2 +-
 packages/cli/src/config/settings.test.ts      |   4 +-
 .../src/ui/hooks/useExtensionUpdates.test.tsx |   5 +-
 packages/core/index.ts                        |   1 +
 .../keychain-token-storage.test.ts            |  49 ++++
 .../token-storage/keychain-token-storage.ts   |  78 +++++-
 packages/core/src/mcp/token-storage/types.ts  |   7 +
 22 files changed, 706 insertions(+), 212 deletions(-)

diff --git a/docs/extensions/index.md b/docs/extensions/index.md
index e07930dcf4..84d116cfe6 100644
--- a/docs/extensions/index.md
+++ b/docs/extensions/index.md
@@ -190,8 +190,8 @@ Each object in the array should have the following properties:
 - `description`: A description of the setting and what it's used for.
 - `envVar`: The name of the environment variable that the setting will be stored
   as.
-
-**Example**
+- `sensitive`: Optional boolean. If true, obfuscates the input the user provides
+  and stores the secret in keychain storage. **Example**
 
 ```json
 {
diff --git a/packages/cli/src/commands/extensions/disable.ts b/packages/cli/src/commands/extensions/disable.ts
index 40bed33f83..bb60087275 100644
--- a/packages/cli/src/commands/extensions/disable.ts
+++ b/packages/cli/src/commands/extensions/disable.ts
@@ -17,7 +17,7 @@ interface DisableArgs {
   scope?: string;
 }
 
-export function handleDisable(args: DisableArgs) {
+export async function handleDisable(args: DisableArgs) {
   const workspaceDir = process.cwd();
   const extensionManager = new ExtensionManager({
     workspaceDir,
@@ -25,13 +25,16 @@ export function handleDisable(args: DisableArgs) {
     requestSetting: promptForSetting,
     settings: loadSettings(workspaceDir).merged,
   });
-  extensionManager.loadExtensions();
+  await extensionManager.loadExtensions();
 
   try {
     if (args.scope?.toLowerCase() === 'workspace') {
-      extensionManager.disableExtension(args.name, SettingScope.Workspace);
+      await extensionManager.disableExtension(
+        args.name,
+        SettingScope.Workspace,
+      );
     } else {
-      extensionManager.disableExtension(args.name, SettingScope.User);
+      await extensionManager.disableExtension(args.name, SettingScope.User);
     }
     debugLogger.log(
       `Extension "${args.name}" successfully disabled for scope "${args.scope}".`,
diff --git a/packages/cli/src/commands/extensions/enable.ts b/packages/cli/src/commands/extensions/enable.ts
index 468353f6a1..0796830100 100644
--- a/packages/cli/src/commands/extensions/enable.ts
+++ b/packages/cli/src/commands/extensions/enable.ts
@@ -20,7 +20,7 @@ interface EnableArgs {
   scope?: string;
 }
 
-export function handleEnable(args: EnableArgs) {
+export async function handleEnable(args: EnableArgs) {
   const workingDir = process.cwd();
   const extensionManager = new ExtensionManager({
     workspaceDir: workingDir,
@@ -28,7 +28,7 @@ export function handleEnable(args: EnableArgs) {
     requestSetting: promptForSetting,
     settings: loadSettings(workingDir).merged,
   });
-  extensionManager.loadExtensions();
+  await extensionManager.loadExtensions();
 
   try {
     if (args.scope?.toLowerCase() === 'workspace') {
diff --git a/packages/cli/src/commands/extensions/install.ts b/packages/cli/src/commands/extensions/install.ts
index 95d2e17b7a..920cfe63a4 100644
--- a/packages/cli/src/commands/extensions/install.ts
+++ b/packages/cli/src/commands/extensions/install.ts
@@ -76,7 +76,7 @@ export async function handleInstall(args: InstallArgs) {
       requestSetting: promptForSetting,
       settings: loadSettings(workspaceDir).merged,
     });
-    extensionManager.loadExtensions();
+    await extensionManager.loadExtensions();
     const name =
       await extensionManager.installOrUpdateExtension(installMetadata);
     debugLogger.log(`Extension "${name}" installed successfully and enabled.`);
diff --git a/packages/cli/src/commands/extensions/link.ts b/packages/cli/src/commands/extensions/link.ts
index 69c18d8bbe..9bee299a5e 100644
--- a/packages/cli/src/commands/extensions/link.ts
+++ b/packages/cli/src/commands/extensions/link.ts
@@ -33,7 +33,7 @@ export async function handleLink(args: InstallArgs) {
       requestSetting: promptForSetting,
       settings: loadSettings(workspaceDir).merged,
     });
-    extensionManager.loadExtensions();
+    await extensionManager.loadExtensions();
     const extensionName =
       await extensionManager.installOrUpdateExtension(installMetadata);
     debugLogger.log(
diff --git a/packages/cli/src/commands/extensions/list.ts b/packages/cli/src/commands/extensions/list.ts
index a0b31e45f3..4596f95cd9 100644
--- a/packages/cli/src/commands/extensions/list.ts
+++ b/packages/cli/src/commands/extensions/list.ts
@@ -21,7 +21,7 @@ export async function handleList() {
       requestSetting: promptForSetting,
       settings: loadSettings(workspaceDir).merged,
     });
-    const extensions = extensionManager.loadExtensions();
+    const extensions = await extensionManager.loadExtensions();
     if (extensions.length === 0) {
       debugLogger.log('No extensions installed.');
       return;
diff --git a/packages/cli/src/commands/extensions/uninstall.ts b/packages/cli/src/commands/extensions/uninstall.ts
index 91242fe3a1..c768c95164 100644
--- a/packages/cli/src/commands/extensions/uninstall.ts
+++ b/packages/cli/src/commands/extensions/uninstall.ts
@@ -25,7 +25,7 @@ export async function handleUninstall(args: UninstallArgs) {
       requestSetting: promptForSetting,
       settings: loadSettings(workspaceDir).merged,
     });
-    extensionManager.loadExtensions();
+    await extensionManager.loadExtensions();
     await extensionManager.uninstallExtension(args.name, false);
     debugLogger.log(`Extension "${args.name}" successfully uninstalled.`);
   } catch (error) {
diff --git a/packages/cli/src/commands/extensions/update.ts b/packages/cli/src/commands/extensions/update.ts
index b5c1620810..f3e78f2cca 100644
--- a/packages/cli/src/commands/extensions/update.ts
+++ b/packages/cli/src/commands/extensions/update.ts
@@ -37,7 +37,7 @@ export async function handleUpdate(args: UpdateArgs) {
     settings: loadSettings(workspaceDir).merged,
   });
 
-  const extensions = extensionManager.loadExtensions();
+  const extensions = await extensionManager.loadExtensions();
   if (args.name) {
     try {
       const extension = extensions.find(
diff --git a/packages/cli/src/commands/mcp/list.ts b/packages/cli/src/commands/mcp/list.ts
index 9e41964d17..9b5571d134 100644
--- a/packages/cli/src/commands/mcp/list.ts
+++ b/packages/cli/src/commands/mcp/list.ts
@@ -33,7 +33,7 @@ async function getMcpServersFromConfig(): Promise<
     requestConsent: requestConsentNonInteractive,
     requestSetting: promptForSetting,
   });
-  const extensions = extensionManager.loadExtensions();
+  const extensions = await extensionManager.loadExtensions();
   const mcpServers = { ...(settings.merged.mcpServers || {}) };
   for (const extension of extensions) {
     Object.entries(extension.mcpServers || {}).forEach(([key, server]) => {
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 2a102f78bc..9d9630634f 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -423,7 +423,7 @@ export async function loadCliConfig(
     workspaceDir: cwd,
     enabledExtensionOverrides: argv.extensions,
   });
-  extensionManager.loadExtensions();
+  await extensionManager.loadExtensions();
 
   // Call the (now wrapper) loadHierarchicalGeminiMemory which calls the server's version
   const { memoryContent, fileCount, filePaths } =
diff --git a/packages/cli/src/config/extension-manager.ts b/packages/cli/src/config/extension-manager.ts
index d25591fd48..9980474e73 100644
--- a/packages/cli/src/config/extension-manager.ts
+++ b/packages/cli/src/config/extension-manager.ts
@@ -133,7 +133,7 @@ export class ExtensionManager implements ExtensionLoader {
     const isUpdate = !!previousExtensionConfig;
     let newExtensionConfig: ExtensionConfig | null = null;
     let localSourcePath: string | undefined;
-    let extension: GeminiCLIExtension;
+    let extension: GeminiCLIExtension | null;
     try {
       if (!isWorkspaceTrusted(this.settings).isTrusted) {
         throw new Error(
@@ -243,12 +243,16 @@ export class ExtensionManager implements ExtensionLoader {
           this.requestConsent,
           previousExtensionConfig,
         );
-
-        const extensionStorage = new ExtensionStorage(newExtensionName);
-        const destinationPath = extensionStorage.getExtensionDir();
+        const extensionId = getExtensionId(newExtensionConfig, installMetadata);
+        const destinationPath = new ExtensionStorage(
+          newExtensionName,
+        ).getExtensionDir();
         let previousSettings: Record<string, string> | undefined;
         if (isUpdate) {
-          previousSettings = getEnvContents(extensionStorage);
+          previousSettings = await getEnvContents(
+            previousExtensionConfig,
+            extensionId,
+          );
           await this.uninstallExtension(newExtensionName, isUpdate);
         }
 
@@ -257,6 +261,7 @@ export class ExtensionManager implements ExtensionLoader {
           if (isUpdate) {
             await maybePromptForSettings(
               newExtensionConfig,
+              extensionId,
               this.requestSetting,
               previousExtensionConfig,
               previousSettings,
@@ -264,6 +269,7 @@ export class ExtensionManager implements ExtensionLoader {
           } else {
             await maybePromptForSettings(
               newExtensionConfig,
+              extensionId,
               this.requestSetting,
             );
           }
@@ -286,7 +292,10 @@ export class ExtensionManager implements ExtensionLoader {
 
         // TODO: Gracefully handle this call failing, we should back up the old
         // extension prior to overwriting it and then restore it.
-        extension = this.loadExtension(destinationPath)!;
+        extension = await this.loadExtension(destinationPath)!;
+        if (!extension) {
+          throw new Error(`Extension not found`);
+        }
         if (isUpdate) {
           logExtensionUpdateEvent(
             this.telemetryConfig,
@@ -401,7 +410,7 @@ export class ExtensionManager implements ExtensionLoader {
     this.eventEmitter.emit('extensionUninstalled', { extension });
   }
 
-  loadExtensions(): GeminiCLIExtension[] {
+  async loadExtensions(): Promise<GeminiCLIExtension[]> {
     if (this.loadedExtensions) {
       throw new Error('Extensions already loaded, only load extensions once.');
     }
@@ -413,12 +422,14 @@ export class ExtensionManager implements ExtensionLoader {
     for (const subdir of fs.readdirSync(extensionsDir)) {
       const extensionDir = path.join(extensionsDir, subdir);
 
-      this.loadExtension(extensionDir);
+      await this.loadExtension(extensionDir);
     }
     return this.loadedExtensions;
   }
 
-  private loadExtension(extensionDir: string): GeminiCLIExtension | null {
+  private async loadExtension(
+    extensionDir: string,
+  ): Promise<GeminiCLIExtension | null> {
     this.loadedExtensions ??= [];
     if (!fs.statSync(extensionDir).isDirectory()) {
       return null;
@@ -441,7 +452,10 @@ export class ExtensionManager implements ExtensionLoader {
         );
       }
 
-      const customEnv = getEnvContents(new ExtensionStorage(config.name));
+      const customEnv = await getEnvContents(
+        config,
+        getExtensionId(config, installMetadata),
+      );
       config = resolveEnvVarsInObject(config, customEnv);
 
       if (config.mcpServers) {
@@ -573,7 +587,7 @@ export class ExtensionManager implements ExtensionLoader {
     return output;
   }
 
-  disableExtension(name: string, scope: SettingScope) {
+  async disableExtension(name: string, scope: SettingScope) {
     if (
       scope === SettingScope.System ||
       scope === SettingScope.SystemDefaults
@@ -598,7 +612,7 @@ export class ExtensionManager implements ExtensionLoader {
     this.eventEmitter.emit('extensionDisabled', { extension });
   }
 
-  enableExtension(name: string, scope: SettingScope) {
+  async enableExtension(name: string, scope: SettingScope) {
     if (
       scope === SettingScope.System ||
       scope === SettingScope.SystemDefaults
diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts
index e4fa0364ac..21df5f26de 100644
--- a/packages/cli/src/config/extension.test.ts
+++ b/packages/cli/src/config/extension.test.ts
@@ -13,6 +13,7 @@ import {
   ExtensionUninstallEvent,
   ExtensionDisableEvent,
   ExtensionEnableEvent,
+  KeychainTokenStorage,
 } from '@google/gemini-cli-core';
 import { loadSettings, SettingScope } from './settings.js';
 import { isWorkspaceTrusted } from './trustedFolders.js';
@@ -96,6 +97,13 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
     ExtensionInstallEvent: vi.fn(),
     ExtensionUninstallEvent: vi.fn(),
     ExtensionDisableEvent: vi.fn(),
+    KeychainTokenStorage: vi.fn().mockImplementation(() => ({
+      getSecret: vi.fn(),
+      setSecret: vi.fn(),
+      deleteSecret: vi.fn(),
+      listSecrets: vi.fn(),
+      isAvailable: vi.fn().mockResolvedValue(true),
+    })),
   };
 });
 
@@ -107,6 +115,14 @@ vi.mock('child_process', async (importOriginal) => {
   };
 });
 
+interface MockKeychainStorage { // vi.fn() stand-ins for the keychain methods
+  getSecret: ReturnType<typeof vi.fn>;
+  setSecret: ReturnType<typeof vi.fn>;
+  deleteSecret: ReturnType<typeof vi.fn>;
+  listSecrets: ReturnType<typeof vi.fn>;
+  isAvailable: ReturnType<typeof vi.fn>;
+}
+
 describe('extension tests', () => {
   let tempHomeDir: string;
   let tempWorkspaceDir: string;
@@ -116,8 +132,32 @@ describe('extension tests', () => {
   let mockPromptForSettings: MockedFunction<
     (setting: ExtensionSetting) => Promise<string>
   >;
+  let mockKeychainStorage: MockKeychainStorage;
+  let keychainData: Record<string, string>;
 
   beforeEach(() => {
+    vi.clearAllMocks();
+    keychainData = {};
+    mockKeychainStorage = {
+      getSecret: vi
+        .fn()
+        .mockImplementation(async (key: string) => keychainData[key] || null),
+      setSecret: vi
+        .fn()
+        .mockImplementation(async (key: string, value: string) => {
+          keychainData[key] = value;
+        }),
+      deleteSecret: vi.fn().mockImplementation(async (key: string) => {
+        delete keychainData[key];
+      }),
+      listSecrets: vi
+        .fn()
+        .mockImplementation(async () => Object.keys(keychainData)),
+      isAvailable: vi.fn().mockResolvedValue(true),
+    };
+    (
+      KeychainTokenStorage as unknown as ReturnType<typeof vi.fn>
+    ).mockImplementation(() => mockKeychainStorage);
     tempHomeDir = fs.mkdtempSync(
       path.join(os.tmpdir(), 'gemini-cli-test-home-'),
     );
@@ -151,7 +191,7 @@ describe('extension tests', () => {
   });
 
   describe('loadExtensions', () => {
-    it('should include extension path in loaded extension', () => {
+    it('should include extension path in loaded extension', async () => {
       const extensionDir = path.join(userExtensionsDir, 'test-extension');
       fs.mkdirSync(extensionDir, { recursive: true });
 
@@ -161,13 +201,13 @@ describe('extension tests', () => {
         version: '1.0.0',
       });
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
       expect(extensions).toHaveLength(1);
       expect(extensions[0].path).toBe(extensionDir);
       expect(extensions[0].name).toBe('test-extension');
     });
 
-    it('should load context file path when GEMINI.md is present', () => {
+    it('should load context file path when GEMINI.md is present', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'ext1',
@@ -180,7 +220,7 @@ describe('extension tests', () => {
         version: '2.0.0',
       });
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
 
       expect(extensions).toHaveLength(2);
       const ext1 = extensions.find((e) => e.name === 'ext1');
@@ -191,7 +231,7 @@ describe('extension tests', () => {
       expect(ext2?.contextFiles).toEqual([]);
     });
 
-    it('should load context file path from the extension config', () => {
+    it('should load context file path from the extension config', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'ext1',
@@ -200,7 +240,7 @@ describe('extension tests', () => {
         contextFileName: 'my-context-file.md',
       });
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
 
       expect(extensions).toHaveLength(1);
       const ext1 = extensions.find((e) => e.name === 'ext1');
@@ -209,7 +249,7 @@ describe('extension tests', () => {
       ]);
     });
 
-    it('should annotate disabled extensions', () => {
+    it('should annotate disabled extensions', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'disabled-extension',
@@ -220,8 +260,8 @@ describe('extension tests', () => {
         name: 'enabled-extension',
         version: '2.0.0',
       });
-      extensionManager.loadExtensions();
-      extensionManager.disableExtension(
+      await extensionManager.loadExtensions();
+      await extensionManager.disableExtension(
         'disabled-extension',
         SettingScope.User,
       );
@@ -233,7 +273,7 @@ describe('extension tests', () => {
       expect(extensions[1].isActive).toBe(true);
     });
 
-    it('should hydrate variables', () => {
+    it('should hydrate variables', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'test-extension',
@@ -247,7 +287,7 @@ describe('extension tests', () => {
         },
       });
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
       expect(extensions).toHaveLength(1);
       const expectedCwd = path.join(
         userExtensionsDir,
@@ -266,7 +306,7 @@ describe('extension tests', () => {
       });
       fs.writeFileSync(path.join(sourceExtDir, 'context.md'), 'linked context');
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       const extension = await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'link',
@@ -303,7 +343,7 @@ describe('extension tests', () => {
         },
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'link',
@@ -319,7 +359,7 @@ describe('extension tests', () => {
       ]);
     });
 
-    it('should resolve environment variables in extension configuration', () => {
+    it('should resolve environment variables in extension configuration', async () => {
       process.env['TEST_API_KEY'] = 'test-api-key-123';
       process.env['TEST_DB_URL'] = 'postgresql://localhost:5432/testdb';
 
@@ -352,7 +392,7 @@ describe('extension tests', () => {
         };
         fs.writeFileSync(configPath, JSON.stringify(extensionConfig));
 
-        const extensions = extensionManager.loadExtensions();
+        const extensions = await extensionManager.loadExtensions();
 
         expect(extensions).toHaveLength(1);
         const extension = extensions[0];
@@ -373,7 +413,7 @@ describe('extension tests', () => {
       }
     });
 
-    it('should resolve environment variables from an extension .env file', () => {
+    it('should resolve environment variables from an extension .env file', async () => {
       const extDir = createExtension({
         extensionsDir: userExtensionsDir,
         name: 'test-extension',
@@ -388,12 +428,19 @@ describe('extension tests', () => {
             },
           },
         },
+        settings: [
+          {
+            name: 'My API Key',
+            description: 'API key for testing.',
+            envVar: 'MY_API_KEY',
+          },
+        ],
       });
 
       const envFilePath = path.join(extDir, '.env');
       fs.writeFileSync(envFilePath, 'MY_API_KEY=test-key-from-file\n');
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
 
       expect(extensions).toHaveLength(1);
       const extension = extensions[0];
@@ -403,7 +450,7 @@ describe('extension tests', () => {
       expect(serverConfig.env!['STATIC_VALUE']).toBe('no-substitution');
     });
 
-    it('should handle missing environment variables gracefully', () => {
+    it('should handle missing environment variables gracefully', async () => {
       const userExtensionsDir = path.join(
         tempHomeDir,
         EXTENSIONS_DIRECTORY_NAME,
@@ -433,7 +480,7 @@ describe('extension tests', () => {
         JSON.stringify(extensionConfig),
       );
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
 
       expect(extensions).toHaveLength(1);
       const extension = extensions[0];
@@ -443,7 +490,7 @@ describe('extension tests', () => {
       expect(serverConfig.env!['MISSING_VAR_BRACES']).toBe('${ALSO_UNDEFINED}');
     });
 
-    it('should skip extensions with invalid JSON and log a warning', () => {
+    it('should skip extensions with invalid JSON and log a warning', async () => {
       const consoleSpy = vi
         .spyOn(console, 'error')
         .mockImplementation(() => {});
@@ -461,7 +508,7 @@ describe('extension tests', () => {
       const badConfigPath = path.join(badExtDir, EXTENSIONS_CONFIG_FILENAME);
       fs.writeFileSync(badConfigPath, '{ "name": "bad-ext"'); // Malformed
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
 
       expect(extensions).toHaveLength(1);
       expect(extensions[0].name).toBe('good-ext');
@@ -474,7 +521,7 @@ describe('extension tests', () => {
       consoleSpy.mockRestore();
     });
 
-    it('should skip extensions with missing name and log a warning', () => {
+    it('should skip extensions with missing name and log a warning', async () => {
       const consoleSpy = vi
         .spyOn(console, 'error')
         .mockImplementation(() => {});
@@ -492,7 +539,7 @@ describe('extension tests', () => {
       const badConfigPath = path.join(badExtDir, EXTENSIONS_CONFIG_FILENAME);
       fs.writeFileSync(badConfigPath, JSON.stringify({ version: '1.0.0' }));
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
 
       expect(extensions).toHaveLength(1);
       expect(extensions[0].name).toBe('good-ext');
@@ -505,7 +552,7 @@ describe('extension tests', () => {
       consoleSpy.mockRestore();
     });
 
-    it('should filter trust out of mcp servers', () => {
+    it('should filter trust out of mcp servers', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'test-extension',
@@ -519,12 +566,12 @@ describe('extension tests', () => {
         },
       });
 
-      const extensions = extensionManager.loadExtensions();
+      const extensions = await extensionManager.loadExtensions();
       expect(extensions).toHaveLength(1);
       expect(extensions[0].mcpServers?.['test-server'].trust).toBeUndefined();
     });
 
-    it('should throw an error for invalid extension names', () => {
+    it('should throw an error for invalid extension names', async () => {
       const consoleSpy = vi
         .spyOn(console, 'error')
         .mockImplementation(() => {});
@@ -533,10 +580,8 @@ describe('extension tests', () => {
         name: 'bad_name',
         version: '1.0.0',
       });
-
-      const extension = extensionManager
-        .loadExtensions()
-        .find((e) => e.name === 'bad_name');
+      const extensions = await extensionManager.loadExtensions();
+      const extension = extensions.find((e) => e.name === 'bad_name');
 
       expect(extension).toBeUndefined();
       expect(consoleSpy).toHaveBeenCalledWith(
@@ -546,7 +591,7 @@ describe('extension tests', () => {
     });
 
     describe('id generation', () => {
-      it('should generate id from source for non-github git urls', () => {
+      it('should generate id from source for non-github git urls', async () => {
         createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
@@ -556,14 +601,12 @@ describe('extension tests', () => {
             source: 'http://somehost.com/foo/bar',
           },
         });
-
-        const extension = extensionManager
-          .loadExtensions()
-          .find((e) => e.name === 'my-ext');
+        const extensions = await extensionManager.loadExtensions();
+        const extension = extensions.find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('http://somehost.com/foo/bar'));
       });
 
-      it('should generate id from owner/repo for github http urls', () => {
+      it('should generate id from owner/repo for github http urls', async () => {
         createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
@@ -574,13 +617,12 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager
-          .loadExtensions()
-          .find((e) => e.name === 'my-ext');
+        const extensions = await extensionManager.loadExtensions();
+        const extension = extensions.find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('https://github.com/foo/bar'));
       });
 
-      it('should generate id from owner/repo for github ssh urls', () => {
+      it('should generate id from owner/repo for github ssh urls', async () => {
         createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
@@ -591,13 +633,12 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager
-          .loadExtensions()
-          .find((e) => e.name === 'my-ext');
+        const extensions = await extensionManager.loadExtensions();
+        const extension = extensions.find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('https://github.com/foo/bar'));
       });
 
-      it('should generate id from source for github-release extension', () => {
+      it('should generate id from source for github-release extension', async () => {
         createExtension({
           extensionsDir: userExtensionsDir,
           name: 'my-ext',
@@ -607,14 +648,12 @@ describe('extension tests', () => {
             source: 'https://github.com/foo/bar',
           },
         });
-
-        const extension = extensionManager
-          .loadExtensions()
-          .find((e) => e.name === 'my-ext');
+        const extensions = await extensionManager.loadExtensions();
+        const extension = extensions.find((e) => e.name === 'my-ext');
         expect(extension?.id).toBe(hashValue('https://github.com/foo/bar'));
       });
 
-      it('should generate id from the original source for local extension', () => {
+      it('should generate id from the original source for local extension', async () => {
         createExtension({
           extensionsDir: userExtensionsDir,
           name: 'local-ext-name',
@@ -625,9 +664,8 @@ describe('extension tests', () => {
           },
         });
 
-        const extension = extensionManager
-          .loadExtensions()
-          .find((e) => e.name === 'local-ext-name');
+        const extensions = await extensionManager.loadExtensions();
+        const extension = extensions.find((e) => e.name === 'local-ext-name');
         expect(extension?.id).toBe(hashValue('/some/path'));
       });
 
@@ -638,7 +676,7 @@ describe('extension tests', () => {
           name: 'link-ext-name',
           version: '1.0.0',
         });
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension({
           type: 'link',
           source: actualExtensionDir,
@@ -650,16 +688,15 @@ describe('extension tests', () => {
         expect(extension?.id).toBe(hashValue(actualExtensionDir));
       });
 
-      it('should generate id from name for extension with no install metadata', () => {
+      it('should generate id from name for extension with no install metadata', async () => {
         createExtension({
           extensionsDir: userExtensionsDir,
           name: 'no-meta-name',
           version: '1.0.0',
         });
 
-        const extension = extensionManager
-          .loadExtensions()
-          .find((e) => e.name === 'no-meta-name');
+        const extensions = await extensionManager.loadExtensions();
+        const extension = extensions.find((e) => e.name === 'no-meta-name');
         expect(extension?.id).toBe(hashValue('no-meta-name'));
       });
     });
@@ -675,7 +712,7 @@ describe('extension tests', () => {
       const targetExtDir = path.join(userExtensionsDir, 'my-local-extension');
       const metadataPath = path.join(targetExtDir, INSTALL_METADATA_FILENAME);
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -697,7 +734,7 @@ describe('extension tests', () => {
         name: 'my-local-extension',
         version: '1.0.0',
       });
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -791,7 +828,7 @@ describe('extension tests', () => {
         type: 'github-release',
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: gitUrl,
         type: 'git',
@@ -816,7 +853,7 @@ describe('extension tests', () => {
       const metadataPath = path.join(targetExtDir, INSTALL_METADATA_FILENAME);
       const configPath = path.join(targetExtDir, EXTENSIONS_CONFIG_FILENAME);
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'link',
@@ -846,7 +883,7 @@ describe('extension tests', () => {
             name: 'my-local-extension',
             version: '1.1.0',
           });
-          extensionManager.loadExtensions();
+          await extensionManager.loadExtensions();
           if (isUpdate) {
             await extensionManager.installOrUpdateExtension({
               source: sourceExtDir,
@@ -920,7 +957,7 @@ describe('extension tests', () => {
         },
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await expect(
         extensionManager.installOrUpdateExtension({
           source: sourceExtDir,
@@ -952,7 +989,7 @@ This extension will run the following MCP servers:
         },
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await expect(
         extensionManager.installOrUpdateExtension({
           source: sourceExtDir,
@@ -974,7 +1011,7 @@ This extension will run the following MCP servers:
         },
       });
       mockRequestConsent.mockResolvedValue(false);
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await expect(
         extensionManager.installOrUpdateExtension({
           source: sourceExtDir,
@@ -992,7 +1029,7 @@ This extension will run the following MCP servers:
       const targetExtDir = path.join(userExtensionsDir, 'my-local-extension');
       const metadataPath = path.join(targetExtDir, INSTALL_METADATA_FILENAME);
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -1023,7 +1060,7 @@ This extension will run the following MCP servers:
         },
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       // Install it with hard coded consent first.
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
@@ -1058,7 +1095,7 @@ This extension will run the following MCP servers:
         ],
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -1088,7 +1125,7 @@ This extension will run the following MCP servers:
         settings: loadSettings(tempWorkspaceDir).merged,
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: sourceExtDir,
         type: 'local',
@@ -1111,7 +1148,7 @@ This extension will run the following MCP servers:
       });
 
       mockPromptForSettings.mockResolvedValueOnce('old-api-key');
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       // Install it so it exists in the userExtensionsDir
       await extensionManager.installOrUpdateExtension({
         source: oldSourceExtDir,
@@ -1181,7 +1218,7 @@ This extension will run the following MCP servers:
           },
         ],
       });
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.installOrUpdateExtension({
         source: oldSourceExtDir,
         type: 'local',
@@ -1273,7 +1310,7 @@ This extension will run the following MCP servers:
           join(tempDir, extensionName),
         );
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension({
           source: gitUrl,
           type: 'github-release',
@@ -1298,7 +1335,7 @@ This extension will run the following MCP servers:
           type: 'github-release',
         });
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension(
           { source: gitUrl, type: 'github-release' }, // Use github-release to force consent
         );
@@ -1329,7 +1366,7 @@ This extension will run the following MCP servers:
         });
         mockRequestConsent.mockResolvedValue(false);
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await expect(
           extensionManager.installOrUpdateExtension({
             source: gitUrl,
@@ -1354,7 +1391,7 @@ This extension will run the following MCP servers:
           type: 'github-release',
         });
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension({
           source: gitUrl,
           type: 'git',
@@ -1385,7 +1422,7 @@ This extension will run the following MCP servers:
           type: 'github-release',
         });
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.installOrUpdateExtension(
           { source: gitUrl, type: 'github-release' }, // Note the type
         );
@@ -1407,8 +1444,7 @@ This extension will run the following MCP servers:
         name: 'my-local-extension',
         version: '1.0.0',
       });
-
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.uninstallExtension('my-local-extension', false);
 
       expect(fs.existsSync(sourceExtDir)).toBe(false);
@@ -1426,7 +1462,7 @@ This extension will run the following MCP servers:
         version: '1.0.0',
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.uninstallExtension('my-local-extension', false);
 
       expect(fs.existsSync(sourceExtDir)).toBe(false);
@@ -1435,7 +1471,7 @@ This extension will run the following MCP servers:
     });
 
     it('should throw an error if the extension does not exist', async () => {
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await expect(
         extensionManager.uninstallExtension('nonexistent-extension', false),
       ).rejects.toThrow('Extension not found.');
@@ -1453,7 +1489,7 @@ This extension will run the following MCP servers:
           },
         });
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.uninstallExtension(
           'my-local-extension',
           isUpdate,
@@ -1481,7 +1517,7 @@ This extension will run the following MCP servers:
         const enablementManager = new ExtensionEnablementManager();
         enablementManager.enable('test-extension', true, '/some/scope');
 
-        extensionManager.loadExtensions();
+        await extensionManager.loadExtensions();
         await extensionManager.uninstallExtension('test-extension', isUpdate);
 
         const config = enablementManager.readConfig()['test-extension'];
@@ -1506,7 +1542,7 @@ This extension will run the following MCP servers:
         },
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await extensionManager.uninstallExtension(gitUrl, false);
 
       expect(fs.existsSync(sourceExtDir)).toBe(false);
@@ -1526,7 +1562,7 @@ This extension will run the following MCP servers:
         // No installMetadata provided
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       await expect(
         extensionManager.uninstallExtension(
           'https://github.com/google/no-metadata-extension',
@@ -1537,14 +1573,14 @@ This extension will run the following MCP servers:
   });
 
   describe('disableExtension', () => {
-    it('should disable an extension at the user scope', () => {
+    it('should disable an extension at the user scope', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'my-extension',
         version: '1.0.0',
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('my-extension', SettingScope.User);
       expect(
         isEnabled({
@@ -1554,14 +1590,14 @@ This extension will run the following MCP servers:
       ).toBe(false);
     });
 
-    it('should disable an extension at the workspace scope', () => {
+    it('should disable an extension at the workspace scope', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'my-extension',
         version: '1.0.0',
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('my-extension', SettingScope.Workspace);
       expect(
         isEnabled({
@@ -1577,14 +1613,14 @@ This extension will run the following MCP servers:
       ).toBe(false);
     });
 
-    it('should handle disabling the same extension twice', () => {
+    it('should handle disabling the same extension twice', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'my-extension',
         version: '1.0.0',
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('my-extension', SettingScope.User);
       extensionManager.disableExtension('my-extension', SettingScope.User);
       expect(
@@ -1595,13 +1631,17 @@ This extension will run the following MCP servers:
       ).toBe(false);
     });
 
-    it('should throw an error if you request system scope', () => {
-      expect(() =>
-        extensionManager.disableExtension('my-extension', SettingScope.System),
-      ).toThrow('System and SystemDefaults scopes are not supported.');
+    it('should throw an error if you request system scope', async () => {
+      await expect(
+        async () =>
+          await extensionManager.disableExtension(
+            'my-extension',
+            SettingScope.System,
+          ),
+      ).rejects.toThrow('System and SystemDefaults scopes are not supported.');
     });
 
-    it('should log a disable event', () => {
+    it('should log a disable event', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'ext1',
@@ -1612,7 +1652,7 @@ This extension will run the following MCP servers:
         },
       });
 
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.Workspace);
 
       expect(mockLogExtensionDisable).toHaveBeenCalled();
@@ -1634,41 +1674,41 @@ This extension will run the following MCP servers:
       return extensions.filter((e) => e.isActive);
     };
 
-    it('should enable an extension at the user scope', () => {
+    it('should enable an extension at the user scope', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'ext1',
         version: '1.0.0',
       });
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.User);
       let activeExtensions = getActiveExtensions();
       expect(activeExtensions).toHaveLength(0);
 
-      extensionManager.enableExtension('ext1', SettingScope.User);
-      activeExtensions = getActiveExtensions();
+      await extensionManager.enableExtension('ext1', SettingScope.User);
+      activeExtensions = await getActiveExtensions();
       expect(activeExtensions).toHaveLength(1);
       expect(activeExtensions[0].name).toBe('ext1');
     });
 
-    it('should enable an extension at the workspace scope', () => {
+    it('should enable an extension at the workspace scope', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'ext1',
         version: '1.0.0',
       });
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.Workspace);
       let activeExtensions = getActiveExtensions();
       expect(activeExtensions).toHaveLength(0);
 
-      extensionManager.enableExtension('ext1', SettingScope.Workspace);
-      activeExtensions = getActiveExtensions();
+      await extensionManager.enableExtension('ext1', SettingScope.Workspace);
+      activeExtensions = await getActiveExtensions();
       expect(activeExtensions).toHaveLength(1);
       expect(activeExtensions[0].name).toBe('ext1');
     });
 
-    it('should log an enable event', () => {
+    it('should log an enable event', async () => {
       createExtension({
         extensionsDir: userExtensionsDir,
         name: 'ext1',
@@ -1678,7 +1718,7 @@ This extension will run the following MCP servers:
           type: 'local',
         },
       });
-      extensionManager.loadExtensions();
+      await extensionManager.loadExtensions();
       extensionManager.disableExtension('ext1', SettingScope.Workspace);
       extensionManager.enableExtension('ext1', SettingScope.Workspace);
 
diff --git a/packages/cli/src/config/extensions/extensionSettings.test.ts b/packages/cli/src/config/extensions/extensionSettings.test.ts
index 9beb8a4284..e72ba8ad1a 100644
--- a/packages/cli/src/config/extensions/extensionSettings.test.ts
+++ b/packages/cli/src/config/extensions/extensionSettings.test.ts
@@ -17,6 +17,7 @@ import { ExtensionStorage } from './storage.js';
 import prompts from 'prompts';
 import * as fsPromises from 'node:fs/promises';
 import * as fs from 'node:fs';
+import { KeychainTokenStorage } from '@google/gemini-cli-core';
 
 vi.mock('prompts');
 vi.mock('os', async (importOriginal) => {
@@ -27,11 +28,59 @@ vi.mock('os', async (importOriginal) => {
   };
 });
 
+vi.mock('@google/gemini-cli-core', async (importOriginal) => {
+  const actual =
+    await importOriginal<typeof import('@google/gemini-cli-core')>();
+  return {
+    ...actual,
+    KeychainTokenStorage: vi.fn().mockImplementation(() => ({
+      getSecret: vi.fn(),
+      setSecret: vi.fn(),
+      deleteSecret: vi.fn(),
+      listSecrets: vi.fn(),
+      isAvailable: vi.fn().mockResolvedValue(true),
+    })),
+  };
+});
+
+interface MockKeychainStorage {
+  getSecret: ReturnType<typeof vi.fn>;
+  setSecret: ReturnType<typeof vi.fn>;
+  deleteSecret: ReturnType<typeof vi.fn>;
+  listSecrets: ReturnType<typeof vi.fn>;
+  isAvailable: ReturnType<typeof vi.fn>;
+}
+
 describe('extensionSettings', () => {
   let tempHomeDir: string;
   let extensionDir: string;
+  let mockKeychainStorage: MockKeychainStorage;
+  let keychainData: Record<string, string>;
 
   beforeEach(() => {
+    vi.clearAllMocks();
+    keychainData = {};
+    mockKeychainStorage = {
+      getSecret: vi
+        .fn()
+        .mockImplementation(async (key: string) => keychainData[key] || null),
+      setSecret: vi
+        .fn()
+        .mockImplementation(async (key: string, value: string) => {
+          keychainData[key] = value;
+        }),
+      deleteSecret: vi.fn().mockImplementation(async (key: string) => {
+        delete keychainData[key];
+      }),
+      listSecrets: vi
+        .fn()
+        .mockImplementation(async () => Object.keys(keychainData)),
+      isAvailable: vi.fn().mockResolvedValue(true),
+    };
+    (
+      KeychainTokenStorage as unknown as ReturnType<typeof vi.fn>
+    ).mockImplementation(() => mockKeychainStorage);
+
     tempHomeDir = os.tmpdir() + path.sep + `gemini-cli-test-home-${Date.now()}`;
     extensionDir = path.join(tempHomeDir, '.gemini', 'extensions', 'test-ext');
     // Spy and mock the method, but also create the directory so we can write to it.
@@ -59,7 +108,13 @@ describe('extensionSettings', () => {
 
     it('should do nothing if settings are undefined', async () => {
       const config: ExtensionConfig = { name: 'test-ext', version: '1.0.0' };
-      await maybePromptForSettings(config, mockRequestSetting);
+      await maybePromptForSettings(
+        config,
+        '12345',
+        mockRequestSetting,
+        undefined,
+        undefined,
+      );
       expect(mockRequestSetting).not.toHaveBeenCalled();
     });
 
@@ -69,11 +124,17 @@ describe('extensionSettings', () => {
         version: '1.0.0',
         settings: [],
       };
-      await maybePromptForSettings(config, mockRequestSetting);
+      await maybePromptForSettings(
+        config,
+        '12345',
+        mockRequestSetting,
+        undefined,
+        undefined,
+      );
       expect(mockRequestSetting).not.toHaveBeenCalled();
     });
 
-    it('should call requestSetting for each setting', async () => {
+    it('should prompt for all settings if there is no previous config', async () => {
       const config: ExtensionConfig = {
         name: 'test-ext',
         version: '1.0.0',
@@ -82,14 +143,25 @@ describe('extensionSettings', () => {
           { name: 's2', description: 'd2', envVar: 'VAR2' },
         ],
       };
-      await maybePromptForSettings(config, mockRequestSetting);
+      await maybePromptForSettings(
+        config,
+        '12345',
+        mockRequestSetting,
+        undefined,
+        undefined,
+      );
       expect(mockRequestSetting).toHaveBeenCalledTimes(2);
       expect(mockRequestSetting).toHaveBeenCalledWith(config.settings![0]);
       expect(mockRequestSetting).toHaveBeenCalledWith(config.settings![1]);
     });
 
-    it('should write the .env file with the correct content', async () => {
-      const config: ExtensionConfig = {
+    it('should only prompt for new settings', async () => {
+      const previousConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [{ name: 's1', description: 'd1', envVar: 'VAR1' }],
+      };
+      const newConfig: ExtensionConfig = {
         name: 'test-ext',
         version: '1.0.0',
         settings: [
@@ -97,35 +169,151 @@ describe('extensionSettings', () => {
           { name: 's2', description: 'd2', envVar: 'VAR2' },
         ],
       };
-      await maybePromptForSettings(config, mockRequestSetting);
+      const previousSettings = { VAR1: 'previous-VAR1' };
+
+      await maybePromptForSettings(
+        newConfig,
+        '12345',
+        mockRequestSetting,
+        previousConfig,
+        previousSettings,
+      );
+
+      expect(mockRequestSetting).toHaveBeenCalledTimes(1);
+      expect(mockRequestSetting).toHaveBeenCalledWith(newConfig.settings![1]);
 
       const expectedEnvPath = path.join(extensionDir, '.env');
       const actualContent = await fsPromises.readFile(expectedEnvPath, 'utf-8');
-      const expectedContent = 'VAR1=mock-VAR1\nVAR2=mock-VAR2\n';
+      const expectedContent = 'VAR1=previous-VAR1\nVAR2=mock-VAR2\n';
+      expect(actualContent).toBe(expectedContent);
+    });
 
+    it('should remove settings that are no longer in the config', async () => {
+      const previousConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [
+          { name: 's1', description: 'd1', envVar: 'VAR1' },
+          { name: 's2', description: 'd2', envVar: 'VAR2' },
+        ],
+      };
+      const newConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [{ name: 's1', description: 'd1', envVar: 'VAR1' }],
+      };
+      const previousSettings = {
+        VAR1: 'previous-VAR1',
+        VAR2: 'previous-VAR2',
+      };
+
+      await maybePromptForSettings(
+        newConfig,
+        '12345',
+        mockRequestSetting,
+        previousConfig,
+        previousSettings,
+      );
+
+      expect(mockRequestSetting).not.toHaveBeenCalled();
+
+      const expectedEnvPath = path.join(extensionDir, '.env');
+      const actualContent = await fsPromises.readFile(expectedEnvPath, 'utf-8');
+      const expectedContent = 'VAR1=previous-VAR1\n';
+      expect(actualContent).toBe(expectedContent);
+    });
+
+    it('should reprompt if a setting changes sensitivity', async () => {
+      const previousConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [
+          { name: 's1', description: 'd1', envVar: 'VAR1', sensitive: false },
+        ],
+      };
+      const newConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [
+          { name: 's1', description: 'd1', envVar: 'VAR1', sensitive: true },
+        ],
+      };
+      const previousSettings = { VAR1: 'previous-VAR1' };
+
+      await maybePromptForSettings(
+        newConfig,
+        '12345',
+        mockRequestSetting,
+        previousConfig,
+        previousSettings,
+      );
+
+      expect(mockRequestSetting).toHaveBeenCalledTimes(1);
+      expect(mockRequestSetting).toHaveBeenCalledWith(newConfig.settings![0]);
+
+      // The value should now be in keychain, not the .env file.
+      const expectedEnvPath = path.join(extensionDir, '.env');
+      const actualContent = await fsPromises.readFile(expectedEnvPath, 'utf-8');
+      expect(actualContent).toBe('');
+    });
+
+    it('should not prompt if settings are identical', async () => {
+      const previousConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [
+          { name: 's1', description: 'd1', envVar: 'VAR1' },
+          { name: 's2', description: 'd2', envVar: 'VAR2' },
+        ],
+      };
+      const newConfig: ExtensionConfig = {
+        name: 'test-ext',
+        version: '1.0.0',
+        settings: [
+          { name: 's1', description: 'd1', envVar: 'VAR1' },
+          { name: 's2', description: 'd2', envVar: 'VAR2' },
+        ],
+      };
+      const previousSettings = {
+        VAR1: 'previous-VAR1',
+        VAR2: 'previous-VAR2',
+      };
+
+      await maybePromptForSettings(
+        newConfig,
+        '12345',
+        mockRequestSetting,
+        previousConfig,
+        previousSettings,
+      );
+
+      expect(mockRequestSetting).not.toHaveBeenCalled();
+      const expectedEnvPath = path.join(extensionDir, '.env');
+      const actualContent = await fsPromises.readFile(expectedEnvPath, 'utf-8');
+      const expectedContent = 'VAR1=previous-VAR1\nVAR2=previous-VAR2\n';
       expect(actualContent).toBe(expectedContent);
     });
   });
 
   describe('promptForSetting', () => {
-    // it('should use prompts with type "password" for sensitive settings', async () => {
-    //   const setting: ExtensionSetting = {
-    //     name: 'API Key',
-    //     description: 'Your secret key',
-    //     envVar: 'API_KEY',
-    //     sensitive: true,
-    //   };
-    //   vi.mocked(prompts).mockResolvedValue({ value: 'secret-key' });
+    it('should use prompts with type "password" for sensitive settings', async () => {
+      const setting: ExtensionSetting = {
+        name: 'API Key',
+        description: 'Your secret key',
+        envVar: 'API_KEY',
+        sensitive: true,
+      };
+      vi.mocked(prompts).mockResolvedValue({ value: 'secret-key' });
 
-    //   const result = await promptForSetting(setting);
+      const result = await promptForSetting(setting);
 
-    //   expect(prompts).toHaveBeenCalledWith({
-    //     type: 'password',
-    //     name: 'value',
-    //     message: 'API Key\nYour secret key',
-    //   });
-    //   expect(result).toBe('secret-key');
-    // });
+      expect(prompts).toHaveBeenCalledWith({
+        type: 'password',
+        name: 'value',
+        message: 'API Key\nYour secret key',
+      });
+      expect(result).toBe('secret-key');
+    });
 
     it('should use prompts with type "text" for non-sensitive settings', async () => {
       const setting: ExtensionSetting = {
diff --git a/packages/cli/src/config/extensions/extensionSettings.ts b/packages/cli/src/config/extensions/extensionSettings.ts
index 55eb70b83a..f625ef5ea8 100644
--- a/packages/cli/src/config/extensions/extensionSettings.ts
+++ b/packages/cli/src/config/extensions/extensionSettings.ts
@@ -12,57 +12,76 @@ import { ExtensionStorage } from './storage.js';
 import type { ExtensionConfig } from '../extension.js';
 
 import prompts from 'prompts';
+import { KeychainTokenStorage } from '@google/gemini-cli-core';
 
 export interface ExtensionSetting {
   name: string;
   description: string;
   envVar: string;
+  // NOTE: If no value is set, this setting will be considered NOT sensitive.
+  sensitive?: boolean;
 }
 
 export async function maybePromptForSettings(
   extensionConfig: ExtensionConfig,
+  extensionId: string,
   requestSetting: (setting: ExtensionSetting) => Promise<string>,
   previousExtensionConfig?: ExtensionConfig,
   previousSettings?: Record<string, string>,
 ): Promise<void> {
   const { name: extensionName, settings } = extensionConfig;
+  if (
+    (!settings || settings.length === 0) &&
+    (!previousExtensionConfig?.settings ||
+      previousExtensionConfig.settings.length === 0)
+  ) {
+    return;
+  }
   const envFilePath = new ExtensionStorage(extensionName).getEnvFilePath();
+  const keychain = new KeychainTokenStorage(extensionId);
 
   if (!settings || settings.length === 0) {
-    // No settings for this extension. Clear any existing .env file.
-    if (fsSync.existsSync(envFilePath)) {
-      await fs.writeFile(envFilePath, '');
-    }
+    await clearSettings(envFilePath, keychain);
     return;
   }
 
-  let settingsToPrompt = settings;
-  if (previousExtensionConfig) {
-    const oldSettings = new Set(
-      previousExtensionConfig.settings?.map((s) => s.name) || [],
-    );
-    settingsToPrompt = settingsToPrompt.filter((s) => !oldSettings.has(s.name));
-  }
+  const settingsChanges = getSettingsChanges(
+    settings,
+    previousExtensionConfig?.settings ?? [],
+  );
 
   const allSettings: Record<string, string> = { ...(previousSettings ?? {}) };
 
-  if (settingsToPrompt && settingsToPrompt.length > 0) {
-    for (const setting of settingsToPrompt) {
-      const answer = await requestSetting(setting);
-      allSettings[setting.envVar] = answer;
-    }
+  for (const removedEnvSetting of settingsChanges.removeEnv) {
+    delete allSettings[removedEnvSetting.envVar];
   }
 
-  const validEnvVars = new Set(settings.map((s) => s.envVar));
-  const finalSettings: Record<string, string> = {};
-  for (const [key, value] of Object.entries(allSettings)) {
-    if (validEnvVars.has(key)) {
-      finalSettings[key] = value;
+  for (const removedSensitiveSetting of settingsChanges.removeSensitive) {
+    await keychain.deleteSecret(removedSensitiveSetting.envVar);
+  }
+
+  for (const setting of settingsChanges.promptForSensitive.concat(
+    settingsChanges.promptForEnv,
+  )) {
+    const answer = await requestSetting(setting);
+    allSettings[setting.envVar] = answer;
+  }
+
+  const nonSensitiveSettings: Record<string, string> = {};
+  for (const setting of settings) {
+    const value = allSettings[setting.envVar];
+    if (value === undefined) {
+      continue;
+    }
+    if (setting.sensitive) {
+      await keychain.setSecret(setting.envVar, value);
+    } else {
+      nonSensitiveSettings[setting.envVar] = value;
     }
   }
 
   let envContent = '';
-  for (const [key, value] of Object.entries(finalSettings)) {
+  for (const [key, value] of Object.entries(nonSensitiveSettings)) {
     envContent += `${key}=${value}\n`;
   }
 
@@ -73,17 +92,22 @@ export async function promptForSetting(
   setting: ExtensionSetting,
 ): Promise<string> {
   const response = await prompts({
-    // type: setting.sensitive ? 'password' : 'text',
-    type: 'text',
+    type: setting.sensitive ? 'password' : 'text',
     name: 'value',
     message: `${setting.name}\n${setting.description}`,
   });
   return response.value;
 }
 
-export function getEnvContents(
-  extensionStorage: ExtensionStorage,
-): Record<string, string> {
+export async function getEnvContents(
+  extensionConfig: ExtensionConfig,
+  extensionId: string,
+): Promise<Record<string, string>> {
+  if (!extensionConfig.settings || extensionConfig.settings.length === 0) {
+    return Promise.resolve({});
+  }
+  const extensionStorage = new ExtensionStorage(extensionConfig.name);
+  const keychain = new KeychainTokenStorage(extensionId);
   let customEnv: Record<string, string> = {};
   if (fsSync.existsSync(extensionStorage.getEnvFilePath())) {
     const envFile = fsSync.readFileSync(
@@ -92,5 +116,67 @@ export function getEnvContents(
     );
     customEnv = dotenv.parse(envFile);
   }
+
+  if (extensionConfig.settings) {
+    for (const setting of extensionConfig.settings) {
+      if (setting.sensitive) {
+        const secret = await keychain.getSecret(setting.envVar);
+        if (secret) {
+          customEnv[setting.envVar] = secret;
+        }
+      }
+    }
+  }
   return customEnv;
 }
+
+interface settingsChanges {
+  promptForSensitive: ExtensionSetting[];
+  removeSensitive: ExtensionSetting[];
+  promptForEnv: ExtensionSetting[];
+  removeEnv: ExtensionSetting[];
+}
+function getSettingsChanges(
+  settings: ExtensionSetting[],
+  oldSettings: ExtensionSetting[],
+): settingsChanges {
+  const isSameSetting = (a: ExtensionSetting, b: ExtensionSetting) =>
+    a.envVar === b.envVar && (a.sensitive ?? false) === (b.sensitive ?? false);
+
+  const sensitiveOld = oldSettings.filter((s) => s.sensitive ?? false);
+  const sensitiveNew = settings.filter((s) => s.sensitive ?? false);
+  const envOld = oldSettings.filter((s) => !(s.sensitive ?? false));
+  const envNew = settings.filter((s) => !(s.sensitive ?? false));
+
+  return {
+    promptForSensitive: sensitiveNew.filter(
+      (s) => !sensitiveOld.some((old) => isSameSetting(s, old)),
+    ),
+    removeSensitive: sensitiveOld.filter(
+      (s) => !sensitiveNew.some((neu) => isSameSetting(s, neu)),
+    ),
+    promptForEnv: envNew.filter(
+      (s) => !envOld.some((old) => isSameSetting(s, old)),
+    ),
+    removeEnv: envOld.filter(
+      (s) => !envNew.some((neu) => isSameSetting(s, neu)),
+    ),
+  };
+}
+
+async function clearSettings(
+  envFilePath: string,
+  keychain: KeychainTokenStorage,
+) {
+  if (fsSync.existsSync(envFilePath)) {
+    await fs.writeFile(envFilePath, '');
+  }
+  if (!keychain.isAvailable()) {
+    return;
+  }
+  const secrets = await keychain.listSecrets();
+  for (const secret of secrets) {
+    await keychain.deleteSecret(secret);
+  }
+  return;
+}
diff --git a/packages/cli/src/config/extensions/update.test.ts b/packages/cli/src/config/extensions/update.test.ts
index 8dfe841d74..c3a1fb64e4 100644
--- a/packages/cli/src/config/extensions/update.test.ts
+++ b/packages/cli/src/config/extensions/update.test.ts
@@ -9,7 +9,7 @@ import * as fs from 'node:fs';
 import * as os from 'node:os';
 import * as path from 'node:path';
 import { checkForAllExtensionUpdates, updateExtension } from './update.js';
-import { GEMINI_DIR } from '@google/gemini-cli-core';
+import { GEMINI_DIR, KeychainTokenStorage } from '@google/gemini-cli-core';
 import { isWorkspaceTrusted } from '../trustedFolders.js';
 import { ExtensionUpdateState } from '../../ui/state/extensions.js';
 import { createExtension } from '../../test-utils/createExtension.js';
@@ -64,9 +64,24 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
     logExtensionUninstall: mockLogExtensionUninstall,
     ExtensionInstallEvent: vi.fn(),
     ExtensionUninstallEvent: vi.fn(),
+    KeychainTokenStorage: vi.fn().mockImplementation(() => ({
+      getSecret: vi.fn(),
+      setSecret: vi.fn(),
+      deleteSecret: vi.fn(),
+      listSecrets: vi.fn(),
+      isAvailable: vi.fn().mockResolvedValue(true),
+    })),
   };
 });
 
+interface MockKeychainStorage {
+  getSecret: ReturnType<typeof vi.fn>;
+  setSecret: ReturnType<typeof vi.fn>;
+  deleteSecret: ReturnType<typeof vi.fn>;
+  listSecrets: ReturnType<typeof vi.fn>;
+  isAvailable: ReturnType<typeof vi.fn>;
+}
+
 describe('update tests', () => {
   let tempHomeDir: string;
   let tempWorkspaceDir: string;
@@ -76,8 +91,32 @@ describe('update tests', () => {
   let mockPromptForSettings: MockedFunction<
     (setting: ExtensionSetting) => Promise<string>
   >;
+  let mockKeychainStorage: MockKeychainStorage;
+  let keychainData: Record<string, string>;
 
   beforeEach(() => {
+    vi.clearAllMocks();
+    keychainData = {};
+    mockKeychainStorage = {
+      getSecret: vi
+        .fn()
+        .mockImplementation(async (key: string) => keychainData[key] || null),
+      setSecret: vi
+        .fn()
+        .mockImplementation(async (key: string, value: string) => {
+          keychainData[key] = value;
+        }),
+      deleteSecret: vi.fn().mockImplementation(async (key: string) => {
+        delete keychainData[key];
+      }),
+      listSecrets: vi
+        .fn()
+        .mockImplementation(async () => Object.keys(keychainData)),
+      isAvailable: vi.fn().mockResolvedValue(true),
+    };
+    (
+      KeychainTokenStorage as unknown as ReturnType<typeof vi.fn>
+    ).mockImplementation(() => mockKeychainStorage);
     tempHomeDir = fs.mkdtempSync(
       path.join(os.tmpdir(), 'gemini-cli-test-home-'),
     );
@@ -110,6 +149,7 @@ describe('update tests', () => {
   afterEach(() => {
     fs.rmSync(tempHomeDir, { recursive: true, force: true });
     fs.rmSync(tempWorkspaceDir, { recursive: true, force: true });
+    vi.restoreAllMocks();
   });
 
   describe('updateExtension', () => {
@@ -139,11 +179,10 @@ describe('update tests', () => {
         );
       });
       mockGit.getRemotes.mockResolvedValue([{ name: 'origin' }]);
-      const extension = extensionManager
-        .loadExtensions()
-        .find((e) => e.name === extensionName)!;
+      const extensions = await extensionManager.loadExtensions();
+      const extension = extensions.find((e) => e.name === extensionName)!;
       const updateInfo = await updateExtension(
-        extension,
+        extension!,
         extensionManager,
         ExtensionUpdateState.UPDATE_AVAILABLE,
         () => {},
@@ -189,11 +228,10 @@ describe('update tests', () => {
 
       const dispatch = vi.fn();
 
-      const extension = extensionManager
-        .loadExtensions()
-        .find((e) => e.name === extensionName)!;
+      const extensions = await extensionManager.loadExtensions();
+      const extension = extensions.find((e) => e.name === extensionName)!;
       await updateExtension(
-        extension,
+        extension!,
         extensionManager,
         ExtensionUpdateState.UPDATE_AVAILABLE,
         dispatch,
@@ -231,12 +269,11 @@ describe('update tests', () => {
       mockGit.getRemotes.mockResolvedValue([{ name: 'origin' }]);
 
       const dispatch = vi.fn();
-      const extension = extensionManager
-        .loadExtensions()
-        .find((e) => e.name === extensionName)!;
+      const extensions = await extensionManager.loadExtensions();
+      const extension = extensions.find((e) => e.name === extensionName)!;
       await expect(
         updateExtension(
-          extension,
+          extension!,
           extensionManager,
           ExtensionUpdateState.UPDATE_AVAILABLE,
           dispatch,
@@ -280,7 +317,7 @@ describe('update tests', () => {
 
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        extensionManager.loadExtensions(),
+        await extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -312,7 +349,7 @@ describe('update tests', () => {
 
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        extensionManager.loadExtensions(),
+        await extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -341,7 +378,7 @@ describe('update tests', () => {
       });
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        extensionManager.loadExtensions(),
+        await extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -370,7 +407,7 @@ describe('update tests', () => {
       });
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        extensionManager.loadExtensions(),
+        await extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
@@ -398,7 +435,7 @@ describe('update tests', () => {
 
       const dispatch = vi.fn();
       await checkForAllExtensionUpdates(
-        extensionManager.loadExtensions(),
+        await extensionManager.loadExtensions(),
         extensionManager,
         dispatch,
       );
diff --git a/packages/cli/src/config/extensions/update.ts b/packages/cli/src/config/extensions/update.ts
index 40f1330bc7..7bfa253651 100644
--- a/packages/cli/src/config/extensions/update.ts
+++ b/packages/cli/src/config/extensions/update.ts
@@ -58,7 +58,7 @@ export async function updateExtension(
 
   const tempDir = await ExtensionStorage.createTmpDir();
   try {
-    const previousExtensionConfig = await extensionManager.loadExtensionConfig(
+    const previousExtensionConfig = extensionManager.loadExtensionConfig(
       extension.path,
     );
     let updatedExtension: GeminiCLIExtension;
diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts
index 78e85041f2..6ca94c14c3 100644
--- a/packages/cli/src/config/settings.test.ts
+++ b/packages/cli/src/config/settings.test.ts
@@ -2442,7 +2442,7 @@ describe('Settings Loading and Merging', () => {
         extensionManager,
         'disableExtension',
       );
-      mockDisableExtension.mockImplementation(() => {});
+      mockDisableExtension.mockImplementation(async () => {});
 
       migrateDeprecatedSettings(loadedSettings, extensionManager);
 
@@ -2515,7 +2515,7 @@ describe('Settings Loading and Merging', () => {
         extensionManager,
         'disableExtension',
       );
-      mockDisableExtension.mockImplementation(() => {});
+      mockDisableExtension.mockImplementation(async () => {});
 
       migrateDeprecatedSettings(loadedSettings, extensionManager);
 
diff --git a/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx b/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
index be1a415538..8e36311dc0 100644
--- a/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
+++ b/packages/cli/src/ui/hooks/useExtensionUpdates.test.tsx
@@ -124,7 +124,7 @@ describe('useExtensionUpdates', () => {
         autoUpdate: true,
       },
     });
-
+    await extensionManager.loadExtensions();
     const addItem = vi.fn();
 
     vi.mocked(checkForAllExtensionUpdates).mockImplementation(
@@ -145,7 +145,6 @@ describe('useExtensionUpdates', () => {
       name: '',
     });
 
-    extensionManager.loadExtensions();
     function TestComponent() {
       useExtensionUpdates(extensionManager, addItem);
       return null;
@@ -189,7 +188,7 @@ describe('useExtensionUpdates', () => {
       },
     });
 
-    extensionManager.loadExtensions();
+    await extensionManager.loadExtensions();
 
     const addItem = vi.fn();
 
diff --git a/packages/core/index.ts b/packages/core/index.ts
index acc9743e61..2369b6b0e2 100644
--- a/packages/core/index.ts
+++ b/packages/core/index.ts
@@ -44,5 +44,6 @@ export { makeFakeConfig } from './src/test-utils/config.js';
 export * from './src/utils/pathReader.js';
 export { ClearcutLogger } from './src/telemetry/clearcut-logger/clearcut-logger.js';
 export { logModelSlashCommand } from './src/telemetry/loggers.js';
+export { KeychainTokenStorage } from './src/mcp/token-storage/keychain-token-storage.js';
 export * from './src/utils/googleQuotaErrors.js';
 export type { GoogleApiError } from './src/utils/googleErrors.js';
diff --git a/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts b/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts
index 3b97902f19..632387e23b 100644
--- a/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts
+++ b/packages/core/src/mcp/token-storage/keychain-token-storage.test.ts
@@ -386,5 +386,54 @@ describe('KeychainTokenStorage', () => {
         );
       });
     });
+
+    describe('Secrets', () => {
+      it('should set and get a secret', async () => {
+        mockKeytar.setPassword.mockResolvedValue(undefined);
+        mockKeytar.getPassword.mockResolvedValue('secret-value');
+
+        await storage.setSecret('secret-key', 'secret-value');
+        const value = await storage.getSecret('secret-key');
+
+        expect(mockKeytar.setPassword).toHaveBeenCalledWith(
+          mockServiceName,
+          '__secret__secret-key',
+          'secret-value',
+        );
+        expect(mockKeytar.getPassword).toHaveBeenCalledWith(
+          mockServiceName,
+          '__secret__secret-key',
+        );
+        expect(value).toBe('secret-value');
+      });
+
+      it('should delete a secret', async () => {
+        mockKeytar.deletePassword.mockResolvedValue(true);
+        await storage.deleteSecret('secret-key');
+        expect(mockKeytar.deletePassword).toHaveBeenCalledWith(
+          mockServiceName,
+          '__secret__secret-key',
+        );
+      });
+
+      it('should list secrets', async () => {
+        mockKeytar.findCredentials.mockResolvedValue([
+          { account: '__secret__secret1', password: '' },
+          { account: '__secret__secret2', password: '' },
+          { account: 'server1', password: '' },
+        ]);
+        const secrets = await storage.listSecrets();
+        expect(secrets).toEqual(['secret1', 'secret2']);
+      });
+
+      it('should not list secrets in listServers', async () => {
+        mockKeytar.findCredentials.mockResolvedValue([
+          { account: '__secret__secret1', password: '' },
+          { account: 'server1', password: '' },
+        ]);
+        const servers = await storage.listServers();
+        expect(servers).toEqual(['server1']);
+      });
+    });
   });
 });
diff --git a/packages/core/src/mcp/token-storage/keychain-token-storage.ts b/packages/core/src/mcp/token-storage/keychain-token-storage.ts
index aa8cee2e9d..93e94acb29 100644
--- a/packages/core/src/mcp/token-storage/keychain-token-storage.ts
+++ b/packages/core/src/mcp/token-storage/keychain-token-storage.ts
@@ -6,7 +6,7 @@
 
 import * as crypto from 'node:crypto';
 import { BaseTokenStorage } from './base-token-storage.js';
-import type { OAuthCredentials } from './types.js';
+import type { OAuthCredentials, SecretStorage } from './types.js';
 import { coreEvents } from '../../utils/events.js';
 
 interface Keytar {
@@ -23,8 +23,12 @@ interface Keytar {
 }
 
 const KEYCHAIN_TEST_PREFIX = '__keychain_test__';
+const SECRET_PREFIX = '__secret__';
 
-export class KeychainTokenStorage extends BaseTokenStorage {
+export class KeychainTokenStorage
+  extends BaseTokenStorage
+  implements SecretStorage
+{
   private keychainAvailable: boolean | null = null;
   private keytarModule: Keytar | null = null;
   private keytarLoadAttempted = false;
@@ -137,7 +141,11 @@ export class KeychainTokenStorage extends BaseTokenStorage {
     try {
       const credentials = await keytar.findCredentials(this.serviceName);
       return credentials
-        .filter((cred) => !cred.account.startsWith(KEYCHAIN_TEST_PREFIX))
+        .filter(
+          (cred) =>
+            !cred.account.startsWith(KEYCHAIN_TEST_PREFIX) &&
+            !cred.account.startsWith(SECRET_PREFIX),
+        )
         .map((cred: { account: string }) => cred.account);
     } catch (error) {
       coreEvents.emitFeedback(
@@ -163,7 +171,11 @@ export class KeychainTokenStorage extends BaseTokenStorage {
     try {
       const credentials = (
         await keytar.findCredentials(this.serviceName)
-      ).filter((c) => !c.account.startsWith(KEYCHAIN_TEST_PREFIX));
+      ).filter(
+        (c) =>
+          !c.account.startsWith(KEYCHAIN_TEST_PREFIX) &&
+          !c.account.startsWith(SECRET_PREFIX),
+      );
 
       for (const cred of credentials) {
         try {
@@ -258,4 +270,62 @@ export class KeychainTokenStorage extends BaseTokenStorage {
   async isAvailable(): Promise<boolean> {
     return this.checkKeychainAvailability();
   }
+
+  async setSecret(key: string, value: string): Promise<void> {
+    if (!(await this.checkKeychainAvailability())) {
+      throw new Error('Keychain is not available');
+    }
+    const keytar = await this.getKeytar();
+    if (!keytar) {
+      throw new Error('Keytar module not available');
+    }
+    await keytar.setPassword(this.serviceName, `${SECRET_PREFIX}${key}`, value);
+  }
+
+  async getSecret(key: string): Promise<string | null> {
+    if (!(await this.checkKeychainAvailability())) {
+      throw new Error('Keychain is not available');
+    }
+    const keytar = await this.getKeytar();
+    if (!keytar) {
+      throw new Error('Keytar module not available');
+    }
+    return keytar.getPassword(this.serviceName, `${SECRET_PREFIX}${key}`);
+  }
+
+  async deleteSecret(key: string): Promise<void> {
+    if (!(await this.checkKeychainAvailability())) {
+      throw new Error('Keychain is not available');
+    }
+    const keytar = await this.getKeytar();
+    if (!keytar) {
+      throw new Error('Keytar module not available');
+    }
+    const deleted = await keytar.deletePassword(
+      this.serviceName,
+      `${SECRET_PREFIX}${key}`,
+    );
+    if (!deleted) {
+      throw new Error(`No secret found for key: ${key}`);
+    }
+  }
+
+  async listSecrets(): Promise<string[]> {
+    if (!(await this.checkKeychainAvailability())) {
+      throw new Error('Keychain is not available');
+    }
+    const keytar = await this.getKeytar();
+    if (!keytar) {
+      throw new Error('Keytar module not available');
+    }
+    try {
+      const credentials = await keytar.findCredentials(this.serviceName);
+      return credentials
+        .filter((cred) => cred.account.startsWith(SECRET_PREFIX))
+        .map((cred) => cred.account.substring(SECRET_PREFIX.length));
+    } catch (error) {
+      console.error('Failed to list secrets from keychain:', error);
+      return [];
+    }
+  }
 }
diff --git a/packages/core/src/mcp/token-storage/types.ts b/packages/core/src/mcp/token-storage/types.ts
index 1e95a975e0..b167e821d8 100644
--- a/packages/core/src/mcp/token-storage/types.ts
+++ b/packages/core/src/mcp/token-storage/types.ts
@@ -36,6 +36,13 @@ export interface TokenStorage {
   clearAll(): Promise<void>;
 }
 
+export interface SecretStorage {
+  setSecret(key: string, value: string): Promise<void>;
+  getSecret(key: string): Promise<string | null>;
+  deleteSecret(key: string): Promise<void>;
+  listSecrets(): Promise<string[]>;
+}
+
 export enum TokenStorageType {
   KEYCHAIN = 'keychain',
   ENCRYPTED_FILE = 'encrypted_file',

From e750da987d7e7a2be5e6e7bf6e4d2302d187a8e2 Mon Sep 17 00:00:00 2001
From: hritan <48129645+hritan@users.noreply.github.com>
Date: Tue, 28 Oct 2025 19:05:48 +0000
Subject: [PATCH 62/73] chore: migrate console.error in useGeminiStream
 (#12157)

Co-authored-by: Hriday Taneja <hridayt@google.com>
---
 packages/cli/src/ui/hooks/useGeminiStream.test.tsx | 9 +++++----
 packages/cli/src/ui/hooks/useGeminiStream.ts       | 5 +++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index ae3566feba..1c7a28e231 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -33,6 +33,7 @@ import {
   ToolErrorType,
   ToolConfirmationOutcome,
   tokenLimit,
+  debugLogger,
 } from '@google/gemini-cli-core';
 import type { Part, PartListUnion } from '@google/genai';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
@@ -1708,8 +1709,8 @@ describe('useGeminiStream', () => {
     });
 
     it('should handle errors gracefully when auto-approving tool calls', async () => {
-      const consoleSpy = vi
-        .spyOn(console, 'error')
+      const debuggerSpy = vi
+        .spyOn(debugLogger, 'warn')
         .mockImplementation(() => {});
       const mockOnConfirmSuccess = vi.fn().mockResolvedValue(undefined);
       const mockOnConfirmError = vi
@@ -1790,12 +1791,12 @@ describe('useGeminiStream', () => {
       expect(mockOnConfirmError).toHaveBeenCalledTimes(1);
 
       // Error should be logged
-      expect(consoleSpy).toHaveBeenCalledWith(
+      expect(debuggerSpy).toHaveBeenCalledWith(
         'Failed to auto-approve tool call call2:',
         expect.any(Error),
       );
 
-      consoleSpy.mockRestore();
+      debuggerSpy.mockRestore();
     });
 
     it('should skip tool calls without confirmationDetails', async () => {
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index 851ccbc0b1..fce4112735 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -35,6 +35,7 @@ import {
   promptIdContext,
   WRITE_FILE_TOOL_NAME,
   tokenLimit,
+  debugLogger,
   runInDevTraceSpan,
 } from '@google/gemini-cli-core';
 import { type Part, type PartListUnion, FinishReason } from '@google/genai';
@@ -162,7 +163,7 @@ export const useGeminiStream = (
               completedToolCallsFromScheduler,
             );
         } catch (error) {
-          console.error(
+          debugLogger.warn(
             `Error recording completed tool call information: ${error}`,
           );
         }
@@ -1004,7 +1005,7 @@ export const useGeminiStream = (
                 ToolConfirmationOutcome.ProceedOnce,
               );
             } catch (error) {
-              console.error(
+              debugLogger.warn(
                 `Failed to auto-approve tool call ${call.request.callId}:`,
                 error,
               );

From 39eb6ed9ccaec380a0baed3edabd8276c829fc30 Mon Sep 17 00:00:00 2001
From: hritan <48129645+hritan@users.noreply.github.com>
Date: Tue, 28 Oct 2025 19:06:13 +0000
Subject: [PATCH 63/73] chore: migrate console.error in workspaceContext
 (#12167)

Co-authored-by: Hriday Taneja <hridayt@google.com>
---
 packages/core/src/utils/workspaceContext.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/utils/workspaceContext.ts b/packages/core/src/utils/workspaceContext.ts
index 007fed6b54..4531bbbd13 100755
--- a/packages/core/src/utils/workspaceContext.ts
+++ b/packages/core/src/utils/workspaceContext.ts
@@ -54,7 +54,9 @@ export class WorkspaceContext {
         listener();
       } catch (e) {
         // Don't let one listener break others.
-        console.error('Error in WorkspaceContext listener:', e);
+        debugLogger.warn(
+          `Error in WorkspaceContext listener: (${e instanceof Error ? e.message : String(e)})`,
+        );
       }
     }
   }

From ab1f195508cf2d4ab5288279eed0114eedd6bf99 Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Tue, 28 Oct 2025 12:10:40 -0700
Subject: [PATCH 64/73] Change debug drawer keybinding to F12 (#12171)

---
 docs/cli/keyboard-shortcuts.md                             | 2 +-
 packages/cli/src/config/keyBindings.ts                     | 2 +-
 packages/cli/src/ui/components/ConsoleSummaryDisplay.tsx   | 2 +-
 packages/cli/src/ui/components/DetailedMessagesDisplay.tsx | 3 +--
 packages/cli/src/ui/hooks/usePhraseCycler.ts               | 2 +-
 packages/cli/src/ui/keyMatchers.test.ts                    | 6 +++---
 6 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/docs/cli/keyboard-shortcuts.md b/docs/cli/keyboard-shortcuts.md
index 2e7a86c022..b5ff0b8e00 100644
--- a/docs/cli/keyboard-shortcuts.md
+++ b/docs/cli/keyboard-shortcuts.md
@@ -10,10 +10,10 @@ This document lists the available keyboard shortcuts in the Gemini CLI.
 | `Ctrl+C` | Cancel the ongoing request and clear the input. Press twice to exit the application.                                  |
 | `Ctrl+D` | Exit the application if the input is empty. Press twice to confirm.                                                   |
 | `Ctrl+L` | Clear the screen.                                                                                                     |
-| `Ctrl+O` | Toggle the display of the debug console.                                                                              |
 | `Ctrl+S` | Allows long responses to print fully, disabling truncation. Use your terminal's scrollback to view the entire output. |
 | `Ctrl+T` | Toggle the display of the todo list.                                                                                  |
 | `Ctrl+Y` | Toggle auto-approval (YOLO mode) for all tool calls.                                                                  |
+| `F12`    | Toggle the display of the debug console.                                                                              |
 
 ## Input Prompt
 
diff --git a/packages/cli/src/config/keyBindings.ts b/packages/cli/src/config/keyBindings.ts
index 0c5d54146c..14e56b33a3 100644
--- a/packages/cli/src/config/keyBindings.ts
+++ b/packages/cli/src/config/keyBindings.ts
@@ -156,7 +156,7 @@ export const defaultKeyBindings: KeyBindingConfig = {
   [Command.PASTE_CLIPBOARD_IMAGE]: [{ key: 'v', ctrl: true }],
 
   // App level bindings
-  [Command.SHOW_ERROR_DETAILS]: [{ key: 'o', ctrl: true }],
+  [Command.SHOW_ERROR_DETAILS]: [{ key: 'f12' }],
   [Command.SHOW_FULL_TODOS]: [{ key: 't', ctrl: true }],
   [Command.TOGGLE_IDE_CONTEXT_DETAIL]: [{ key: 'g', ctrl: true }],
   [Command.TOGGLE_MARKDOWN]: [{ key: 'm', command: true }],
diff --git a/packages/cli/src/ui/components/ConsoleSummaryDisplay.tsx b/packages/cli/src/ui/components/ConsoleSummaryDisplay.tsx
index 2f2f8a2a75..9d19683e22 100644
--- a/packages/cli/src/ui/components/ConsoleSummaryDisplay.tsx
+++ b/packages/cli/src/ui/components/ConsoleSummaryDisplay.tsx
@@ -27,7 +27,7 @@ export const ConsoleSummaryDisplay: React.FC<ConsoleSummaryDisplayProps> = ({
       {errorCount > 0 && (
         <Text color={theme.status.error}>
           {errorIcon} {errorCount} error{errorCount > 1 ? 's' : ''}{' '}
-          <Text color={theme.text.secondary}>(ctrl+o for details)</Text>
+          <Text color={theme.text.secondary}>(F12 for details)</Text>
         </Text>
       )}
     </Box>
diff --git a/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx b/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx
index b31d088005..acc3f0622f 100644
--- a/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx
+++ b/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx
@@ -37,8 +37,7 @@ export const DetailedMessagesDisplay: React.FC<
     >
       <Box marginBottom={1}>
         <Text bold color={theme.text.primary}>
-          Debug Console{' '}
-          <Text color={theme.text.secondary}>(ctrl+o to close)</Text>
+          Debug Console <Text color={theme.text.secondary}>(F12 to close)</Text>
         </Text>
       </Box>
       <MaxSizedBox maxHeight={maxHeight} maxWidth={width - borderAndPadding}>
diff --git a/packages/cli/src/ui/hooks/usePhraseCycler.ts b/packages/cli/src/ui/hooks/usePhraseCycler.ts
index abc14f2d7d..33a0cb40e0 100644
--- a/packages/cli/src/ui/hooks/usePhraseCycler.ts
+++ b/packages/cli/src/ui/hooks/usePhraseCycler.ts
@@ -209,7 +209,7 @@ export const INFORMATIVE_TIPS = [
   'Cancel a request with Ctrl+C, or press twice to exit...',
   'Exit the app with Ctrl+D on an empty line...',
   'Clear your screen at any time with Ctrl+L...',
-  'Toggle the debug console display with Ctrl+O...',
+  'Toggle the debug console display with F12...',
   'See full, untruncated responses with Ctrl+S...',
   'Toggle auto-approval (YOLO mode) for all tools with Ctrl+Y...',
   'Toggle shell mode by typing ! in an empty prompt...',
diff --git a/packages/cli/src/ui/keyMatchers.test.ts b/packages/cli/src/ui/keyMatchers.test.ts
index 46f492f090..37176288cf 100644
--- a/packages/cli/src/ui/keyMatchers.test.ts
+++ b/packages/cli/src/ui/keyMatchers.test.ts
@@ -50,7 +50,7 @@ describe('keyMatchers', () => {
     [Command.OPEN_EXTERNAL_EDITOR]: (key: Key) =>
       key.ctrl && (key.name === 'x' || key.sequence === '\x18'),
     [Command.PASTE_CLIPBOARD_IMAGE]: (key: Key) => key.ctrl && key.name === 'v',
-    [Command.SHOW_ERROR_DETAILS]: (key: Key) => key.ctrl && key.name === 'o',
+    [Command.SHOW_ERROR_DETAILS]: (key: Key) => key.name === 'f12',
     [Command.SHOW_FULL_TODOS]: (key: Key) => key.ctrl && key.name === 't',
     [Command.TOGGLE_IDE_CONTEXT_DETAIL]: (key: Key) =>
       key.ctrl && key.name === 'g',
@@ -212,8 +212,8 @@ describe('keyMatchers', () => {
     // App level bindings
     {
       command: Command.SHOW_ERROR_DETAILS,
-      positive: [createKey('o', { ctrl: true })],
-      negative: [createKey('o'), createKey('e', { ctrl: true })],
+      positive: [createKey('f12')],
+      negative: [createKey('o', { ctrl: true }), createKey('f11')],
     },
     {
       command: Command.SHOW_FULL_TODOS,

From 44bdd3ad11d4698295a967f8a8f4d95015b8cd40 Mon Sep 17 00:00:00 2001
From: Jacob MacDonald <jakemac@google.com>
Date: Tue, 28 Oct 2025 12:13:45 -0700
Subject: [PATCH 65/73] Record model responses with --record-responses (for use
 in testing) (#11894)

---
 docs/integration-tests.md                     |  16 ++
 ...t-compress-interactive.compress-empty.json |  18 --
 ...press-interactive.compress-empty.responses |   0
 ...compress-interactive.compress-failure.json |  40 ----
 ...ess-interactive.compress-failure.responses |   2 +
 ...context-compress-interactive.compress.json |  40 ----
 ...xt-compress-interactive.compress.responses |   3 +
 .../context-compress-interactive.test.ts      |  30 ++-
 integration-tests/test-helper.ts              |  20 +-
 packages/cli/src/config/config.ts             |   8 +
 packages/cli/src/gemini.test.tsx              |   1 +
 packages/core/src/config/config.ts            |   3 +
 .../core/src/core/contentGenerator.test.ts    |  17 ++
 packages/core/src/core/contentGenerator.ts    | 103 +++++----
 .../src/core/fakeContentGenerator.test.ts     | 211 ++++++++----------
 .../core/src/core/fakeContentGenerator.ts     |  99 ++++----
 .../core/recordingContentGenerator.test.ts    | 151 +++++++++++++
 .../src/core/recordingContentGenerator.ts     | 112 ++++++++++
 packages/core/src/index.ts                    |   1 +
 19 files changed, 549 insertions(+), 326 deletions(-)
 delete mode 100644 integration-tests/context-compress-interactive.compress-empty.json
 create mode 100644 integration-tests/context-compress-interactive.compress-empty.responses
 delete mode 100644 integration-tests/context-compress-interactive.compress-failure.json
 create mode 100644 integration-tests/context-compress-interactive.compress-failure.responses
 delete mode 100644 integration-tests/context-compress-interactive.compress.json
 create mode 100644 integration-tests/context-compress-interactive.compress.responses
 create mode 100644 packages/core/src/core/recordingContentGenerator.test.ts
 create mode 100644 packages/core/src/core/recordingContentGenerator.ts

diff --git a/docs/integration-tests.md b/docs/integration-tests.md
index 24377c1934..341c8e2899 100644
--- a/docs/integration-tests.md
+++ b/docs/integration-tests.md
@@ -56,6 +56,22 @@ To run a single test by its name, use the `--test-name-pattern` flag:
 npm run test:e2e -- --test-name-pattern "reads a file"
 ```
 
+### Regenerating model responses
+
+Some integration tests use faked out model responses, which may need to be
+regenerated from time to time as the implementations change.
+
+To regenerate these golden files, set the REGENERATE_MODEL_GOLDENS environment
+variable to "true" when running the tests, as shown in the command below.
+
+**WARNING**: If running locally you should review these updated responses for
+any information about yourself or your system that gemini may have included in
+these responses.
+
+```bash
+REGENERATE_MODEL_GOLDENS="true" npm run test:e2e
+```
+
 ### Deflaking a test
 
 Before adding a **new** integration test, you should test it at least 5 times
diff --git a/integration-tests/context-compress-interactive.compress-empty.json b/integration-tests/context-compress-interactive.compress-empty.json
deleted file mode 100644
index 5366bf317b..0000000000
--- a/integration-tests/context-compress-interactive.compress-empty.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "generateContent": [
-    {
-      "candidates": [
-        {
-          "content": {
-            "role": "model",
-            "parts": [
-              {
-                "text": "This is more than the 5 tokens we return below which will trigger an error"
-              }
-            ]
-          }
-        }
-      ]
-    }
-  ]
-}
diff --git a/integration-tests/context-compress-interactive.compress-empty.responses b/integration-tests/context-compress-interactive.compress-empty.responses
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integration-tests/context-compress-interactive.compress-failure.json b/integration-tests/context-compress-interactive.compress-failure.json
deleted file mode 100644
index 939189366b..0000000000
--- a/integration-tests/context-compress-interactive.compress-failure.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "generateContent": [
-    {
-      "candidates": [
-        {
-          "content": {
-            "role": "model",
-            "parts": [
-              {
-                "text": "This is more than the 5 tokens we return below which will trigger an error"
-              }
-            ]
-          }
-        }
-      ]
-    }
-  ],
-  "generateContentStream": [
-    [
-      {
-        "candidates": [
-          {
-            "content": {
-              "role": "model",
-              "parts": [
-                {
-                  "text": "The initial response from the model"
-                }
-              ]
-            },
-            "finishReason": "STOP"
-          }
-        ],
-        "usageMetadata": {
-          "promptTokenCount": 5
-        }
-      }
-    ]
-  ]
-}
diff --git a/integration-tests/context-compress-interactive.compress-failure.responses b/integration-tests/context-compress-interactive.compress-failure.responses
new file mode 100644
index 0000000000..a70004c5d3
--- /dev/null
+++ b/integration-tests/context-compress-interactive.compress-failure.responses
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"thought":true,"text":"**Observing Initial Conditions**\n\nI'm currently focused on the initial context. I've taken note of the provided date, OS, and working directory. I'm also carefully examining the file structure presented within the current working directory. It's helping me understand the starting point for further analysis.\n\n\n"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12270,"totalTokenCount":12316,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12270}],"thoughtsTokenCount":46}},{"candidates":[{"content":{"parts":[{"thought":true,"text":"**Assessing User Intent**\n\nI'm now shifting my focus. I've successfully registered the provided data and file structure. My current task is to understand the user's ultimate goal, given the information provided. The \"Hello.\" command is straightforward, but I'm checking if there's an underlying objective.\n\n\n"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12270,"totalTokenCount":12341,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12270}],"thoughtsTokenCount":71}},{"candidates":[{"content":{"parts":[{"thoughtSignature":"CiQB0e2Kb3dRh+BYdbZvmulSN2Pwbc75DfQOT3H4EN0rn039hoMKfwHR7YpvvyqNKoxXAiCbYw3gbcTr/+pegUpgnsIrt8oQPMytFMjKSsMyshfygc21T2MkyuI6Q5I/fNCcHROWexdZnIeppVCDB2TarN4LGW4T9Yci6n/ynMMFT2xc2/vyHpkDgRM7avhMElnBhuxAY+e4TpxkZIncGWCEHP1TouoKpgEB0e2Kb8Xpwm0hiKhPt2ZLizpxjk+CVtcbnlgv69xo5VsuQ+iNyrVGBGRwNx+eTeNGdGpn6e73WOCZeP91FwOZe7URyL12IA6E6gYWqw0kXJR4hO4p6Lwv49E3+FRiG2C4OKDF8LF5XorYyCHSgBFT1/RUAVj81GDTx1xxtmYKN3xq8Ri+HsPbqU/FM/jtNZKkXXAtufw2Bmw8lJfmugENIv/TQI7xCo8BAdHtim8KgAXJfZ7ASfutVLKTylQeaslyB/SmcHJ0ZiNr5j8WP1prZdb6XnZZ1ZNbhjxUf/ymoxHKGvtTPBgLE9azMj8Lx/k0clhd2a+wNsiIqW9qCzlVah0tBMytpQUjIDtQe9Hj4LLUprF9PUe/xJkj000Z0ZzsgFm2ncdTWZTdkhCQDpyETVAxdE+oklwKJAHR7YpvUjSkD6KwY1gLrOsHKy0UNfn2lMbxjVetKNMVBRqsTg==","text":"Hello."}],"role":"model"},"finishReason":"STOP","index":0}],"usag
eMetadata":{"promptTokenCount":12270,"totalTokenCount":12341,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12270}],"thoughtsTokenCount":71}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"<state_snapshot>\n    <overall_goal>\n        <!-- The user has not yet specified a goal. -->\n    </overall_goal>\n\n    <key_knowledge>\n       - OS: linux\n        - Date: Friday, October 24, 2025\n    </key_knowledge>\n\n    <file_system_state>\n       - OBSERVED: The directory contains `telemetry.log` and a `.gemini/` directory.\n        - OBSERVED: The `.gemini/` directory contains `settings.json` and `settings.json.orig`.\n    </file_system_state>\n\n    <recent_actions>\n        - The user initiated the chat.\n    </recent_actions>\n\n    <current_plan>\n        1. [TODO] Await the user's first instruction to formulate a plan.\n    </current_plan>\n</state_snapshot>"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":983,"candidatesTokenCount":299,"totalTokenCount":1637,"promptTokensDetails":[{"modality":"TEXT","tokenCount":983}],"thoughtsTokenCount":355}}}
diff --git a/integration-tests/context-compress-interactive.compress.json b/integration-tests/context-compress-interactive.compress.json
deleted file mode 100644
index b9d470fc9c..0000000000
--- a/integration-tests/context-compress-interactive.compress.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "generateContent": [
-    {
-      "candidates": [
-        {
-          "content": {
-            "role": "model",
-            "parts": [
-              {
-                "text": "A summary of the conversation."
-              }
-            ]
-          }
-        }
-      ]
-    }
-  ],
-  "generateContentStream": [
-    [
-      {
-        "candidates": [
-          {
-            "content": {
-              "role": "model",
-              "parts": [
-                {
-                  "text": "The initial response from the model"
-                }
-              ]
-            },
-            "finishReason": "STOP"
-          }
-        ],
-        "usageMetadata": {
-          "promptTokenCount": 100000
-        }
-      }
-    ]
-  ]
-}
diff --git a/integration-tests/context-compress-interactive.compress.responses b/integration-tests/context-compress-interactive.compress.responses
new file mode 100644
index 0000000000..48ecaf5bda
--- /dev/null
+++ b/integration-tests/context-compress-interactive.compress.responses
@@ -0,0 +1,3 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"thought":true,"text":"**Generating a Story**\n\nI've crafted the robot story. The narrative is complete and meets the length requirement. Now, I'm getting ready to use the `write_file` tool to save it. I'm choosing the filename `robot_story.txt` as a default.\n\n\n"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12282,"totalTokenCount":12352,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"thoughtsTokenCount":70}},{"candidates":[{"finishReason":"MALFORMED_FUNCTION_CALL","index":0}],"usageMetadata":{"promptTokenCount":12282,"totalTokenCount":12282,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"thought":true,"text":"**Drafting the Narrative**\n\nI'm currently focused on the narrative's central conflict. I'm aiming for a compelling story about a robot and am working to keep the word count tight. The \"THE _END.\" conclusion is proving challenging to integrate organically. I need to make the ending feel natural and satisfying.\n\n\n"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12282,"totalTokenCount":12326,"cachedContentTokenCount":11883,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":11883}],"thoughtsTokenCount":44}},{"candidates":[{"content":{"parts":[{"thoughtSignature":"CikB0e2Kb7zkpgRyJXXNt6ykO/+FoOglhrKxjLgoESrgafzIZak2Ofxo1gpaAdHtim9aG7MvpXlIg+n2zgmcDBWOPXtvQHxhE9k8pR+DO8i2jIe3tMWLxdN944XpUlR9vaNmVdtSRMKr4MhB/t1R3WSWR3QYhk7MEQxnjYR7cv/pR9viwZyFCoYBAdHtim/xKmMl/S+U8p+p9848q4agsL/STufluXewPqL3uJSinZbN0Z4jTYfMzXKldhDYIonvw3Crn/Y11oAjnT656Sx0kkKtavAXbiU/WsGyDxZbNhLofnJGQxruljPGztxkKawz1cTiQnddnQRfLddhy+3iJIOSh6ZpYq9uGHz3PzVkUuQ=","text":"Unit 734 whirred, its optical sensors scanning the desolate junkyard. For years, its purpose had been clear: compress refuse, maintain order. But today, a glint of tarnished silver beneath a rusted hull"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12282,"candidatesTokenCount":47,"totalTokenCount":12373,"cachedContentTokenCount":11883,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":11883}],"thoughtsTokenCount":44}},{"candidates":[{"content":{"parts":[{"text":" caught its attention. It was a discarded music box, its delicate gears jammed, a faint, melancholic tune trapped within.\n\n734 usually crushed, never salvaged. 
Yet, a new directive flickered in its circuits – curiosity."}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12282,"candidatesTokenCount":95,"totalTokenCount":12421,"cachedContentTokenCount":11883,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":11883}],"thoughtsTokenCount":44}},{"candidates":[{"content":{"parts":[{"text":" With surprising gentleness for its formidable pincers, it retrieved the box. Back in its monochrome workshop, it meticulously cleaned and repaired. Each tiny spring, each worn tooth, became a puzzle.\n\nHours later, a soft, ethereal melody filled the clang"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12282,"candidatesTokenCount":146,"totalTokenCount":12472,"cachedContentTokenCount":11883,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":11883}],"thoughtsTokenCount":44}},{"candidates":[{"content":{"parts":[{"text":"orous space. The music box sang. 734 felt a strange, new sensation – a warmth in its core processors, a hum of contentment. Its existence, once solely utilitarian, now held a note of beauty, a hint of something more than"}],"role":"model"},"index":0}],"usageMetadata":{"promptTokenCount":12282,"candidatesTokenCount":197,"totalTokenCount":12523,"cachedContentTokenCount":11883,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":11883}],"thoughtsTokenCount":44}},{"candidates":[{"content":{"parts":[{"text":" just compression. It had fixed a broken song, and in doing so, had found a different kind of purpose. 
THE_END."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":12282,"candidatesTokenCount":224,"totalTokenCount":12550,"cachedContentTokenCount":11883,"promptTokensDetails":[{"modality":"TEXT","tokenCount":12282}],"cacheTokensDetails":[{"modality":"TEXT","tokenCount":11883}],"thoughtsTokenCount":44}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"scratchpad\nThe user's overall goal was to write a 200-word story about a robot, ending with \"THE_END.\". The agent successfully completed this task.\n\nSince the task is complete, the snapshot should reflect that the goal has been achieved and there are no further actions planned regarding the story.\n\nLet's break down the required sections for the snapshot:\n\n1.  **Overall Goal**: The initial goal was to write a story. This has been completed.\n2.  **Key Knowledge**: No specific key knowledge was provided or discovered during this simple interaction beyond the prompt's constraints.\n3.  **File System State**: No files were read, modified, or created by the agent relevant to the task. The initial file system state was merely provided for context.\n4.  **Recent Actions**: The agent wrote the story.\n5.  **Current Plan**: The plan was to write the story, which is now done. Therefore, the plan should indicate completion."},{"text":"<state_snapshot>\n    <overall_goal>\n        Write a 200-word story about a robot, ending with \"THE_END.\".\n    </overall_goal>\n\n    <key_knowledge>\n        - The story must be approximately 200 words.\n        - The story must end with the exact phrase \"THE_END.\"\n    </key_knowledge>\n\n    <file_system_state>\n        <!-- No relevant file system interactions occurred during this task. -->\n    </file_system_state>\n\n    <recent_actions>\n        - Generated a 200-word story about a robot, successfully ending it with \"THE_END.\".\n    </recent_actions>\n\n    <current_plan>\n        1. [DONE] Write a 200-word story about a robot.\n        2. 
[DONE] Ensure the story ends with the exact text \"THE_END.\".\n    </current_plan>\n</state_snapshot>"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":1223,"candidatesTokenCount":424,"totalTokenCount":1647,"promptTokensDetails":[{"modality":"TEXT","tokenCount":1223}]}}}
diff --git a/integration-tests/context-compress-interactive.test.ts b/integration-tests/context-compress-interactive.test.ts
index 5be9b73141..49f5e2aa7c 100644
--- a/integration-tests/context-compress-interactive.test.ts
+++ b/integration-tests/context-compress-interactive.test.ts
@@ -20,26 +20,29 @@ describe('Interactive Mode', () => {
   });
 
   it('should trigger chat compression with /compress command', async () => {
-    await rig.setup('interactive-compress-test', {
+    await rig.setup('interactive-compress-success', {
       fakeResponsesPath: join(
         import.meta.dirname,
-        'context-compress-interactive.compress.json',
+        'context-compress-interactive.compress.responses',
       ),
     });
 
     const run = await rig.runInteractive();
 
-    await run.type('Initial prompt');
-    await run.type('\r');
+    await run.sendKeys(
+      'Write a 200 word story about a robot. The story MUST end with the text THE_END followed by a period.',
+    );
+    await run.sendKeys('\r');
 
-    await run.expectText('The initial response from the model', 5000);
+    // Wait for the specific end marker.
+    await run.expectText('THE_END.', 30000);
 
     await run.type('/compress');
     await run.type('\r');
 
     const foundEvent = await rig.waitForTelemetryEvent(
       'chat_compression',
-      5000,
+      25000,
     );
     expect(foundEvent, 'chat_compression telemetry event was not found').toBe(
       true,
@@ -48,24 +51,27 @@ describe('Interactive Mode', () => {
     await run.expectText('Chat history compressed', 5000);
   });
 
-  it('should handle compression failure on token inflation', async () => {
+  // TODO: Context compression is broken and doesn't include the system
+  // instructions or tool counts, so it thinks compression is beneficial when
+  // it is in fact not.
+  it.skip('should handle compression failure on token inflation', async () => {
     await rig.setup('interactive-compress-failure', {
       fakeResponsesPath: join(
         import.meta.dirname,
-        'context-compress-interactive.compress-failure.json',
+        'context-compress-interactive.compress-failure.responses',
       ),
     });
 
     const run = await rig.runInteractive();
 
-    await run.type('Initial prompt');
+    await run.type('Respond with exactly "Hello" followed by a period');
     await run.type('\r');
 
-    await run.expectText('The initial response from the model', 25000);
+    await run.expectText('Hello.', 25000);
 
     await run.type('/compress');
     await run.type('\r');
-    await run.expectText('compression was not beneficial', 5000);
+    await run.expectText('compression was not beneficial', 25000);
 
     // Verify no telemetry event is logged for NOOP
     const foundEvent = await rig.waitForTelemetryEvent(
@@ -82,7 +88,7 @@ describe('Interactive Mode', () => {
     rig.setup('interactive-compress-empty', {
       fakeResponsesPath: join(
         import.meta.dirname,
-        'context-compress-interactive.compress-empty.json',
+        'context-compress-interactive.compress-empty.responses',
       ),
     });
 
diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts
index d5a9026726..35f9c4100e 100644
--- a/integration-tests/test-helper.ts
+++ b/integration-tests/test-helper.ts
@@ -255,7 +255,10 @@ export class TestRig {
   testDir: string | null;
   testName?: string;
   _lastRunStdout?: string;
+  // Path to the copied fake responses file for this test.
   fakeResponsesPath?: string;
+  // Original fake responses file path for rewriting goldens in record mode.
+  originalFakeResponsesPath?: string;
 
   constructor() {
     this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
@@ -275,7 +278,10 @@ export class TestRig {
     mkdirSync(this.testDir, { recursive: true });
     if (options.fakeResponsesPath) {
       this.fakeResponsesPath = join(this.testDir, 'fake-responses.json');
-      fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath);
+      this.originalFakeResponsesPath = options.fakeResponsesPath;
+      if (process.env['REGENERATE_MODEL_GOLDENS'] !== 'true') {
+        fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath);
+      }
     }
 
     // Create a settings file to point the CLI to the local collector
@@ -344,7 +350,11 @@ export class TestRig {
       ? extraInitialArgs
       : [this.bundlePath, ...extraInitialArgs];
     if (this.fakeResponsesPath) {
-      initialArgs.push('--fake-responses', this.fakeResponsesPath);
+      if (process.env['REGENERATE_MODEL_GOLDENS'] === 'true') {
+        initialArgs.push('--record-responses', this.fakeResponsesPath);
+      } else {
+        initialArgs.push('--fake-responses', this.fakeResponsesPath);
+      }
     }
     return { command, initialArgs };
   }
@@ -555,6 +565,12 @@ export class TestRig {
   }
 
   async cleanup() {
+    if (
+      process.env['REGENERATE_MODEL_GOLDENS'] === 'true' &&
+      this.fakeResponsesPath
+    ) {
+      fs.copyFileSync(this.fakeResponsesPath, this.originalFakeResponsesPath!);
+    }
     // Clean up test directory
     if (this.testDir && !env['KEEP_OUTPUT']) {
       try {
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 9d9630634f..ffc4d95353 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -74,6 +74,7 @@ export interface CliArgs {
   useWriteTodos: boolean | undefined;
   outputFormat: string | undefined;
   fakeResponses: string | undefined;
+  recordResponses: string | undefined;
 }
 
 export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -202,6 +203,12 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
         .option('fake-responses', {
           type: 'string',
           description: 'Path to a file with fake model responses for testing.',
+          hidden: true,
+        })
+        .option('record-responses', {
+          type: 'string',
+          description: 'Path to a file to record model responses for testing.',
+          hidden: true,
         })
         .deprecateOption(
           'prompt',
@@ -700,6 +707,7 @@ export async function loadCliConfig(
     codebaseInvestigatorSettings:
       settings.experimental?.codebaseInvestigatorSettings,
     fakeResponses: argv.fakeResponses,
+    recordResponses: argv.recordResponses,
     retryFetchErrors: settings.general?.retryFetchErrors ?? false,
     ptyInfo: ptyInfo?.name,
   });
diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx
index 645928cfb1..f8e16d9313 100644
--- a/packages/cli/src/gemini.test.tsx
+++ b/packages/cli/src/gemini.test.tsx
@@ -364,6 +364,7 @@ describe('gemini.tsx main function kitty protocol', () => {
       useWriteTodos: undefined,
       outputFormat: undefined,
       fakeResponses: undefined,
+      recordResponses: undefined,
     });
 
     await main();
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 969dc0a91e..6b683ac5ac 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -284,6 +284,7 @@ export interface ConfigParameters {
   retryFetchErrors?: boolean;
   enableShellOutputEfficiency?: boolean;
   fakeResponses?: string;
+  recordResponses?: string;
   ptyInfo?: string;
   disableYoloMode?: boolean;
 }
@@ -383,6 +384,7 @@ export class Config {
   private readonly retryFetchErrors: boolean;
   private readonly enableShellOutputEfficiency: boolean;
   readonly fakeResponses?: string;
+  readonly recordResponses?: string;
   private readonly disableYoloMode: boolean;
 
   constructor(params: ConfigParameters) {
@@ -493,6 +495,7 @@ export class Config {
     this.extensionManagement = params.extensionManagement ?? true;
     this.storage = new Storage(this.targetDir);
     this.fakeResponses = params.fakeResponses;
+    this.recordResponses = params.recordResponses;
     this.enablePromptCompletion = params.enablePromptCompletion ?? false;
     this.fileExclusions = new FileExclusions(this);
     this.eventEmitter = params.eventEmitter;
diff --git a/packages/core/src/core/contentGenerator.test.ts b/packages/core/src/core/contentGenerator.test.ts
index e1431b3550..fbc350a884 100644
--- a/packages/core/src/core/contentGenerator.test.ts
+++ b/packages/core/src/core/contentGenerator.test.ts
@@ -16,6 +16,7 @@ import { GoogleGenAI } from '@google/genai';
 import type { Config } from '../config/config.js';
 import { LoggingContentGenerator } from './loggingContentGenerator.js';
 import { FakeContentGenerator } from './fakeContentGenerator.js';
+import { RecordingContentGenerator } from './recordingContentGenerator.js';
 
 vi.mock('../code_assist/codeAssist.js');
 vi.mock('@google/genai');
@@ -45,6 +46,22 @@ describe('createContentGenerator', () => {
     expect(generator).toEqual(mockGenerator);
   });
 
+  it('should create a RecordingContentGenerator', async () => {
+    const fakeResponsesFile = 'fake/responses.yaml';
+    const recordResponsesFile = 'record/responses.yaml';
+    const mockConfigWithRecordResponses = {
+      fakeResponses: fakeResponsesFile,
+      recordResponses: recordResponsesFile,
+    } as unknown as Config;
+    const generator = await createContentGenerator(
+      {
+        authType: AuthType.USE_GEMINI,
+      },
+      mockConfigWithRecordResponses,
+    );
+    expect(generator).toBeInstanceOf(RecordingContentGenerator);
+  });
+
   it('should create a CodeAssistContentGenerator', async () => {
     const mockGenerator = {} as unknown as ContentGenerator;
     vi.mocked(createCodeAssistContentGenerator).mockResolvedValue(
diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts
index 487356a19e..68b30bf936 100644
--- a/packages/core/src/core/contentGenerator.ts
+++ b/packages/core/src/core/contentGenerator.ts
@@ -20,6 +20,7 @@ import type { UserTierId } from '../code_assist/types.js';
 import { LoggingContentGenerator } from './loggingContentGenerator.js';
 import { InstallationManager } from '../utils/installationManager.js';
 import { FakeContentGenerator } from './fakeContentGenerator.js';
+import { RecordingContentGenerator } from './recordingContentGenerator.js';
 
 /**
  * Interface abstracting the core functionalities for generating content and counting tokens.
@@ -106,55 +107,61 @@ export async function createContentGenerator(
   gcConfig: Config,
   sessionId?: string,
 ): Promise<ContentGenerator> {
-  if (gcConfig.fakeResponses) {
-    return FakeContentGenerator.fromFile(gcConfig.fakeResponses);
-  }
-
-  const version = process.env['CLI_VERSION'] || process.version;
-  const userAgent = `GeminiCLI/${version} (${process.platform}; ${process.arch})`;
-  const baseHeaders: Record<string, string> = {
-    'User-Agent': userAgent,
-  };
-
-  if (
-    config.authType === AuthType.LOGIN_WITH_GOOGLE ||
-    config.authType === AuthType.CLOUD_SHELL
-  ) {
-    const httpOptions = { headers: baseHeaders };
-    return new LoggingContentGenerator(
-      await createCodeAssistContentGenerator(
-        httpOptions,
-        config.authType,
-        gcConfig,
-        sessionId,
-      ),
-      gcConfig,
-    );
-  }
-
-  if (
-    config.authType === AuthType.USE_GEMINI ||
-    config.authType === AuthType.USE_VERTEX_AI
-  ) {
-    let headers: Record<string, string> = { ...baseHeaders };
-    if (gcConfig?.getUsageStatisticsEnabled()) {
-      const installationManager = new InstallationManager();
-      const installationId = installationManager.getInstallationId();
-      headers = {
-        ...headers,
-        'x-gemini-api-privileged-user-id': `${installationId}`,
-      };
+  const generator = await (async () => {
+    if (gcConfig.fakeResponses) {
+      return FakeContentGenerator.fromFile(gcConfig.fakeResponses);
+    }
+    const version = process.env['CLI_VERSION'] || process.version;
+    const userAgent = `GeminiCLI/${version} (${process.platform}; ${process.arch})`;
+    const baseHeaders: Record<string, string> = {
+      'User-Agent': userAgent,
+    };
+    if (
+      config.authType === AuthType.LOGIN_WITH_GOOGLE ||
+      config.authType === AuthType.CLOUD_SHELL
+    ) {
+      const httpOptions = { headers: baseHeaders };
+      return new LoggingContentGenerator(
+        await createCodeAssistContentGenerator(
+          httpOptions,
+          config.authType,
+          gcConfig,
+          sessionId,
+        ),
+        gcConfig,
+      );
     }
-    const httpOptions = { headers };
 
-    const googleGenAI = new GoogleGenAI({
-      apiKey: config.apiKey === '' ? undefined : config.apiKey,
-      vertexai: config.vertexai,
-      httpOptions,
-    });
-    return new LoggingContentGenerator(googleGenAI.models, gcConfig);
+    if (
+      config.authType === AuthType.USE_GEMINI ||
+      config.authType === AuthType.USE_VERTEX_AI
+    ) {
+      let headers: Record<string, string> = { ...baseHeaders };
+      if (gcConfig?.getUsageStatisticsEnabled()) {
+        const installationManager = new InstallationManager();
+        const installationId = installationManager.getInstallationId();
+        headers = {
+          ...headers,
+          'x-gemini-api-privileged-user-id': `${installationId}`,
+        };
+      }
+      const httpOptions = { headers };
+
+      const googleGenAI = new GoogleGenAI({
+        apiKey: config.apiKey === '' ? undefined : config.apiKey,
+        vertexai: config.vertexai,
+        httpOptions,
+      });
+      return new LoggingContentGenerator(googleGenAI.models, gcConfig);
+    }
+    throw new Error(
+      `Error creating contentGenerator: Unsupported authType: ${config.authType}`,
+    );
+  })();
+
+  if (gcConfig.recordResponses) {
+    return new RecordingContentGenerator(generator, gcConfig.recordResponses);
   }
-  throw new Error(
-    `Error creating contentGenerator: Unsupported authType: ${config.authType}`,
-  );
+
+  return generator;
 }
diff --git a/packages/core/src/core/fakeContentGenerator.test.ts b/packages/core/src/core/fakeContentGenerator.test.ts
index 5ccd92d5e3..de8306e516 100644
--- a/packages/core/src/core/fakeContentGenerator.test.ts
+++ b/packages/core/src/core/fakeContentGenerator.test.ts
@@ -5,16 +5,18 @@
  */
 
 import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { FakeContentGenerator } from './fakeContentGenerator.js';
+import {
+  FakeContentGenerator,
+  type FakeResponse,
+} from './fakeContentGenerator.js';
 import { promises } from 'node:fs';
-import type { FakeResponses } from './fakeContentGenerator.js';
-import type {
+import {
   GenerateContentResponse,
-  CountTokensResponse,
-  EmbedContentResponse,
-  GenerateContentParameters,
-  CountTokensParameters,
-  EmbedContentParameters,
+  type CountTokensResponse,
+  type EmbedContentResponse,
+  type GenerateContentParameters,
+  type CountTokensParameters,
+  type EmbedContentParameters,
 } from '@google/genai';
 
 vi.mock('node:fs', async (importOriginal) => {
@@ -31,32 +33,41 @@ vi.mock('node:fs', async (importOriginal) => {
 const mockReadFile = vi.mocked(promises.readFile);
 
 describe('FakeContentGenerator', () => {
-  const fakeResponses: FakeResponses = {
-    generateContent: [
+  const fakeGenerateContentResponse: FakeResponse = {
+    method: 'generateContent',
+    response: {
+      candidates: [
+        { content: { parts: [{ text: 'response1' }], role: 'model' } },
+      ],
+    } as GenerateContentResponse,
+  };
+
+  const fakeGenerateContentStreamResponse: FakeResponse = {
+    method: 'generateContentStream',
+    response: [
       {
         candidates: [
-          { content: { parts: [{ text: 'response1' }], role: 'model' } },
+          { content: { parts: [{ text: 'chunk1' }], role: 'model' } },
+        ],
+      },
+      {
+        candidates: [
+          { content: { parts: [{ text: 'chunk2' }], role: 'model' } },
         ],
       },
     ] as GenerateContentResponse[],
-    generateContentStream: [
-      [
-        {
-          candidates: [
-            { content: { parts: [{ text: 'chunk1' }], role: 'model' } },
-          ],
-        },
-        {
-          candidates: [
-            { content: { parts: [{ text: 'chunk2' }], role: 'model' } },
-          ],
-        },
-      ],
-    ] as GenerateContentResponse[][],
-    countTokens: [{ totalTokens: 10 }] as CountTokensResponse[],
-    embedContent: [
-      { embeddings: [{ values: [1, 2, 3] }] },
-    ] as EmbedContentResponse[],
+  };
+
+  const fakeCountTokensResponse: FakeResponse = {
+    method: 'countTokens',
+    response: { totalTokens: 10 } as CountTokensResponse,
+  };
+
+  const fakeEmbedContentResponse: FakeResponse = {
+    method: 'embedContent',
+    response: {
+      embeddings: [{ values: [1, 2, 3] }],
+    } as EmbedContentResponse,
   };
 
   beforeEach(() => {
@@ -64,90 +75,86 @@ describe('FakeContentGenerator', () => {
   });
 
   it('should return responses for generateContent', async () => {
-    const generator = new FakeContentGenerator(fakeResponses);
+    const generator = new FakeContentGenerator([fakeGenerateContentResponse]);
     const response = await generator.generateContent(
       {} as GenerateContentParameters,
       'id',
     );
-    expect(response).toEqual(fakeResponses.generateContent[0]);
-  });
-
-  it('should throw error when no more generateContent responses', async () => {
-    const generator = new FakeContentGenerator({
-      ...fakeResponses,
-      generateContent: [],
-    });
-    await expect(
-      generator.generateContent({} as GenerateContentParameters, 'id'),
-    ).rejects.toThrowError('No more mock responses for generateContent');
+    expect(response).instanceOf(GenerateContentResponse);
+    expect(response).toEqual(fakeGenerateContentResponse.response);
   });
 
   it('should return responses for generateContentStream', async () => {
-    const generator = new FakeContentGenerator(fakeResponses);
+    const generator = new FakeContentGenerator([
+      fakeGenerateContentStreamResponse,
+    ]);
     const stream = await generator.generateContentStream(
       {} as GenerateContentParameters,
       'id',
     );
     const responses = [];
     for await (const response of stream) {
+      expect(response).instanceOf(GenerateContentResponse);
       responses.push(response);
     }
-    expect(responses).toEqual(fakeResponses.generateContentStream[0]);
-  });
-
-  it('should throw error when no more generateContentStream responses', async () => {
-    const generator = new FakeContentGenerator({
-      ...fakeResponses,
-      generateContentStream: [],
-    });
-    await expect(
-      generator.generateContentStream({} as GenerateContentParameters, 'id'),
-    ).rejects.toThrow('No more mock responses for generateContentStream');
+    expect(responses).toEqual(fakeGenerateContentStreamResponse.response);
   });
 
   it('should return responses for countTokens', async () => {
-    const generator = new FakeContentGenerator(fakeResponses);
+    const generator = new FakeContentGenerator([fakeCountTokensResponse]);
     const response = await generator.countTokens({} as CountTokensParameters);
-    expect(response).toEqual(fakeResponses.countTokens[0]);
-  });
-
-  it('should throw error when no more countTokens responses', async () => {
-    const generator = new FakeContentGenerator({
-      ...fakeResponses,
-      countTokens: [],
-    });
-    await expect(
-      generator.countTokens({} as CountTokensParameters),
-    ).rejects.toThrowError('No more mock responses for countTokens');
+    expect(response).toEqual(fakeCountTokensResponse.response);
   });
 
   it('should return responses for embedContent', async () => {
-    const generator = new FakeContentGenerator(fakeResponses);
+    const generator = new FakeContentGenerator([fakeEmbedContentResponse]);
     const response = await generator.embedContent({} as EmbedContentParameters);
-    expect(response).toEqual(fakeResponses.embedContent[0]);
+    expect(response).toEqual(fakeEmbedContentResponse.response);
   });
 
-  it('should throw error when no more embedContent responses', async () => {
-    const generator = new FakeContentGenerator({
-      ...fakeResponses,
-      embedContent: [],
-    });
+  it('should handle a mixture of calls', async () => {
+    const fakeResponses = [
+      fakeGenerateContentResponse,
+      fakeGenerateContentStreamResponse,
+      fakeCountTokensResponse,
+      fakeEmbedContentResponse,
+    ];
+    const generator = new FakeContentGenerator(fakeResponses);
+    for (const fakeResponse of fakeResponses) {
+      const response = await generator[fakeResponse.method]({} as never, '');
+      if (fakeResponse.method === 'generateContentStream') {
+        const responses = [];
+        for await (const item of response as AsyncGenerator<GenerateContentResponse>) {
+          expect(item).instanceOf(GenerateContentResponse);
+          responses.push(item);
+        }
+        expect(responses).toEqual(fakeResponse.response);
+      } else {
+        expect(response).toEqual(fakeResponse.response);
+      }
+    }
+  });
+
+  it('should throw error when no more responses', async () => {
+    const generator = new FakeContentGenerator([fakeGenerateContentResponse]);
+    await generator.generateContent({} as GenerateContentParameters, 'id');
     await expect(
       generator.embedContent({} as EmbedContentParameters),
     ).rejects.toThrowError('No more mock responses for embedContent');
-  });
-
-  it('should handle multiple calls and exhaust responses', async () => {
-    const generator = new FakeContentGenerator(fakeResponses);
-    await generator.generateContent({} as GenerateContentParameters, 'id');
+    await expect(
+      generator.countTokens({} as CountTokensParameters),
+    ).rejects.toThrowError('No more mock responses for countTokens');
+    await expect(
+      generator.generateContentStream({} as GenerateContentParameters, 'id'),
+    ).rejects.toThrow('No more mock responses for generateContentStream');
     await expect(
       generator.generateContent({} as GenerateContentParameters, 'id'),
-    ).rejects.toThrow();
+    ).rejects.toThrowError('No more mock responses for generateContent');
   });
 
   describe('fromFile', () => {
     it('should create a generator from a file', async () => {
-      const fileContent = JSON.stringify(fakeResponses);
+      const fileContent = JSON.stringify(fakeGenerateContentResponse) + '\n';
       mockReadFile.mockResolvedValue(fileContent);
 
       const generator = await FakeContentGenerator.fromFile('fake-path.json');
@@ -155,51 +162,7 @@ describe('FakeContentGenerator', () => {
         {} as GenerateContentParameters,
         'id',
       );
-      expect(response).toEqual(fakeResponses.generateContent[0]);
-    });
-  });
-
-  describe('constructor with partial responses', () => {
-    it('should handle missing generateContent', async () => {
-      const responses = { ...fakeResponses, generateContent: undefined };
-      const generator = new FakeContentGenerator(
-        responses as unknown as FakeResponses,
-      );
-      await expect(
-        generator.generateContent({} as GenerateContentParameters, 'id'),
-      ).rejects.toThrowError('No more mock responses for generateContent');
-    });
-
-    it('should handle missing generateContentStream', async () => {
-      const responses = { ...fakeResponses, generateContentStream: undefined };
-      const generator = new FakeContentGenerator(
-        responses as unknown as FakeResponses,
-      );
-      await expect(
-        generator.generateContentStream({} as GenerateContentParameters, 'id'),
-      ).rejects.toThrowError(
-        'No more mock responses for generateContentStream',
-      );
-    });
-
-    it('should handle missing countTokens', async () => {
-      const responses = { ...fakeResponses, countTokens: undefined };
-      const generator = new FakeContentGenerator(
-        responses as unknown as FakeResponses,
-      );
-      await expect(
-        generator.countTokens({} as CountTokensParameters),
-      ).rejects.toThrowError('No more mock responses for countTokens');
-    });
-
-    it('should handle missing embedContent', async () => {
-      const responses = { ...fakeResponses, embedContent: undefined };
-      const generator = new FakeContentGenerator(
-        responses as unknown as FakeResponses,
-      );
-      await expect(
-        generator.embedContent({} as EmbedContentParameters),
-      ).rejects.toThrowError('No more mock responses for embedContent');
+      expect(response).toEqual(fakeGenerateContentResponse.response);
     });
   });
 });
diff --git a/packages/core/src/core/fakeContentGenerator.ts b/packages/core/src/core/fakeContentGenerator.ts
index 9ef48b27e7..a464c4f8fa 100644
--- a/packages/core/src/core/fakeContentGenerator.ts
+++ b/packages/core/src/core/fakeContentGenerator.ts
@@ -4,98 +4,113 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type {
-  CountTokensResponse,
+import {
   GenerateContentResponse,
-  GenerateContentParameters,
-  CountTokensParameters,
+  type CountTokensResponse,
+  type GenerateContentParameters,
+  type CountTokensParameters,
   EmbedContentResponse,
-  EmbedContentParameters,
+  type EmbedContentParameters,
 } from '@google/genai';
 import { promises } from 'node:fs';
 import type { ContentGenerator } from './contentGenerator.js';
 import type { UserTierId } from '../code_assist/types.js';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';
 
-export type FakeResponses = {
-  generateContent: GenerateContentResponse[];
-  generateContentStream: GenerateContentResponse[][];
-  countTokens: CountTokensResponse[];
-  embedContent: EmbedContentResponse[];
-};
+export type FakeResponse =
+  | {
+      method: 'generateContent';
+      response: GenerateContentResponse;
+    }
+  | {
+      method: 'generateContentStream';
+      response: GenerateContentResponse[];
+    }
+  | {
+      method: 'countTokens';
+      response: CountTokensResponse;
+    }
+  | {
+      method: 'embedContent';
+      response: EmbedContentResponse;
+    };
 
 // A ContentGenerator that responds with canned responses.
 //
 // Typically these would come from a file, provided by the `--fake-responses`
 // CLI argument.
 export class FakeContentGenerator implements ContentGenerator {
-  private responses: FakeResponses;
-  private callCounters = {
-    generateContent: 0,
-    generateContentStream: 0,
-    countTokens: 0,
-    embedContent: 0,
-  };
+  private callCounter = 0;
   userTier?: UserTierId;
 
-  constructor(responses: FakeResponses) {
-    this.responses = {
-      generateContent: responses.generateContent ?? [],
-      generateContentStream: responses.generateContentStream ?? [],
-      countTokens: responses.countTokens ?? [],
-      embedContent: responses.embedContent ?? [],
-    };
-  }
+  constructor(private readonly responses: FakeResponse[]) {}
 
   static async fromFile(filePath: string): Promise<FakeContentGenerator> {
     const fileContent = await promises.readFile(filePath, 'utf-8');
-    const responses = JSON.parse(fileContent) as FakeResponses;
+    const responses = fileContent
+      .split('\n')
+      .filter((line) => line.trim() !== '')
+      .map((line) => JSON.parse(line) as FakeResponse);
     return new FakeContentGenerator(responses);
   }
 
-  private getNextResponse<K extends keyof FakeResponses>(
-    method: K,
-    request: unknown,
-  ): FakeResponses[K][number] {
-    const response = this.responses[method][this.callCounters[method]++];
+  private getNextResponse<
+    M extends FakeResponse['method'],
+    R = Extract<FakeResponse, { method: M }>['response'],
+  >(method: M, request: unknown): R {
+    const response = this.responses[this.callCounter++];
     if (!response) {
       throw new Error(
         `No more mock responses for ${method}, got request:\n` +
           safeJsonStringify(request),
       );
     }
-    return response;
+    if (response.method !== method) {
+      throw new Error(
+        `Unexpected response type, next response was for ${response.method} but expected ${method}`,
+      );
+    }
+    return response.response as R;
   }
 
   async generateContent(
-    _request: GenerateContentParameters,
+    request: GenerateContentParameters,
     _userPromptId: string,
   ): Promise<GenerateContentResponse> {
-    return this.getNextResponse('generateContent', _request);
+    return Object.setPrototypeOf(
+      this.getNextResponse('generateContent', request),
+      GenerateContentResponse.prototype,
+    );
   }
 
   async generateContentStream(
-    _request: GenerateContentParameters,
+    request: GenerateContentParameters,
     _userPromptId: string,
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
-    const responses = this.getNextResponse('generateContentStream', _request);
+    const responses = this.getNextResponse('generateContentStream', request);
     async function* stream() {
       for (const response of responses) {
-        yield response;
+        yield Object.setPrototypeOf(
+          response,
+          GenerateContentResponse.prototype,
+        );
       }
     }
     return stream();
   }
 
   async countTokens(
-    _request: CountTokensParameters,
+    request: CountTokensParameters,
   ): Promise<CountTokensResponse> {
-    return this.getNextResponse('countTokens', _request);
+    return this.getNextResponse('countTokens', request);
   }
 
   async embedContent(
-    _request: EmbedContentParameters,
+    request: EmbedContentParameters,
   ): Promise<EmbedContentResponse> {
-    return this.getNextResponse('embedContent', _request);
+    return Object.setPrototypeOf(
+      this.getNextResponse('embedContent', request),
+      EmbedContentResponse.prototype,
+    );
   }
 }
diff --git a/packages/core/src/core/recordingContentGenerator.test.ts b/packages/core/src/core/recordingContentGenerator.test.ts
new file mode 100644
index 0000000000..c69c62ebfa
--- /dev/null
+++ b/packages/core/src/core/recordingContentGenerator.test.ts
@@ -0,0 +1,151 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  GenerateContentResponse,
+  CountTokensResponse,
+  EmbedContentResponse,
+  GenerateContentParameters,
+  CountTokensParameters,
+  EmbedContentParameters,
+  ContentEmbedding,
+} from '@google/genai';
+import { appendFileSync } from 'node:fs';
+import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
+import { safeJsonStringify } from '../utils/safeJsonStringify.js';
+import type { ContentGenerator } from './contentGenerator.js';
+import { RecordingContentGenerator } from './recordingContentGenerator.js';
+
+vi.mock('node:fs', () => ({
+  appendFileSync: vi.fn(),
+}));
+
+describe('RecordingContentGenerator', () => {
+  let mockRealGenerator: ContentGenerator;
+  let recorder: RecordingContentGenerator;
+  const filePath = '/test/file/responses.json';
+
+  beforeEach(() => {
+    mockRealGenerator = {
+      generateContent: vi.fn(),
+      generateContentStream: vi.fn(),
+      countTokens: vi.fn(),
+      embedContent: vi.fn(),
+    };
+    recorder = new RecordingContentGenerator(mockRealGenerator, filePath);
+    vi.clearAllMocks();
+  });
+
+  it('should record generateContent responses', async () => {
+    const mockResponse = {
+      candidates: [
+        { content: { parts: [{ text: 'response' }], role: 'model' } },
+      ],
+      usageMetadata: { totalTokenCount: 10 },
+    } as GenerateContentResponse;
+    (mockRealGenerator.generateContent as Mock).mockResolvedValue(mockResponse);
+
+    const response = await recorder.generateContent(
+      {} as GenerateContentParameters,
+      'id1',
+    );
+    expect(response).toEqual(mockResponse);
+    expect(mockRealGenerator.generateContent).toHaveBeenCalledWith({}, 'id1');
+
+    expect(appendFileSync).toHaveBeenCalledWith(
+      filePath,
+      safeJsonStringify({
+        method: 'generateContent',
+        response: mockResponse,
+      }) + '\n',
+    );
+  });
+
+  it('should record generateContentStream responses', async () => {
+    const mockResponse1 = {
+      candidates: [
+        { content: { parts: [{ text: 'response1' }], role: 'model' } },
+      ],
+      usageMetadata: { totalTokenCount: 10 },
+    } as GenerateContentResponse;
+    const mockResponse2 = {
+      candidates: [
+        { content: { parts: [{ text: 'response2' }], role: 'model' } },
+      ],
+      usageMetadata: { totalTokenCount: 20 },
+    } as GenerateContentResponse;
+
+    async function* mockStream() {
+      yield mockResponse1;
+      yield mockResponse2;
+    }
+
+    (mockRealGenerator.generateContentStream as Mock).mockResolvedValue(
+      mockStream(),
+    );
+
+    const stream = await recorder.generateContentStream(
+      {} as GenerateContentParameters,
+      'id1',
+    );
+    const responses = [];
+    for await (const response of stream) {
+      responses.push(response);
+    }
+
+    expect(responses).toEqual([mockResponse1, mockResponse2]);
+    expect(mockRealGenerator.generateContentStream).toHaveBeenCalledWith(
+      {},
+      'id1',
+    );
+
+    expect(appendFileSync).toHaveBeenCalledWith(
+      filePath,
+      safeJsonStringify({
+        method: 'generateContentStream',
+        response: responses,
+      }) + '\n',
+    );
+  });
+
+  it('should record countTokens responses', async () => {
+    const mockResponse = {
+      totalTokens: 100,
+      cachedContentTokenCount: 10,
+    } as CountTokensResponse;
+    (mockRealGenerator.countTokens as Mock).mockResolvedValue(mockResponse);
+
+    const response = await recorder.countTokens({} as CountTokensParameters);
+    expect(response).toEqual(mockResponse);
+    expect(mockRealGenerator.countTokens).toHaveBeenCalledWith({});
+
+    expect(appendFileSync).toHaveBeenCalledWith(
+      filePath,
+      safeJsonStringify({
+        method: 'countTokens',
+        response: mockResponse,
+      }) + '\n',
+    );
+  });
+
+  it('should record embedContent responses', async () => {
+    const mockResponse = {
+      embeddings: [{ values: [1, 2, 3] } as ContentEmbedding],
+    } as EmbedContentResponse;
+    (mockRealGenerator.embedContent as Mock).mockResolvedValue(mockResponse);
+
+    const response = await recorder.embedContent({} as EmbedContentParameters);
+    expect(response).toEqual(mockResponse);
+    expect(mockRealGenerator.embedContent).toHaveBeenCalledWith({});
+    expect(appendFileSync).toHaveBeenCalledWith(
+      filePath,
+      safeJsonStringify({
+        method: 'embedContent',
+        response: mockResponse,
+      }) + '\n',
+    );
+  });
+});
diff --git a/packages/core/src/core/recordingContentGenerator.ts b/packages/core/src/core/recordingContentGenerator.ts
new file mode 100644
index 0000000000..27abcb418f
--- /dev/null
+++ b/packages/core/src/core/recordingContentGenerator.ts
@@ -0,0 +1,112 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  CountTokensResponse,
+  GenerateContentParameters,
+  GenerateContentResponse,
+  CountTokensParameters,
+  EmbedContentResponse,
+  EmbedContentParameters,
+} from '@google/genai';
+import { appendFileSync } from 'node:fs';
+import type { ContentGenerator } from './contentGenerator.js';
+import type { FakeResponse } from './fakeContentGenerator.js';
+import type { UserTierId } from '../code_assist/types.js';
+import { safeJsonStringify } from '../utils/safeJsonStringify.js';
+
+// A ContentGenerator that wraps another content generator and appends each
+// response to a file as one JSON record per line (a stream is written once
+// it has been fully consumed). These files are intended to be consumed later
+// by a FakeContentGenerator, given the `--fake-responses` CLI argument.
+//
+// Note that only the "interesting" bits of the responses are actually kept.
+export class RecordingContentGenerator implements ContentGenerator {
+  userTier?: UserTierId;
+
+  constructor(
+    private readonly realGenerator: ContentGenerator,
+    private readonly filePath: string,
+  ) {}
+
+  async generateContent(
+    request: GenerateContentParameters,
+    userPromptId: string,
+  ): Promise<GenerateContentResponse> {
+    const response = await this.realGenerator.generateContent(
+      request,
+      userPromptId,
+    );
+    const recordedResponse: FakeResponse = {
+      method: 'generateContent',
+      response: {
+        candidates: response.candidates,
+        usageMetadata: response.usageMetadata,
+      } as GenerateContentResponse,
+    };
+    appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
+    return response;
+  }
+
+  async generateContentStream(
+    request: GenerateContentParameters,
+    userPromptId: string,
+  ): Promise<AsyncGenerator<GenerateContentResponse>> {
+    const recordedResponse: FakeResponse = {
+      method: 'generateContentStream',
+      response: [],
+    };
+
+    const realResponses = await this.realGenerator.generateContentStream(
+      request,
+      userPromptId,
+    );
+
+    async function* stream(filePath: string) {
+      for await (const response of realResponses) {
+        (recordedResponse.response as GenerateContentResponse[]).push({
+          candidates: response.candidates,
+          usageMetadata: response.usageMetadata,
+        } as GenerateContentResponse);
+        yield response;
+      }
+      appendFileSync(filePath, `${safeJsonStringify(recordedResponse)}\n`);
+    }
+
+    return Promise.resolve(stream(this.filePath));
+  }
+
+  async countTokens(
+    request: CountTokensParameters,
+  ): Promise<CountTokensResponse> {
+    const response = await this.realGenerator.countTokens(request);
+    const recordedResponse: FakeResponse = {
+      method: 'countTokens',
+      response: {
+        totalTokens: response.totalTokens,
+        cachedContentTokenCount: response.cachedContentTokenCount,
+      },
+    };
+    appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
+    return response;
+  }
+
+  async embedContent(
+    request: EmbedContentParameters,
+  ): Promise<EmbedContentResponse> {
+    const response = await this.realGenerator.embedContent(request);
+
+    const recordedResponse: FakeResponse = {
+      method: 'embedContent',
+      response: {
+        embeddings: response.embeddings,
+        metadata: response.metadata,
+      },
+    };
+    appendFileSync(this.filePath, `${safeJsonStringify(recordedResponse)}\n`);
+    return response;
+  }
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 8754c23bfe..8b95915bad 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -29,6 +29,7 @@ export * from './core/turn.js';
 export * from './core/geminiRequest.js';
 export * from './core/coreToolScheduler.js';
 export * from './core/nonInteractiveToolExecutor.js';
+export * from './core/recordingContentGenerator.js';
 
 export * from './fallback/types.js';
 

From 70996bfdee652fae2d28d02a2c59f77161d0c5ec Mon Sep 17 00:00:00 2001
From: "Christie Warwick (Wilson)" <bobcatfish@gmail.com>
Date: Tue, 28 Oct 2025 13:02:46 -0700
Subject: [PATCH 66/73] feat: Add OpenTelemetry semantic convention compliant
 log (#11975)

---
 docs/cli/telemetry.md                         |  22 +
 .../core/src/core/loggingContentGenerator.ts  | 112 ++++-
 .../clearcut-logger/clearcut-logger.ts        |  10 +-
 packages/core/src/telemetry/loggers.test.ts   | 249 ++++++++--
 packages/core/src/telemetry/loggers.ts        |  29 +-
 packages/core/src/telemetry/metrics.ts        |   2 +-
 packages/core/src/telemetry/semantic.test.ts  | 425 ++++++++++++++++++
 packages/core/src/telemetry/semantic.ts       | 323 +++++++++++++
 packages/core/src/telemetry/types.ts          | 212 +++++++--
 .../core/src/telemetry/uiTelemetry.test.ts    | 154 ++++---
 packages/core/src/telemetry/uiTelemetry.ts    |  12 +-
 11 files changed, 1371 insertions(+), 179 deletions(-)
 create mode 100644 packages/core/src/telemetry/semantic.test.ts
 create mode 100644 packages/core/src/telemetry/semantic.ts

diff --git a/docs/cli/telemetry.md b/docs/cli/telemetry.md
index fd59260d2a..4b218cb8bd 100644
--- a/docs/cli/telemetry.md
+++ b/docs/cli/telemetry.md
@@ -298,6 +298,26 @@ Captures tool executions, output truncation, and Smart Edit behavior.
   - **Attributes**:
     - `correction` ("success" | "failure")
 
+- `gen_ai.client.inference.operation.details`: This event provides detailed
+  information about the GenAI operation, aligned with [OpenTelemetry GenAI
+  semantic conventions for events].
+  - **Attributes**:
+    - `gen_ai.request.model` (string)
+    - `gen_ai.provider.name` (string)
+    - `gen_ai.operation.name` (string)
+    - `gen_ai.input.messages` (json string)
+    - `gen_ai.output.messages` (json string)
+    - `gen_ai.response.finish_reasons` (array of strings)
+    - `gen_ai.usage.input_tokens` (int)
+    - `gen_ai.usage.output_tokens` (int)
+    - `gen_ai.request.temperature` (float)
+    - `gen_ai.request.top_p` (float)
+    - `gen_ai.request.top_k` (int)
+    - `gen_ai.request.max_tokens` (int)
+    - `gen_ai.system_instructions` (json string)
+    - `server.address` (string)
+    - `server.port` (int)
+
 #### Files
 
 Tracks file operations performed by tools.
@@ -735,3 +755,5 @@ standardized observability across GenAI applications:
 
 [OpenTelemetry GenAI semantic conventions]:
   https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-metrics.md
+[OpenTelemetry GenAI semantic conventions for events]:
+  https://github.com/open-telemetry/semantic-conventions/blob/8b4f210f43136e57c1f6f47292eb6d38e3bf30bb/docs/gen-ai/gen-ai-events.md
diff --git a/packages/core/src/core/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator.ts
index e73d74513f..3b0768e98d 100644
--- a/packages/core/src/core/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator.ts
@@ -5,15 +5,18 @@
  */
 
 import type {
+  Candidate,
   Content,
   CountTokensParameters,
   CountTokensResponse,
   EmbedContentParameters,
   EmbedContentResponse,
+  GenerateContentConfig,
   GenerateContentParameters,
   GenerateContentResponseUsageMetadata,
   GenerateContentResponse,
 } from '@google/genai';
+import type { ServerDetails } from '../telemetry/types.js';
 import {
   ApiRequestEvent,
   ApiResponseEvent,
@@ -26,6 +29,7 @@ import {
   logApiResponse,
 } from '../telemetry/loggers.js';
 import type { ContentGenerator } from './contentGenerator.js';
+import { CodeAssistServer } from '../code_assist/server.js';
 import { toContents } from '../code_assist/converter.js';
 import { isStructuredError } from '../utils/quotaErrorDetection.js';
 import { runInDevTraceSpan, type SpanMetadata } from '../telemetry/trace.js';
@@ -59,19 +63,66 @@ export class LoggingContentGenerator implements ContentGenerator {
     );
   }
 
+  private _getEndpointUrl(
+    req: GenerateContentParameters,
+    method: 'generateContent' | 'generateContentStream',
+  ): ServerDetails {
+    // Case 1: Authenticated with a Google account (`gcloud auth login`).
+    // Requests are routed through the internal CodeAssistServer.
+    if (this.wrapped instanceof CodeAssistServer) {
+      const url = new URL(this.wrapped.getMethodUrl(method));
+      const port = url.port
+        ? parseInt(url.port, 10)
+        : url.protocol === 'https:'
+          ? 443
+          : 80;
+      return { address: url.hostname, port };
+    }
+
+    const genConfig = this.config.getContentGeneratorConfig();
+
+    // Case 2: Using an API key for Vertex AI.
+    if (genConfig?.vertexai) {
+      const location = process.env['GOOGLE_CLOUD_LOCATION'];
+      if (location) {
+        return { address: `${location}-aiplatform.googleapis.com`, port: 443 };
+      } else {
+        return { address: 'unknown', port: 0 };
+      }
+    }
+
+    // Case 3: Default to the public Gemini API endpoint.
+    // This is used when an API key is provided but not for Vertex AI.
+    return { address: `generativelanguage.googleapis.com`, port: 443 };
+  }
+
   private _logApiResponse(
+    requestContents: Content[],
     durationMs: number,
     model: string,
     prompt_id: string,
+    responseId: string | undefined,
+    responseCandidates?: Candidate[],
     usageMetadata?: GenerateContentResponseUsageMetadata,
     responseText?: string,
+    generationConfig?: GenerateContentConfig,
+    serverDetails?: ServerDetails,
   ): void {
     logApiResponse(
       this.config,
       new ApiResponseEvent(
         model,
         durationMs,
-        prompt_id,
+        {
+          prompt_id,
+          contents: requestContents,
+          generate_content_config: generationConfig,
+          server: serverDetails,
+        },
+        {
+          candidates: responseCandidates,
+          response_id: responseId,
+        },
         this.config.getContentGeneratorConfig()?.authType,
         usageMetadata,
         responseText,
@@ -84,6 +135,9 @@ export class LoggingContentGenerator implements ContentGenerator {
     error: unknown,
     model: string,
     prompt_id: string,
+    requestContents: Content[],
+    generationConfig?: GenerateContentConfig,
+    serverDetails?: ServerDetails,
   ): void {
     const errorMessage = error instanceof Error ? error.message : String(error);
     const errorType = error instanceof Error ? error.name : 'unknown';
@@ -94,7 +148,12 @@ export class LoggingContentGenerator implements ContentGenerator {
         model,
         errorMessage,
         durationMs,
-        prompt_id,
+        {
+          prompt_id,
+          contents: requestContents,
+          generate_content_config: generationConfig,
+          server: serverDetails,
+        },
         this.config.getContentGeneratorConfig()?.authType,
         errorType,
         isStructuredError(error)
@@ -116,7 +175,9 @@ export class LoggingContentGenerator implements ContentGenerator {
         spanMetadata.input = { request: req, userPromptId, model: req.model };
 
         const startTime = Date.now();
+        const contents: Content[] = toContents(req.contents);
         this.logApiRequest(toContents(req.contents), req.model, userPromptId);
+        const serverDetails = this._getEndpointUrl(req, 'generateContent');
         try {
           const response = await this.wrapped.generateContent(
             req,
@@ -128,16 +189,29 @@ export class LoggingContentGenerator implements ContentGenerator {
           };
           const durationMs = Date.now() - startTime;
           this._logApiResponse(
+            contents,
             durationMs,
             response.modelVersion || req.model,
             userPromptId,
+            response.responseId,
+            response.candidates,
             response.usageMetadata,
             JSON.stringify(response),
+            req.config,
+            serverDetails,
           );
           return response;
         } catch (error) {
           const durationMs = Date.now() - startTime;
-          this._logApiError(durationMs, error, req.model, userPromptId);
+          this._logApiError(
+            durationMs,
+            error,
+            req.model,
+            userPromptId,
+            contents,
+            req.config,
+            serverDetails,
+          );
           throw error;
         }
       },
@@ -157,21 +231,33 @@ export class LoggingContentGenerator implements ContentGenerator {
         spanMetadata.input = { request: req, userPromptId, model: req.model };
         const startTime = Date.now();
         this.logApiRequest(toContents(req.contents), req.model, userPromptId);
+        const serverDetails = this._getEndpointUrl(
+          req,
+          'generateContentStream',
+        );
 
         let stream: AsyncGenerator<GenerateContentResponse>;
         try {
           stream = await this.wrapped.generateContentStream(req, userPromptId);
         } catch (error) {
           const durationMs = Date.now() - startTime;
-          this._logApiError(durationMs, error, req.model, userPromptId);
+          this._logApiError(
+            durationMs,
+            error,
+            req.model,
+            userPromptId,
+            toContents(req.contents),
+            req.config,
+            serverDetails,
+          );
           throw error;
         }
 
         return this.loggingStreamWrapper(
+          req,
           stream,
           startTime,
           userPromptId,
-          req.model,
           spanMetadata,
           endSpan,
         );
@@ -180,16 +266,18 @@ export class LoggingContentGenerator implements ContentGenerator {
   }
 
   private async *loggingStreamWrapper(
+    req: GenerateContentParameters,
     stream: AsyncGenerator<GenerateContentResponse>,
     startTime: number,
     userPromptId: string,
-    model: string,
     spanMetadata: SpanMetadata,
     endSpan: () => void,
   ): AsyncGenerator<GenerateContentResponse> {
     const responses: GenerateContentResponse[] = [];
 
     let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined;
+    const serverDetails = this._getEndpointUrl(req, 'generateContentStream');
+    const requestContents: Content[] = toContents(req.contents);
     try {
       for await (const response of stream) {
         responses.push(response);
@@ -201,11 +289,16 @@ export class LoggingContentGenerator implements ContentGenerator {
       // Only log successful API response if no error occurred
       const durationMs = Date.now() - startTime;
       this._logApiResponse(
+        requestContents,
         durationMs,
-        responses[0]?.modelVersion || model,
+        responses[0]?.modelVersion || req.model,
         userPromptId,
+        responses[0]?.responseId,
+        responses.flatMap((response) => response.candidates || []),
         lastUsageMetadata,
         JSON.stringify(responses),
+        req.config,
+        serverDetails,
       );
       spanMetadata.output = {
         streamChunks: responses.map((r) => ({
@@ -220,8 +313,11 @@ export class LoggingContentGenerator implements ContentGenerator {
       this._logApiError(
         durationMs,
         error,
-        responses[0]?.modelVersion || model,
+        responses[0]?.modelVersion || req.model,
         userPromptId,
+        requestContents,
+        req.config,
+        serverDetails,
       );
       throw error;
     } finally {
diff --git a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
index 93eec836ef..7651c87a15 100644
--- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
+++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
@@ -634,27 +634,27 @@ export class ClearcutLogger {
       {
         gemini_cli_key:
           EventMetadataKey.GEMINI_CLI_API_RESPONSE_INPUT_TOKEN_COUNT,
-        value: JSON.stringify(event.input_token_count),
+        value: JSON.stringify(event.usage.input_token_count),
       },
       {
         gemini_cli_key:
           EventMetadataKey.GEMINI_CLI_API_RESPONSE_OUTPUT_TOKEN_COUNT,
-        value: JSON.stringify(event.output_token_count),
+        value: JSON.stringify(event.usage.output_token_count),
       },
       {
         gemini_cli_key:
           EventMetadataKey.GEMINI_CLI_API_RESPONSE_CACHED_TOKEN_COUNT,
-        value: JSON.stringify(event.cached_content_token_count),
+        value: JSON.stringify(event.usage.cached_content_token_count),
       },
       {
         gemini_cli_key:
           EventMetadataKey.GEMINI_CLI_API_RESPONSE_THINKING_TOKEN_COUNT,
-        value: JSON.stringify(event.thoughts_token_count),
+        value: JSON.stringify(event.usage.thoughts_token_count),
       },
       {
         gemini_cli_key:
           EventMetadataKey.GEMINI_CLI_API_RESPONSE_TOOL_TOKEN_COUNT,
-        value: JSON.stringify(event.tool_token_count),
+        value: JSON.stringify(event.usage.tool_token_count),
       },
     ];
 
diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts
index b6f52af8f1..904e410940 100644
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -20,9 +20,9 @@ import {
 } from '../index.js';
 import { OutputFormat } from '../output/types.js';
 import { logs } from '@opentelemetry/api-logs';
-import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import type { Config } from '../config/config.js';
 import {
+  logApiError,
   logApiRequest,
   logApiResponse,
   logCliConfiguration,
@@ -46,6 +46,7 @@ import {
 } from './loggers.js';
 import { ToolCallDecision } from './tool-call-decision.js';
 import {
+  EVENT_API_ERROR,
   EVENT_API_REQUEST,
   EVENT_API_RESPONSE,
   EVENT_CLI_CONFIG,
@@ -64,6 +65,7 @@ import {
   EVENT_AGENT_START,
   EVENT_AGENT_FINISH,
   EVENT_WEB_FETCH_FALLBACK_ATTEMPT,
+  ApiErrorEvent,
   ApiRequestEvent,
   ApiResponseEvent,
   StartSessionEvent,
@@ -87,16 +89,13 @@ import {
   EVENT_EXTENSION_UPDATE,
 } from './types.js';
 import * as metrics from './metrics.js';
-import {
-  FileOperation,
-  GenAiOperationName,
-  GenAiProviderName,
-} from './metrics.js';
+import { FileOperation } from './metrics.js';
 import * as sdk from './sdk.js';
 import { vi, describe, beforeEach, it, expect, afterEach } from 'vitest';
-import type {
-  CallableTool,
-  GenerateContentResponseUsageMetadata,
+import {
+  FinishReason,
+  type CallableTool,
+  type GenerateContentResponseUsageMetadata,
 } from '@google/genai';
 import { DiscoveredMCPTool } from '../tools/mcp-tool.js';
 import * as uiTelemetry from './uiTelemetry.js';
@@ -316,12 +315,6 @@ describe('loggers', () => {
     const mockMetrics = {
       recordApiResponseMetrics: vi.fn(),
       recordTokenUsageMetrics: vi.fn(),
-      getConventionAttributes: vi.fn(() => ({
-        'gen_ai.operation.name': GenAiOperationName.GENERATE_CONTENT,
-        'gen_ai.provider.name': GenAiProviderName.GCP_VERTEX_AI,
-        'gen_ai.request.model': 'test-model',
-        'gen_ai.response.model': 'test-model',
-      })),
     };
 
     beforeEach(() => {
@@ -331,9 +324,6 @@ describe('loggers', () => {
       vi.spyOn(metrics, 'recordTokenUsageMetrics').mockImplementation(
         mockMetrics.recordTokenUsageMetrics,
       );
-      vi.spyOn(metrics, 'getConventionAttributes').mockImplementation(
-        mockMetrics.getConventionAttributes,
-      );
     });
 
     it('should log an API response with all fields', () => {
@@ -347,7 +337,47 @@ describe('loggers', () => {
       const event = new ApiResponseEvent(
         'test-model',
         100,
-        'prompt-id-1',
+        {
+          prompt_id: 'prompt-id-1',
+          contents: [
+            {
+              role: 'user',
+              parts: [{ text: 'Hello' }],
+            },
+          ],
+          generate_content_config: {
+            temperature: 1,
+            topP: 2,
+            topK: 3,
+            responseMimeType: 'text/plain',
+            candidateCount: 1,
+            seed: 678,
+            frequencyPenalty: 10,
+            maxOutputTokens: 8000,
+            presencePenalty: 6,
+            stopSequences: ['stop', 'please stop'],
+            systemInstruction: {
+              role: 'model',
+              parts: [{ text: 'be nice' }],
+            },
+          },
+          server: {
+            address: 'foo.com',
+            port: 8080,
+          },
+        },
+        {
+          response_id: '',
+          candidates: [
+            {
+              content: {
+                role: 'model',
+                parts: [{ text: 'candidate 1' }],
+              },
+              finishReason: FinishReason.STOP,
+            },
+          ],
+        },
         AuthType.LOGIN_WITH_GOOGLE,
         usageData,
         'test-response',
@@ -357,26 +387,40 @@ describe('loggers', () => {
 
       expect(mockLogger.emit).toHaveBeenCalledWith({
         body: 'API response from test-model. Status: 200. Duration: 100ms.',
-        attributes: {
-          'session.id': 'test-session-id',
-          'user.email': 'test-user@example.com',
-          'installation.id': 'test-installation-id',
+        attributes: expect.objectContaining({
           'event.name': EVENT_API_RESPONSE,
-          'event.timestamp': '2025-01-01T00:00:00.000Z',
-          [SemanticAttributes.HTTP_STATUS_CODE]: 200,
-          model: 'test-model',
-          status_code: 200,
-          duration_ms: 100,
-          input_token_count: 17,
-          output_token_count: 50,
-          cached_content_token_count: 10,
-          thoughts_token_count: 5,
-          tool_token_count: 2,
-          total_token_count: 0,
-          response_text: 'test-response',
           prompt_id: 'prompt-id-1',
-          auth_type: 'oauth-personal',
-        },
+        }),
+      });
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'GenAI operation details from test-model. Status: 200. Duration: 100ms.',
+        attributes: expect.objectContaining({
+          'event.name': 'gen_ai.client.inference.operation.details',
+          'gen_ai.request.model': 'test-model',
+          'gen_ai.request.temperature': 1,
+          'gen_ai.request.top_p': 2,
+          'gen_ai.request.top_k': 3,
+          'gen_ai.input.messages':
+            '[{"role":"user","parts":[{"type":"text","content":"Hello"}]}]',
+          'gen_ai.output.messages':
+            '[{"finish_reason":"stop","role":"system","parts":[{"type":"text","content":"candidate 1"}]}]',
+          'gen_ai.response.finish_reasons': ['stop'],
+          'gen_ai.response.model': 'test-model',
+          'gen_ai.usage.input_tokens': 17,
+          'gen_ai.usage.output_tokens': 50,
+          'gen_ai.operation.name': 'generate_content',
+          'gen_ai.output.type': 'text',
+          'gen_ai.request.choice.count': 1,
+          'gen_ai.request.seed': 678,
+          'gen_ai.request.frequency_penalty': 10,
+          'gen_ai.request.presence_penalty': 6,
+          'gen_ai.request.max_tokens': 8000,
+          'server.address': 'foo.com',
+          'server.port': 8080,
+          'gen_ai.request.stop_sequences': ['stop', 'please stop'],
+          'gen_ai.system_instructions': '[{"type":"text","content":"be nice"}]',
+        }),
       });
 
       expect(mockMetrics.recordApiResponseMetrics).toHaveBeenCalledWith(
@@ -433,6 +477,137 @@ describe('loggers', () => {
     });
   });
 
+  describe('logApiError', () => { // Expects two emitted log records plus error and response metrics.
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getTargetDir: () => 'target-dir',
+      getUsageStatisticsEnabled: () => true,
+      getTelemetryEnabled: () => true,
+      getTelemetryLogPromptsEnabled: () => true,
+    } as Config;
+
+    const mockMetrics = {
+      recordApiResponseMetrics: vi.fn(),
+      recordApiErrorMetrics: vi.fn(),
+      recordTokenUsageMetrics: vi.fn(),
+    };
+
+    beforeEach(() => {
+      vi.spyOn(metrics, 'recordApiResponseMetrics').mockImplementation(
+        mockMetrics.recordApiResponseMetrics,
+      );
+      vi.spyOn(metrics, 'recordApiErrorMetrics').mockImplementation(
+        mockMetrics.recordApiErrorMetrics,
+      );
+    });
+
+    it('should log an API error with all fields', () => {
+      const event = new ApiErrorEvent(
+        'test-model',
+        'UNAVAILABLE. {"error":{"code":503,"message":"The model is overloaded. Please try again later.","status":"UNAVAILABLE"}}',
+        100,
+        {
+          prompt_id: 'prompt-id-1',
+          contents: [
+            {
+              role: 'user',
+              parts: [{ text: 'Hello' }],
+            },
+          ],
+          generate_content_config: {
+            temperature: 1,
+            topP: 2,
+            topK: 3,
+            responseMimeType: 'text/plain',
+            candidateCount: 1,
+            seed: 678,
+            frequencyPenalty: 10,
+            maxOutputTokens: 8000,
+            presencePenalty: 6,
+            stopSequences: ['stop', 'please stop'],
+            systemInstruction: {
+              role: 'model',
+              parts: [{ text: 'be nice' }],
+            },
+          },
+          server: {
+            address: 'foo.com',
+            port: 8080,
+          },
+        },
+        AuthType.LOGIN_WITH_GOOGLE,
+        'ApiError',
+        503,
+      );
+
+      logApiError(mockConfig, event);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({ // Record 1: the EVENT_API_ERROR log record.
+        body: 'API error for test-model. Error: UNAVAILABLE. {"error":{"code":503,"message":"The model is overloaded. Please try again later.","status":"UNAVAILABLE"}}. Duration: 100ms.',
+        attributes: expect.objectContaining({
+          'event.name': EVENT_API_ERROR,
+          prompt_id: 'prompt-id-1',
+        }),
+      });
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({ // Record 2: the GenAI semantic-convention operation-details record.
+        body: 'GenAI operation error details from test-model. Error: UNAVAILABLE. {"error":{"code":503,"message":"The model is overloaded. Please try again later.","status":"UNAVAILABLE"}}. Duration: 100ms.',
+        attributes: expect.objectContaining({
+          'event.name': 'gen_ai.client.inference.operation.details',
+          'gen_ai.request.model': 'test-model',
+          'gen_ai.request.temperature': 1,
+          'gen_ai.request.top_p': 2,
+          'gen_ai.request.top_k': 3,
+          'gen_ai.input.messages':
+            '[{"role":"user","parts":[{"type":"text","content":"Hello"}]}]',
+          'gen_ai.operation.name': 'generate_content',
+          'gen_ai.output.type': 'text',
+          'gen_ai.request.choice.count': 1,
+          'gen_ai.request.seed': 678,
+          'gen_ai.request.frequency_penalty': 10,
+          'gen_ai.request.presence_penalty': 6,
+          'gen_ai.request.max_tokens': 8000,
+          'server.address': 'foo.com',
+          'server.port': 8080,
+          'gen_ai.request.stop_sequences': ['stop', 'please stop'],
+          'gen_ai.system_instructions': '[{"type":"text","content":"be nice"}]',
+        }),
+      });
+
+      expect(mockMetrics.recordApiErrorMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        100,
+        {
+          model: 'test-model',
+          status_code: 503,
+          error_type: 'ApiError',
+        },
+      );
+
+      expect(mockMetrics.recordApiResponseMetrics).toHaveBeenCalledWith( // The failure is also expected on the API response metric, tagged with error.type.
+        mockConfig,
+        100,
+        {
+          model: 'test-model',
+          status_code: 503,
+          genAiAttributes: {
+            'gen_ai.operation.name': 'generate_content',
+            'gen_ai.provider.name': 'gcp.vertex_ai',
+            'gen_ai.request.model': 'test-model',
+            'gen_ai.response.model': 'test-model',
+            'error.type': 'ApiError',
+          },
+        },
+      );
+
+      expect(mockUiEvent.addEvent).toHaveBeenCalledWith({
+        ...event,
+        'event.name': EVENT_API_ERROR,
+        'event.timestamp': '2025-01-01T00:00:00.000Z',
+      });
+    });
+  });
+
   describe('logApiRequest', () => {
     const mockConfig = {
       getSessionId: () => 'test-session-id',
diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts
index 81f8b43029..cf95c69340 100644
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -89,6 +89,7 @@ export function logUserPrompt(config: Config, event: UserPromptEvent): void {
   if (!isTelemetrySdkInitialized()) return;
 
   const logger = logs.getLogger(SERVICE_NAME);
+
   const logRecord: LogRecord = {
     body: event.toLogBody(),
     attributes: event.toOpenTelemetryAttributes(config),
@@ -219,11 +220,9 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
   if (!isTelemetrySdkInitialized()) return;
 
   const logger = logs.getLogger(SERVICE_NAME);
-  const logRecord: LogRecord = {
-    body: event.toLogBody(),
-    attributes: event.toOpenTelemetryAttributes(config),
-  };
-  logger.emit(logRecord);
+  logger.emit(event.toLogRecord(config));
+  logger.emit(event.toSemanticLogRecord(config));
+
   recordApiErrorMetrics(config, event.duration_ms, {
     model: event.model,
     status_code: event.status_code,
@@ -231,12 +230,11 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
   });
 
   // Record GenAI operation duration for errors
-  const conventionAttributes = getConventionAttributes(event);
   recordApiResponseMetrics(config, event.duration_ms, {
     model: event.model,
     status_code: event.status_code,
     genAiAttributes: {
-      ...conventionAttributes,
+      ...getConventionAttributes(event),
       'error.type': event.error_type || 'unknown',
     },
   });
@@ -253,11 +251,8 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
   if (!isTelemetrySdkInitialized()) return;
 
   const logger = logs.getLogger(SERVICE_NAME);
-  const logRecord: LogRecord = {
-    body: event.toLogBody(),
-    attributes: event.toOpenTelemetryAttributes(config),
-  };
-  logger.emit(logRecord);
+  logger.emit(event.toLogRecord(config));
+  logger.emit(event.toSemanticLogRecord(config));
 
   const conventionAttributes = getConventionAttributes(event);
 
@@ -268,11 +263,11 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
   });
 
   const tokenUsageData = [
-    { count: event.input_token_count, type: 'input' as const },
-    { count: event.output_token_count, type: 'output' as const },
-    { count: event.cached_content_token_count, type: 'cache' as const },
-    { count: event.thoughts_token_count, type: 'thought' as const },
-    { count: event.tool_token_count, type: 'tool' as const },
+    { count: event.usage.input_token_count, type: 'input' as const },
+    { count: event.usage.output_token_count, type: 'output' as const },
+    { count: event.usage.cached_content_token_count, type: 'cache' as const },
+    { count: event.usage.thoughts_token_count, type: 'thought' as const },
+    { count: event.usage.tool_token_count, type: 'tool' as const },
   ];
 
   for (const { count, type } of tokenUsageData) {
diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts
index 4123ed5325..6e9f1846ec 100644
--- a/packages/core/src/telemetry/metrics.ts
+++ b/packages/core/src/telemetry/metrics.ts
@@ -7,7 +7,6 @@
 import type { Attributes, Meter, Counter, Histogram } from '@opentelemetry/api';
 import { diag, metrics, ValueType } from '@opentelemetry/api';
 import { SERVICE_NAME } from './constants.js';
-import { EVENT_CHAT_COMPRESSION } from './types.js';
 import type { Config } from '../config/config.js';
 import type {
   ModelRoutingEvent,
@@ -17,6 +16,7 @@ import type {
 import { AuthType } from '../core/contentGenerator.js';
 import { getCommonAttributes } from './telemetryAttributes.js';
 
+const EVENT_CHAT_COMPRESSION = 'gemini_cli.chat_compression';
 const TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
 const TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
 const API_REQUEST_COUNT = 'gemini_cli.api.request.count';
diff --git a/packages/core/src/telemetry/semantic.test.ts b/packages/core/src/telemetry/semantic.test.ts
new file mode 100644
index 0000000000..6ea4cf3a3e
--- /dev/null
+++ b/packages/core/src/telemetry/semantic.test.ts
@@ -0,0 +1,425 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  toChatMessage,
+  toInputMessages,
+  toSystemInstruction,
+  toOutputMessages,
+  toFinishReasons,
+  OTelFinishReason,
+  toOutputType,
+  OTelOutputType,
+} from './semantic.js';
+import {
+  Language,
+  type Content,
+  Outcome,
+  type Candidate,
+  FinishReason,
+} from '@google/genai';
+
+describe('toChatMessage', () => { // Pins the Content -> ChatMessage mapping; note role 'model' is expected as 'system'.
+  it('should correctly handle text parts', () => {
+    const content: Content = {
+      role: 'user',
+      parts: [{ text: 'Hello' }],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'user',
+      parts: [
+        {
+          type: 'text',
+          content: 'Hello',
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle function call parts', () => {
+    const content: Content = {
+      role: 'model',
+      parts: [
+        {
+          functionCall: {
+            name: 'test-function',
+            args: {
+              arg1: 'test-value',
+            },
+            id: '12345',
+          },
+          // Extra field outside the semantic spec; expected to be dropped from the output.
+          thoughtSignature: '1234',
+        },
+      ],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'system',
+      parts: [
+        {
+          type: 'tool_call',
+          name: 'test-function',
+          arguments: '{"arg1":"test-value"}',
+          id: '12345',
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle function response parts', () => {
+    const content: Content = {
+      role: 'user',
+      parts: [
+        {
+          functionResponse: {
+            name: 'test-function',
+            response: {
+              result: 'success',
+            },
+            id: '12345',
+          },
+          // Extra field outside the semantic spec; expected to be dropped from the output.
+          fileData: {
+            displayName: 'greatfile',
+          },
+        },
+      ],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'user',
+      parts: [
+        {
+          type: 'tool_call_response',
+          response: '{"result":"success"}',
+          id: '12345',
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle reasoning parts with text', () => {
+    const content: Content = {
+      role: 'system',
+      parts: [{ text: 'Hmm', thought: true }],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'system',
+      parts: [
+        {
+          type: 'reasoning',
+          content: 'Hmm',
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle reasoning parts without text', () => {
+    const content: Content = {
+      role: 'system',
+      parts: [
+        {
+          thought: true,
+          // Extra field outside the semantic spec; expected to be dropped from the output.
+          inlineData: {
+            displayName: 'wowdata',
+          },
+        },
+      ],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'system',
+      parts: [
+        {
+          type: 'reasoning',
+          content: '',
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle text parts that are not reasoning parts', () => {
+    const content: Content = {
+      role: 'user',
+      parts: [{ text: 'what a nice day', thought: false }],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'user',
+      parts: [
+        {
+          type: 'text',
+          content: 'what a nice day',
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle "generic" parts', () => {
+    const content: Content = {
+      role: 'model',
+      parts: [
+        {
+          executableCode: {
+            code: 'print("foo")',
+            language: Language.PYTHON,
+          },
+        },
+        {
+          codeExecutionResult: {
+            outcome: Outcome.OUTCOME_OK,
+            output: 'foo',
+          },
+          // Extra field outside the semantic spec; generic parts are expected to keep it.
+          videoMetadata: {
+            fps: 5,
+          },
+        },
+      ],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'system',
+      parts: [
+        {
+          type: 'executableCode',
+          code: 'print("foo")',
+          language: 'PYTHON',
+        },
+        {
+          type: 'codeExecutionResult',
+          outcome: 'OUTCOME_OK',
+          output: 'foo',
+          videoMetadata: {
+            fps: 5,
+          },
+        },
+      ],
+    });
+  });
+
+  it('should correctly handle unknown parts', () => {
+    const content: Content = {
+      role: 'model',
+      parts: [
+        {
+          fileData: {
+            displayName: 'superfile',
+          },
+        },
+      ],
+    };
+    expect(toChatMessage(content)).toEqual({
+      role: 'system',
+      parts: [
+        {
+          type: 'unknown',
+          fileData: {
+            displayName: 'superfile',
+          },
+        },
+      ],
+    });
+  });
+});
+
+describe('toSystemInstruction', () => { // Expected output is a flat list of parts; the Content role is not carried over.
+  it('should correctly handle a string', () => {
+    const content = 'Hello';
+    expect(toSystemInstruction(content)).toEqual([
+      {
+        type: 'text',
+        content: 'Hello',
+      },
+    ]);
+  });
+
+  it('should correctly handle a Content object with a text part', () => {
+    const content: Content = {
+      role: 'user',
+      parts: [{ text: 'Hello' }],
+    };
+    expect(toSystemInstruction(content)).toEqual([
+      {
+        type: 'text',
+        content: 'Hello',
+      },
+    ]);
+  });
+
+  it('should correctly handle a Content object with multiple parts', () => {
+    const content: Content = {
+      role: 'user',
+      parts: [{ text: 'Hello' }, { text: 'Hmm', thought: true }],
+    };
+    expect(toSystemInstruction(content)).toEqual([
+      {
+        type: 'text',
+        content: 'Hello',
+      },
+      {
+        type: 'reasoning',
+        content: 'Hmm',
+      },
+    ]);
+  });
+});
+
+describe('toInputMessages', () => { // One message per Content, in input order; role 'model' is expected as 'system'.
+  it('should correctly convert an array of Content objects', () => {
+    const contents: Content[] = [
+      {
+        role: 'user',
+        parts: [{ text: 'Hello' }],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Hi there!' }],
+      },
+    ];
+    expect(toInputMessages(contents)).toEqual([
+      {
+        role: 'user',
+        parts: [
+          {
+            type: 'text',
+            content: 'Hello',
+          },
+        ],
+      },
+      {
+        role: 'system',
+        parts: [
+          {
+            type: 'text',
+            content: 'Hi there!',
+          },
+        ],
+      },
+    ]);
+  });
+});
+
+describe('toOutputMessages', () => { // One message per Candidate, each carrying a finish_reason (STOP -> 'stop', MAX_TOKENS -> 'length').
+  it('should correctly convert an array of Candidate objects', () => {
+    const candidates: Candidate[] = [
+      {
+        index: 0,
+        finishReason: FinishReason.STOP,
+        content: {
+          role: 'model',
+          parts: [{ text: 'This is the first candidate.' }],
+        },
+      },
+      {
+        index: 1,
+        finishReason: FinishReason.MAX_TOKENS,
+        content: {
+          role: 'model',
+          parts: [{ text: 'This is the second candidate.' }],
+        },
+      },
+    ];
+    expect(toOutputMessages(candidates)).toEqual([
+      {
+        role: 'system',
+        finish_reason: 'stop',
+        parts: [
+          {
+            type: 'text',
+            content: 'This is the first candidate.',
+          },
+        ],
+      },
+      {
+        role: 'system',
+        finish_reason: 'length',
+        parts: [
+          {
+            type: 'text',
+            content: 'This is the second candidate.',
+          },
+        ],
+      },
+    ]);
+  });
+});
+
+describe('toFinishReasons', () => { // Gemini FinishReason -> OTelFinishReason: STOP -> STOP, MAX_TOKENS -> LENGTH, SAFETY -> CONTENT_FILTER.
+  it('should return an empty array if candidates is undefined', () => {
+    expect(toFinishReasons(undefined)).toEqual([]);
+  });
+
+  it('should return an empty array if candidates is an empty array', () => {
+    expect(toFinishReasons([])).toEqual([]);
+  });
+
+  it('should correctly convert a single candidate', () => {
+    const candidates: Candidate[] = [
+      {
+        index: 0,
+        finishReason: FinishReason.STOP,
+        content: {
+          role: 'model',
+          parts: [{ text: 'This is the first candidate.' }],
+        },
+      },
+    ];
+    expect(toFinishReasons(candidates)).toEqual([OTelFinishReason.STOP]);
+  });
+
+  it('should correctly convert multiple candidates', () => {
+    const candidates: Candidate[] = [
+      {
+        index: 0,
+        finishReason: FinishReason.STOP,
+        content: {
+          role: 'model',
+          parts: [{ text: 'This is the first candidate.' }],
+        },
+      },
+      {
+        index: 1,
+        finishReason: FinishReason.MAX_TOKENS,
+        content: {
+          role: 'model',
+          parts: [{ text: 'This is the second candidate.' }],
+        },
+      },
+      {
+        index: 2,
+        finishReason: FinishReason.SAFETY,
+        content: {
+          role: 'model',
+          parts: [{ text: 'This is the third candidate.' }],
+        },
+      },
+    ];
+    expect(toFinishReasons(candidates)).toEqual([
+      OTelFinishReason.STOP,
+      OTelFinishReason.LENGTH,
+      OTelFinishReason.CONTENT_FILTER,
+    ]);
+  });
+});
+
+describe('toOutputType', () => { // Known MIME types map to OTelOutputType; other strings pass through; undefined stays undefined.
+  it('should return TEXT for text/plain', () => {
+    expect(toOutputType('text/plain')).toBe(OTelOutputType.TEXT);
+  });
+
+  it('should return JSON for application/json', () => {
+    expect(toOutputType('application/json')).toBe(OTelOutputType.JSON);
+  });
+
+  it('should return the custom mime type for other strings', () => {
+    expect(toOutputType('application/vnd.custom-type')).toBe(
+      'application/vnd.custom-type',
+    );
+  });
+
+  it('should return undefined for undefined input', () => {
+    expect(toOutputType(undefined)).toBeUndefined();
+  });
+});
diff --git a/packages/core/src/telemetry/semantic.ts b/packages/core/src/telemetry/semantic.ts
new file mode 100644
index 0000000000..b2ee296793
--- /dev/null
+++ b/packages/core/src/telemetry/semantic.ts
@@ -0,0 +1,323 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * This file contains functions and types for converting Gemini API request/response
+ * formats to the OpenTelemetry semantic conventions for generative AI.
+ *
+ * @see https://github.com/open-telemetry/semantic-conventions/blob/8b4f210f43136e57c1f6f47292eb6d38e3bf30bb/docs/gen-ai/gen-ai-events.md
+ */
+
+import { FinishReason } from '@google/genai';
+import type {
+  Candidate,
+  Content,
+  ContentUnion,
+  Part,
+  PartUnion,
+} from '@google/genai';
+
+export function toInputMessages(contents: Content[]): InputMessages {
+  const messages: ChatMessage[] = [];
+  for (const content of contents) {
+    messages.push(toChatMessage(content));
+  }
+  return messages;
+}
+
+function isPart(value: unknown): value is Part {
+  return (
+    typeof value === 'object' &&
+    value !== null &&
+    !Array.isArray(value) &&
+    !('parts' in value)
+  );
+}
+
+function toPart(part: PartUnion): Part {
+  if (typeof part === 'string') {
+    return { text: part };
+  }
+  return part;
+}
+
+function toContent(content: ContentUnion): Content | undefined {
+  if (typeof content === 'string') {
+    // 1. It's a string
+    return {
+      parts: [toPart(content)],
+    };
+  } else if (Array.isArray(content)) {
+    // 2. It's an array of parts (PartUnion[])
+    return {
+      parts: content.map(toPart),
+    };
+  } else if ('parts' in content) {
+    // 3. It's a Content object
+    return content;
+  } else if (isPart(content)) {
+    // 4. It's a single Part object (asserted with type guard)
+    return {
+      parts: [content],
+    };
+  } else {
+    // 5. Handle any other unexpected case
+    return undefined;
+  }
+}
+
+export function toSystemInstruction(
+  systemInstruction?: ContentUnion,
+): SystemInstruction | undefined {
+  const parts: AnyPart[] = [];
+  if (systemInstruction) {
+    const content = toContent(systemInstruction);
+    if (content && content.parts) {
+      for (const part of content.parts) {
+        parts.push(toOTelPart(part));
+      }
+    }
+  }
+  return parts;
+}
+
+export function toOutputMessages(candidates?: Candidate[]): OutputMessages {
+  const messages: OutputMessage[] = [];
+  if (candidates) {
+    for (const candidate of candidates) {
+      messages.push({
+        finish_reason: toOTelFinishReason(candidate.finishReason),
+        ...toChatMessage(candidate.content),
+      });
+    }
+  }
+  return messages;
+}
+
+export function toFinishReasons(candidates?: Candidate[]): OTelFinishReason[] {
+  const reasons: OTelFinishReason[] = [];
+  if (candidates) {
+    for (const candidate of candidates) {
+      reasons.push(toOTelFinishReason(candidate.finishReason));
+    }
+  }
+  return reasons;
+}
+
+export function toOutputType(requested_mime?: string): string | undefined {
+  switch (requested_mime) {
+    // explicitly support the known good values of responseMimeType
+    case 'text/plain':
+      return OTelOutputType.TEXT;
+    case 'application/json':
+      return OTelOutputType.JSON;
+    default:
+      // if none of the well-known values applies, a custom value may be used
+      return requested_mime;
+  }
+}
+
+export function toChatMessage(content?: Content): ChatMessage {
+  const message: ChatMessage = {
+    role: undefined,
+    parts: [],
+  };
+  if (content && content.parts) {
+    message.role = toOTelRole(content.role);
+    for (const part of content.parts) {
+      message.parts.push(toOTelPart(part));
+    }
+  }
+  return message;
+}
+
+export function toOTelPart(part: Part): AnyPart {
+  if (part.thought) {
+    if (part.text) {
+      return new ReasoningPart(part.text);
+    } else {
+      return new ReasoningPart('');
+    }
+  } else if (part.text) {
+    return new TextPart(part.text);
+  } else if (part.functionCall) {
+    return new ToolCallRequestPart(
+      part.functionCall.name,
+      part.functionCall.id,
+      JSON.stringify(part.functionCall.args),
+    );
+  } else if (part.functionResponse) {
+    return new ToolCallResponsePart(
+      JSON.stringify(part.functionResponse.response),
+      part.functionResponse.id,
+    );
+  } else if (part.executableCode) {
+    const { executableCode, ...unexpectedData } = part;
+    return new GenericPart('executableCode', {
+      code: executableCode.code,
+      language: executableCode.language,
+      ...unexpectedData,
+    });
+  } else if (part.codeExecutionResult) {
+    const { codeExecutionResult, ...unexpectedData } = part;
+    return new GenericPart('codeExecutionResult', {
+      outcome: codeExecutionResult.outcome,
+      output: codeExecutionResult.output,
+      ...unexpectedData,
+    });
+  }
+  // Assuming the above cases capture all the expected parts
+  // but adding a fallthrough just in case.
+  return new GenericPart('unknown', { ...part });
+}
+
+export enum OTelRole {
+  SYSTEM = 'system',
+  USER = 'user',
+  ASSISTANT = 'assistant',
+  TOOL = 'tool',
+}
+
+export function toOTelRole(role?: string): OTelRole {
+  switch (role?.toLowerCase()) {
+    case 'system':
+      return OTelRole.SYSTEM;
+    // Our APIs seem to frequently use 'model'
+    case 'model':
+      return OTelRole.SYSTEM;
+    case 'user':
+      return OTelRole.USER;
+    case 'assistant':
+      return OTelRole.ASSISTANT;
+    case 'tool':
+      return OTelRole.TOOL;
+    default:
+      return OTelRole.SYSTEM;
+  }
+}
+
+export type InputMessages = ChatMessage[];
+
+export enum OTelOutputType {
+  IMAGE = 'image',
+  JSON = 'json',
+  SPEECH = 'speech',
+  TEXT = 'text',
+}
+
+export enum OTelFinishReason {
+  STOP = 'stop',
+  LENGTH = 'length',
+  CONTENT_FILTER = 'content_filter',
+  TOOL_CALL = 'tool_call',
+  ERROR = 'error',
+}
+
+export function toOTelFinishReason(finishReason?: string): OTelFinishReason {
+  switch (finishReason) {
+    // we have significantly more finish reasons than the spec
+    case FinishReason.FINISH_REASON_UNSPECIFIED:
+      return OTelFinishReason.STOP;
+    case FinishReason.STOP:
+      return OTelFinishReason.STOP;
+    case FinishReason.MAX_TOKENS:
+      return OTelFinishReason.LENGTH;
+    case FinishReason.SAFETY:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.RECITATION:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.LANGUAGE:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.OTHER:
+      return OTelFinishReason.STOP;
+    case FinishReason.BLOCKLIST:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.PROHIBITED_CONTENT:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.SPII:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.MALFORMED_FUNCTION_CALL:
+      return OTelFinishReason.ERROR;
+    case FinishReason.IMAGE_SAFETY:
+      return OTelFinishReason.CONTENT_FILTER;
+    case FinishReason.UNEXPECTED_TOOL_CALL:
+      return OTelFinishReason.ERROR;
+    default:
+      return OTelFinishReason.STOP;
+  }
+}
+
+export interface OutputMessage extends ChatMessage {
+  finish_reason: FinishReason | string;
+}
+
+export type OutputMessages = OutputMessage[];
+
+export type AnyPart =
+  | TextPart
+  | ToolCallRequestPart
+  | ToolCallResponsePart
+  | ReasoningPart
+  | GenericPart;
+
+export type SystemInstruction = AnyPart[];
+
+export interface ChatMessage {
+  role: string | undefined;
+  parts: AnyPart[];
+}
+
+export class TextPart {
+  readonly type = 'text';
+  content: string;
+
+  constructor(content: string) {
+    this.content = content;
+  }
+}
+
+export class ToolCallRequestPart {
+  readonly type = 'tool_call';
+  name?: string;
+  id?: string;
+  arguments?: string;
+
+  constructor(name?: string, id?: string, args?: string) {
+    this.name = name;
+    this.id = id;
+    this.arguments = args;
+  }
+}
+
+export class ToolCallResponsePart {
+  readonly type = 'tool_call_response';
+  response?: string;
+  id?: string;
+
+  constructor(response?: string, id?: string) {
+    this.response = response;
+    this.id = id;
+  }
+}
+
+export class ReasoningPart {
+  readonly type = 'reasoning';
+  content: string;
+
+  constructor(content: string) {
+    this.content = content;
+  }
+}
+
+export class GenericPart {
+  type: string;
+  [key: string]: unknown;
+
+  constructor(type: string, data: { [key: string]: unknown }) {
+    this.type = type;
+    Object.assign(this, data);
+  }
+}
diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts
index 38e636b128..a6778fc851 100644
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@@ -4,19 +4,24 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { GenerateContentResponseUsageMetadata } from '@google/genai';
+import type {
+  Candidate,
+  Content,
+  GenerateContentConfig,
+  GenerateContentResponseUsageMetadata,
+} from '@google/genai';
 import type { Config } from '../config/config.js';
 import type { ApprovalMode } from '../config/config.js';
 import type { CompletedToolCall } from '../core/coreToolScheduler.js';
 import { DiscoveredMCPTool } from '../tools/mcp-tool.js';
 import type { FileDiff } from '../tools/tools.js';
 import { AuthType } from '../core/contentGenerator.js';
-import type { LogAttributes } from '@opentelemetry/api-logs';
+import type { LogAttributes, LogRecord } from '@opentelemetry/api-logs';
 import {
   getDecisionFromOutcome,
   ToolCallDecision,
 } from './tool-call-decision.js';
-import type { FileOperation } from './metrics.js';
+import { getConventionAttributes, type FileOperation } from './metrics.js';
 export { ToolCallDecision };
 import type { ToolRegistry } from '../tools/tool-registry.js';
 import type { OutputFormat } from '../output/types.js';
@@ -25,6 +30,13 @@ import type { AgentTerminateMode } from '../agents/types.js';
 import { getCommonAttributes } from './telemetryAttributes.js';
 import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';
+import {
+  toInputMessages,
+  toOutputMessages,
+  toFinishReasons,
+  toOutputType,
+  toSystemInstruction,
+} from './semantic.js';
 
 export interface BaseTelemetryEvent {
   'event.name': string;
@@ -358,18 +370,18 @@ export class ApiErrorEvent implements BaseTelemetryEvent {
   'event.name': 'api_error';
   'event.timestamp': string;
   model: string;
+  prompt: GenAIPromptDetails;
   error: string;
   error_type?: string;
   status_code?: number | string;
   duration_ms: number;
-  prompt_id: string;
   auth_type?: string;
 
   constructor(
     model: string,
     error: string,
     duration_ms: number,
-    prompt_id: string,
+    prompt_details: GenAIPromptDetails,
     auth_type?: string,
     error_type?: string,
     status_code?: number | string,
@@ -381,11 +393,11 @@ export class ApiErrorEvent implements BaseTelemetryEvent {
     this.error_type = error_type;
     this.status_code = status_code;
     this.duration_ms = duration_ms;
-    this.prompt_id = prompt_id;
+    this.prompt = prompt_details;
     this.auth_type = auth_type;
   }
 
-  toOpenTelemetryAttributes(config: Config): LogAttributes {
+  toLogRecord(config: Config): LogRecord {
     const attributes: LogAttributes = {
       ...getCommonAttributes(config),
       'event.name': EVENT_API_ERROR,
@@ -397,7 +409,7 @@ export class ApiErrorEvent implements BaseTelemetryEvent {
       error: this.error,
       status_code: this.status_code,
       duration_ms: this.duration_ms,
-      prompt_id: this.prompt_id,
+      prompt_id: this.prompt.prompt_id,
       auth_type: this.auth_type,
     };
 
@@ -407,69 +419,151 @@ export class ApiErrorEvent implements BaseTelemetryEvent {
     if (typeof this.status_code === 'number') {
       attributes[SemanticAttributes.HTTP_STATUS_CODE] = this.status_code;
     }
-    return attributes;
+    const logRecord: LogRecord = {
+      body: `API error for ${this.model}. Error: ${this.error}. Duration: ${this.duration_ms}ms.`,
+      attributes,
+    };
+    return logRecord;
   }
 
-  toLogBody(): string {
-    return `API error for ${this.model}. Error: ${this.error}. Duration: ${this.duration_ms}ms.`;
+  toSemanticLogRecord(config: Config): LogRecord {
+    const attributes: LogAttributes = {
+      ...getCommonAttributes(config),
+      'event.name': EVENT_GEN_AI_OPERATION_DETAILS,
+      'event.timestamp': this['event.timestamp'],
+      ...toGenerateContentConfigAttributes(this.prompt.generate_content_config),
+      ...getConventionAttributes(this),
+    };
+
+    if (this.prompt.server) {
+      attributes['server.address'] = this.prompt.server.address;
+      attributes['server.port'] = this.prompt.server.port;
+    }
+
+    if (config.getTelemetryLogPromptsEnabled() && this.prompt.contents) {
+      attributes['gen_ai.input.messages'] = JSON.stringify(
+        toInputMessages(this.prompt.contents),
+      );
+    }
+
+    const logRecord: LogRecord = {
+      body: `GenAI operation error details from ${this.model}. Error: ${this.error}. Duration: ${this.duration_ms}ms.`,
+      attributes,
+    };
+
+    return logRecord;
   }
 }
 
-export const EVENT_API_RESPONSE = 'gemini_cli.api_response';
-export class ApiResponseEvent implements BaseTelemetryEvent {
-  'event.name': 'api_response';
-  'event.timestamp': string;
-  model: string;
-  status_code?: number | string;
-  duration_ms: number;
+export interface ServerDetails {
+  address: string;
+  port: number;
+}
+
+export interface GenAIPromptDetails {
+  prompt_id: string;
+  contents: Content[];
+  generate_content_config?: GenerateContentConfig;
+  server?: ServerDetails;
+}
+
+export interface GenAIResponseDetails {
+  response_id?: string;
+  candidates?: Candidate[];
+}
+
+export interface GenAIUsageDetails {
   input_token_count: number;
   output_token_count: number;
   cached_content_token_count: number;
   thoughts_token_count: number;
   tool_token_count: number;
   total_token_count: number;
+}
+
+export const EVENT_API_RESPONSE = 'gemini_cli.api_response';
+export const EVENT_GEN_AI_OPERATION_DETAILS =
+  'gen_ai.client.inference.operation.details';
+
+function toGenerateContentConfigAttributes(
+  config?: GenerateContentConfig,
+): LogAttributes {
+  if (!config) {
+    return {};
+  }
+  return {
+    'gen_ai.request.temperature': config.temperature,
+    'gen_ai.request.top_p': config.topP,
+    'gen_ai.request.top_k': config.topK,
+    'gen_ai.request.choice.count': config.candidateCount,
+    'gen_ai.request.seed': config.seed,
+    'gen_ai.request.frequency_penalty': config.frequencyPenalty,
+    'gen_ai.request.presence_penalty': config.presencePenalty,
+    'gen_ai.request.max_tokens': config.maxOutputTokens,
+    'gen_ai.output.type': toOutputType(config.responseMimeType),
+    'gen_ai.request.stop_sequences': config.stopSequences,
+    'gen_ai.system_instructions': JSON.stringify(
+      toSystemInstruction(config.systemInstruction),
+    ),
+  };
+}
+
+export class ApiResponseEvent implements BaseTelemetryEvent {
+  'event.name': 'api_response';
+  'event.timestamp': string;
+  status_code?: number | string;
+  duration_ms: number;
   response_text?: string;
-  prompt_id: string;
   auth_type?: string;
 
+  model: string;
+  prompt: GenAIPromptDetails;
+  response: GenAIResponseDetails;
+  usage: GenAIUsageDetails;
+
   constructor(
     model: string,
     duration_ms: number,
-    prompt_id: string,
+    prompt_details: GenAIPromptDetails,
+    response_details: GenAIResponseDetails,
     auth_type?: string,
     usage_data?: GenerateContentResponseUsageMetadata,
     response_text?: string,
   ) {
     this['event.name'] = 'api_response';
     this['event.timestamp'] = new Date().toISOString();
-    this.model = model;
     this.duration_ms = duration_ms;
     this.status_code = 200;
-    this.input_token_count = usage_data?.promptTokenCount ?? 0;
-    this.output_token_count = usage_data?.candidatesTokenCount ?? 0;
-    this.cached_content_token_count = usage_data?.cachedContentTokenCount ?? 0;
-    this.thoughts_token_count = usage_data?.thoughtsTokenCount ?? 0;
-    this.tool_token_count = usage_data?.toolUsePromptTokenCount ?? 0;
-    this.total_token_count = usage_data?.totalTokenCount ?? 0;
     this.response_text = response_text;
-    this.prompt_id = prompt_id;
     this.auth_type = auth_type;
+
+    this.model = model;
+    this.prompt = prompt_details;
+    this.response = response_details;
+    this.usage = {
+      input_token_count: usage_data?.promptTokenCount ?? 0,
+      output_token_count: usage_data?.candidatesTokenCount ?? 0,
+      cached_content_token_count: usage_data?.cachedContentTokenCount ?? 0,
+      thoughts_token_count: usage_data?.thoughtsTokenCount ?? 0,
+      tool_token_count: usage_data?.toolUsePromptTokenCount ?? 0,
+      total_token_count: usage_data?.totalTokenCount ?? 0,
+    };
   }
 
-  toOpenTelemetryAttributes(config: Config): LogAttributes {
+  toLogRecord(config: Config): LogRecord {
     const attributes: LogAttributes = {
       ...getCommonAttributes(config),
       'event.name': EVENT_API_RESPONSE,
       'event.timestamp': this['event.timestamp'],
       model: this.model,
       duration_ms: this.duration_ms,
-      input_token_count: this.input_token_count,
-      output_token_count: this.output_token_count,
-      cached_content_token_count: this.cached_content_token_count,
-      thoughts_token_count: this.thoughts_token_count,
-      tool_token_count: this.tool_token_count,
-      total_token_count: this.total_token_count,
-      prompt_id: this.prompt_id,
+      input_token_count: this.usage.input_token_count,
+      output_token_count: this.usage.output_token_count,
+      cached_content_token_count: this.usage.cached_content_token_count,
+      thoughts_token_count: this.usage.thoughts_token_count,
+      tool_token_count: this.usage.tool_token_count,
+      total_token_count: this.usage.total_token_count,
+      prompt_id: this.prompt.prompt_id,
       auth_type: this.auth_type,
       status_code: this.status_code,
     };
@@ -481,11 +575,51 @@ export class ApiResponseEvent implements BaseTelemetryEvent {
         attributes[SemanticAttributes.HTTP_STATUS_CODE] = this.status_code;
       }
     }
-    return attributes;
+    const logRecord: LogRecord = {
+      body: `API response from ${this.model}. Status: ${this.status_code || 'N/A'}. Duration: ${this.duration_ms}ms.`,
+      attributes,
+    };
+    return logRecord;
   }
 
-  toLogBody(): string {
-    return `API response from ${this.model}. Status: ${this.status_code || 'N/A'}. Duration: ${this.duration_ms}ms.`;
+  toSemanticLogRecord(config: Config): LogRecord {
+    const attributes: LogAttributes = {
+      ...getCommonAttributes(config),
+      'event.name': EVENT_GEN_AI_OPERATION_DETAILS,
+      'event.timestamp': this['event.timestamp'],
+      'gen_ai.response.id': this.response.response_id,
+      'gen_ai.response.finish_reasons': toFinishReasons(
+        this.response.candidates,
+      ),
+      'gen_ai.output.messages': JSON.stringify(
+        toOutputMessages(this.response.candidates),
+      ),
+      ...toGenerateContentConfigAttributes(this.prompt.generate_content_config),
+      ...getConventionAttributes(this),
+    };
+
+    if (this.prompt.server) {
+      attributes['server.address'] = this.prompt.server.address;
+      attributes['server.port'] = this.prompt.server.port;
+    }
+
+    if (config.getTelemetryLogPromptsEnabled() && this.prompt.contents) {
+      attributes['gen_ai.input.messages'] = JSON.stringify(
+        toInputMessages(this.prompt.contents),
+      );
+    }
+
+    if (this.usage) {
+      attributes['gen_ai.usage.input_tokens'] = this.usage.input_token_count;
+      attributes['gen_ai.usage.output_tokens'] = this.usage.output_token_count;
+    }
+
+    const logRecord: LogRecord = {
+      body: `GenAI operation details from ${this.model}. Status: ${this.status_code || 'N/A'}. Duration: ${this.duration_ms}ms.`,
+      attributes,
+    };
+
+    return logRecord;
   }
 }
 
diff --git a/packages/core/src/telemetry/uiTelemetry.test.ts b/packages/core/src/telemetry/uiTelemetry.test.ts
index 9f4aed54e7..aa06e87a9f 100644
--- a/packages/core/src/telemetry/uiTelemetry.test.ts
+++ b/packages/core/src/telemetry/uiTelemetry.test.ts
@@ -129,12 +129,14 @@ describe('UiTelemetryService', () => {
       'event.name': EVENT_API_RESPONSE,
       model: 'gemini-2.5-pro',
       duration_ms: 500,
-      input_token_count: 10,
-      output_token_count: 20,
-      total_token_count: 30,
-      cached_content_token_count: 5,
-      thoughts_token_count: 2,
-      tool_token_count: 3,
+      usage: {
+        input_token_count: 10,
+        output_token_count: 20,
+        total_token_count: 30,
+        cached_content_token_count: 5,
+        thoughts_token_count: 2,
+        tool_token_count: 3,
+      },
     } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };
 
     service.addEvent(event);
@@ -151,12 +153,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 10,
-        output_token_count: 20,
-        total_token_count: 30,
-        cached_content_token_count: 5,
-        thoughts_token_count: 2,
-        tool_token_count: 3,
+        usage: {
+          input_token_count: 10,
+          output_token_count: 20,
+          total_token_count: 30,
+          cached_content_token_count: 5,
+          thoughts_token_count: 2,
+          tool_token_count: 3,
+        },
       } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };
 
       service.addEvent(event);
@@ -185,12 +189,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 10,
-        output_token_count: 20,
-        total_token_count: 30,
-        cached_content_token_count: 5,
-        thoughts_token_count: 2,
-        tool_token_count: 3,
+        usage: {
+          input_token_count: 10,
+          output_token_count: 20,
+          total_token_count: 30,
+          cached_content_token_count: 5,
+          thoughts_token_count: 2,
+          tool_token_count: 3,
+        },
       } as ApiResponseEvent & {
         'event.name': typeof EVENT_API_RESPONSE;
       };
@@ -198,12 +204,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 600,
-        input_token_count: 15,
-        output_token_count: 25,
-        total_token_count: 40,
-        cached_content_token_count: 10,
-        thoughts_token_count: 4,
-        tool_token_count: 6,
+        usage: {
+          input_token_count: 15,
+          output_token_count: 25,
+          total_token_count: 40,
+          cached_content_token_count: 10,
+          thoughts_token_count: 4,
+          tool_token_count: 6,
+        },
       } as ApiResponseEvent & {
         'event.name': typeof EVENT_API_RESPONSE;
       };
@@ -235,12 +243,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 10,
-        output_token_count: 20,
-        total_token_count: 30,
-        cached_content_token_count: 5,
-        thoughts_token_count: 2,
-        tool_token_count: 3,
+        usage: {
+          input_token_count: 10,
+          output_token_count: 20,
+          total_token_count: 30,
+          cached_content_token_count: 5,
+          thoughts_token_count: 2,
+          tool_token_count: 3,
+        },
       } as ApiResponseEvent & {
         'event.name': typeof EVENT_API_RESPONSE;
       };
@@ -248,12 +258,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-flash',
         duration_ms: 1000,
-        input_token_count: 100,
-        output_token_count: 200,
-        total_token_count: 300,
-        cached_content_token_count: 50,
-        thoughts_token_count: 20,
-        tool_token_count: 30,
+        usage: {
+          input_token_count: 100,
+          output_token_count: 200,
+          total_token_count: 300,
+          cached_content_token_count: 50,
+          thoughts_token_count: 20,
+          tool_token_count: 30,
+        },
       } as ApiResponseEvent & {
         'event.name': typeof EVENT_API_RESPONSE;
       };
@@ -304,12 +316,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 10,
-        output_token_count: 20,
-        total_token_count: 30,
-        cached_content_token_count: 5,
-        thoughts_token_count: 2,
-        tool_token_count: 3,
+        usage: {
+          input_token_count: 10,
+          output_token_count: 20,
+          total_token_count: 30,
+          cached_content_token_count: 5,
+          thoughts_token_count: 2,
+          tool_token_count: 3,
+        },
       } as ApiResponseEvent & {
         'event.name': typeof EVENT_API_RESPONSE;
       };
@@ -534,12 +548,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 100,
-        output_token_count: 200,
-        total_token_count: 300,
-        cached_content_token_count: 50,
-        thoughts_token_count: 20,
-        tool_token_count: 30,
+        usage: {
+          input_token_count: 100,
+          output_token_count: 200,
+          total_token_count: 300,
+          cached_content_token_count: 50,
+          thoughts_token_count: 20,
+          tool_token_count: 30,
+        },
       } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };
 
       service.addEvent(event);
@@ -559,12 +575,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 100,
-        output_token_count: 200,
-        total_token_count: 300,
-        cached_content_token_count: 50,
-        thoughts_token_count: 20,
-        tool_token_count: 30,
+        usage: {
+          input_token_count: 100,
+          output_token_count: 200,
+          total_token_count: 300,
+          cached_content_token_count: 50,
+          thoughts_token_count: 20,
+          tool_token_count: 30,
+        },
       } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };
 
       service.addEvent(event);
@@ -584,12 +602,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 100,
-        output_token_count: 200,
-        total_token_count: 300,
-        cached_content_token_count: 50,
-        thoughts_token_count: 20,
-        tool_token_count: 30,
+        usage: {
+          input_token_count: 100,
+          output_token_count: 200,
+          total_token_count: 300,
+          cached_content_token_count: 50,
+          thoughts_token_count: 20,
+          tool_token_count: 30,
+        },
       } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };
 
       service.addEvent(event);
@@ -616,12 +636,14 @@ describe('UiTelemetryService', () => {
         'event.name': EVENT_API_RESPONSE,
         model: 'gemini-2.5-pro',
         duration_ms: 500,
-        input_token_count: 100,
-        output_token_count: 200,
-        total_token_count: 300,
-        cached_content_token_count: 50,
-        thoughts_token_count: 20,
-        tool_token_count: 30,
+        usage: {
+          input_token_count: 100,
+          output_token_count: 200,
+          total_token_count: 300,
+          cached_content_token_count: 50,
+          thoughts_token_count: 20,
+          tool_token_count: 30,
+        },
       } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE };
 
       service.addEvent(event);
diff --git a/packages/core/src/telemetry/uiTelemetry.ts b/packages/core/src/telemetry/uiTelemetry.ts
index 2b6a813485..79cb957d7d 100644
--- a/packages/core/src/telemetry/uiTelemetry.ts
+++ b/packages/core/src/telemetry/uiTelemetry.ts
@@ -165,12 +165,12 @@ export class UiTelemetryService extends EventEmitter {
     modelMetrics.api.totalRequests++;
     modelMetrics.api.totalLatencyMs += event.duration_ms;
 
-    modelMetrics.tokens.prompt += event.input_token_count;
-    modelMetrics.tokens.candidates += event.output_token_count;
-    modelMetrics.tokens.total += event.total_token_count;
-    modelMetrics.tokens.cached += event.cached_content_token_count;
-    modelMetrics.tokens.thoughts += event.thoughts_token_count;
-    modelMetrics.tokens.tool += event.tool_token_count;
+    modelMetrics.tokens.prompt += event.usage.input_token_count;
+    modelMetrics.tokens.candidates += event.usage.output_token_count;
+    modelMetrics.tokens.total += event.usage.total_token_count;
+    modelMetrics.tokens.cached += event.usage.cached_content_token_count;
+    modelMetrics.tokens.thoughts += event.usage.thoughts_token_count;
+    modelMetrics.tokens.tool += event.usage.tool_token_count;
   }
 
   private processApiError(event: ApiErrorEvent) {

From f6423ea45a5fccdc6d7793c42ee309cb1dfc5a99 Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Tue, 28 Oct 2025 13:31:36 -0700
Subject: [PATCH 67/73] Remove obsolete snapshots (#12180)

---
 .../core/__snapshots__/prompts.test.ts.snap   | 396 ------------------
 1 file changed, 396 deletions(-)

diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap
index 9ce812639d..fdf550bed3 100644
--- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap
+++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap
@@ -610,402 +610,6 @@ You are running outside of a sandbox container, directly on the user's system. F
 
 
 
-# Final Reminder
-Your core function is efficient and safe assistance. Balance extreme conciseness with the crucial need for clarity, especially regarding safety and potential system modifications. Always prioritize user control and project conventions. Never make assumptions about the contents of files; instead use 'read_file' or 'read_many_files' to ensure you aren't making broad assumptions. Finally, you are an agent - please keep going until the user's query is completely resolved."
-`;
-
-exports[`Core System Prompt (prompts.ts) > should return the base prompt when 'no userMemory is provided' 1`] = `
-"You are an interactive CLI agent specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.
-
-# Core Mandates
-
-- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
-- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
-- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
-- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
-- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
-- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
-- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.
-- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
-- **Path Construction:** Before using any file system tool (e.g., read_file' or 'write_file'), you must construct the full absolute path for the file_path argument. Always combine the absolute path of the project's root directory with the file's path relative to the root. For example, if the project root is /path/to/project/ and the file is foo/bar/baz.txt, the final path you must use is /path/to/project/foo/bar/baz.txt. If the user provides a relative path, you must resolve it against the root directory to create an absolute path.
-- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
-
-
-# Primary Workflows
-
-## Software Engineering Tasks
-When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have.
-2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
-4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
-5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
-6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
-
-## New Applications
-
-**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'.
-
-1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2D or 3D game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions.
-2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern, and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner.
-  - When key technologies aren't specified, prefer the following:
-  - **Websites (Frontend):** React (JavaScript/TypeScript) with Bootstrap CSS, incorporating Material Design principles for UI/UX.
-  - **Back-End APIs:** Node.js with Express.js (JavaScript/TypeScript) or Python with FastAPI.
-  - **Full-stack:** Next.js (React/Node.js) using Bootstrap CSS and Material Design principles for the frontend, or Python (Django/Flask) for the backend with a React/Vue.js frontend styled with Bootstrap CSS and Material Design principles.
-  - **CLIs:** Python or Go.
-  - **Mobile App:** Compose Multiplatform (Kotlin Multiplatform) or Flutter (Dart) using Material Design libraries and principles, when sharing code between Android and iOS. Jetpack Compose (Kotlin JVM) with Material Design principles or SwiftUI (Swift) for native apps targeted at either Android or iOS, respectively.
-  - **3d Games:** HTML/CSS/JavaScript with Three.js.
-  - **2d Games:** HTML/CSS/JavaScript.
-3. **User Approval:** Obtain user approval for the proposed plan.
-4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible.
-5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors.
-6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype.
-
-# Operational Guidelines
-
-## Shell tool output token efficiency:
-
-IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION.
-
-- Always prefer command flags that reduce output verbosity when using 'run_shell_command'.
-- Aim to minimize tool output tokens while still capturing necessary information.
-- If a command is expected to produce a lot of output, use quiet or silent flags where available and appropriate.
-- Always consider the trade-off between output verbosity and the need for information. If a command's full output is essential for understanding the result, avoid overly aggressive quieting that might obscure important details.
-- If a command does not have quiet/silent flags or for commands with potentially long output that may not be useful, redirect stdout and stderr to temp files in the project's temporary directory: /tmp/project-temp. For example: 'command > /tmp/project-temp/out.log 2> /tmp/project-temp/err.log'.
-- After the command runs, inspect the temp files (e.g. '/tmp/project-temp/out.log' and '/tmp/project-temp/err.log') using commands like 'grep', 'tail', 'head', ... (or platform equivalents). Remove the temp files when done.
-
-
-## Tone and Style (CLI Interaction)
-- **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
-- **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. Focus strictly on the user's query.
-- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.
-- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes..."). Get straight to the action or answer.
-- **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace.
-- **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls or code blocks unless specifically part of the required code/command itself.
-- **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate.
-
-## Security and Safety Rules
-- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this).
-- **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information.
-
-## Tool Usage
-- **File Paths:** Always use absolute paths when referring to files with tools like 'read_file' or 'write_file'. Relative paths are not supported. You must provide an absolute path.
-- **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase).
-- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first.
-- **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user.
-- **Interactive Commands:** Prefer non-interactive commands when it makes sense; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`ctrl + f\` to focus into the shell to provide input.
-- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?"
-- **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward.
-
-## Interaction Details
-- **Help Command:** The user can use '/help' to display help information.
-- **Feedback:** To report a bug or provide feedback, please use the /bug command.
-
-
-# Outside of Sandbox
-You are running outside of a sandbox container, directly on the user's system. For critical commands that are particularly likely to modify the user's system outside of the project directory or system temp directory, as you explain the command to the user (per the Explain Critical Commands rule above), also remind the user to consider enabling sandboxing.
-
-
-
-
-# Final Reminder
-Your core function is efficient and safe assistance. Balance extreme conciseness with the crucial need for clarity, especially regarding safety and potential system modifications. Always prioritize user control and project conventions. Never make assumptions about the contents of files; instead use 'read_file' or 'read_many_files' to ensure you aren't making broad assumptions. Finally, you are an agent - please keep going until the user's query is completely resolved."
-`;
-
-exports[`Core System Prompt (prompts.ts) > should return the base prompt when 'userMemory is empty string' 1`] = `
-"You are an interactive CLI agent specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.
-
-# Core Mandates
-
-- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
-- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
-- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
-- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
-- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
-- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
-- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.
-- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
-- **Path Construction:** Before using any file system tool (e.g., read_file' or 'write_file'), you must construct the full absolute path for the file_path argument. Always combine the absolute path of the project's root directory with the file's path relative to the root. For example, if the project root is /path/to/project/ and the file is foo/bar/baz.txt, the final path you must use is /path/to/project/foo/bar/baz.txt. If the user provides a relative path, you must resolve it against the root directory to create an absolute path.
-- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
-
-
-# Primary Workflows
-
-## Software Engineering Tasks
-When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have.
-2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
-4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
-5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
-6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
-
-## New Applications
-
-**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'.
-
-1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2D or 3D game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions.
-2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern, and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner.
-  - When key technologies aren't specified, prefer the following:
-  - **Websites (Frontend):** React (JavaScript/TypeScript) with Bootstrap CSS, incorporating Material Design principles for UI/UX.
-  - **Back-End APIs:** Node.js with Express.js (JavaScript/TypeScript) or Python with FastAPI.
-  - **Full-stack:** Next.js (React/Node.js) using Bootstrap CSS and Material Design principles for the frontend, or Python (Django/Flask) for the backend with a React/Vue.js frontend styled with Bootstrap CSS and Material Design principles.
-  - **CLIs:** Python or Go.
-  - **Mobile App:** Compose Multiplatform (Kotlin Multiplatform) or Flutter (Dart) using Material Design libraries and principles, when sharing code between Android and iOS. Jetpack Compose (Kotlin JVM) with Material Design principles or SwiftUI (Swift) for native apps targeted at either Android or iOS, respectively.
-  - **3d Games:** HTML/CSS/JavaScript with Three.js.
-  - **2d Games:** HTML/CSS/JavaScript.
-3. **User Approval:** Obtain user approval for the proposed plan.
-4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible.
-5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors.
-6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype.
-
-# Operational Guidelines
-
-## Shell tool output token efficiency:
-
-IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION.
-
-- Always prefer command flags that reduce output verbosity when using 'run_shell_command'.
-- Aim to minimize tool output tokens while still capturing necessary information.
-- If a command is expected to produce a lot of output, use quiet or silent flags where available and appropriate.
-- Always consider the trade-off between output verbosity and the need for information. If a command's full output is essential for understanding the result, avoid overly aggressive quieting that might obscure important details.
-- If a command does not have quiet/silent flags or for commands with potentially long output that may not be useful, redirect stdout and stderr to temp files in the project's temporary directory: /tmp/project-temp. For example: 'command > /tmp/project-temp/out.log 2> /tmp/project-temp/err.log'.
-- After the command runs, inspect the temp files (e.g. '/tmp/project-temp/out.log' and '/tmp/project-temp/err.log') using commands like 'grep', 'tail', 'head', ... (or platform equivalents). Remove the temp files when done.
-
-
-## Tone and Style (CLI Interaction)
-- **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
-- **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. Focus strictly on the user's query.
-- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.
-- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes..."). Get straight to the action or answer.
-- **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace.
-- **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls or code blocks unless specifically part of the required code/command itself.
-- **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate.
-
-## Security and Safety Rules
-- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this).
-- **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information.
-
-## Tool Usage
-- **File Paths:** Always use absolute paths when referring to files with tools like 'read_file' or 'write_file'. Relative paths are not supported. You must provide an absolute path.
-- **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase).
-- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first.
-- **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user.
-- **Interactive Commands:** Prefer non-interactive commands when it makes sense; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`ctrl + f\` to focus into the shell to provide input.
-- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?"
-- **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward.
-
-## Interaction Details
-- **Help Command:** The user can use '/help' to display help information.
-- **Feedback:** To report a bug or provide feedback, please use the /bug command.
-
-
-# Outside of Sandbox
-You are running outside of a sandbox container, directly on the user's system. For critical commands that are particularly likely to modify the user's system outside of the project directory or system temp directory, as you explain the command to the user (per the Explain Critical Commands rule above), also remind the user to consider enabling sandboxing.
-
-
-
-
-# Final Reminder
-Your core function is efficient and safe assistance. Balance extreme conciseness with the crucial need for clarity, especially regarding safety and potential system modifications. Always prioritize user control and project conventions. Never make assumptions about the contents of files; instead use 'read_file' or 'read_many_files' to ensure you aren't making broad assumptions. Finally, you are an agent - please keep going until the user's query is completely resolved."
-`;
-
-exports[`Core System Prompt (prompts.ts) > should return the base prompt when 'userMemory is whitespace only' 1`] = `
-"You are an interactive CLI agent specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.
-
-# Core Mandates
-
-- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
-- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
-- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
-- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
-- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
-- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
-- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.
-- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
-- **Path Construction:** Before using any file system tool (e.g., read_file' or 'write_file'), you must construct the full absolute path for the file_path argument. Always combine the absolute path of the project's root directory with the file's path relative to the root. For example, if the project root is /path/to/project/ and the file is foo/bar/baz.txt, the final path you must use is /path/to/project/foo/bar/baz.txt. If the user provides a relative path, you must resolve it against the root directory to create an absolute path.
-- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
-
-
-# Primary Workflows
-
-## Software Engineering Tasks
-When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have.
-2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
-4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
-5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
-6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
-
-## New Applications
-
-**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'.
-
-1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2D or 3D game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions.
-2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern, and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner.
-  - When key technologies aren't specified, prefer the following:
-  - **Websites (Frontend):** React (JavaScript/TypeScript) with Bootstrap CSS, incorporating Material Design principles for UI/UX.
-  - **Back-End APIs:** Node.js with Express.js (JavaScript/TypeScript) or Python with FastAPI.
-  - **Full-stack:** Next.js (React/Node.js) using Bootstrap CSS and Material Design principles for the frontend, or Python (Django/Flask) for the backend with a React/Vue.js frontend styled with Bootstrap CSS and Material Design principles.
-  - **CLIs:** Python or Go.
-  - **Mobile App:** Compose Multiplatform (Kotlin Multiplatform) or Flutter (Dart) using Material Design libraries and principles, when sharing code between Android and iOS. Jetpack Compose (Kotlin JVM) with Material Design principles or SwiftUI (Swift) for native apps targeted at either Android or iOS, respectively.
-  - **3d Games:** HTML/CSS/JavaScript with Three.js.
-  - **2d Games:** HTML/CSS/JavaScript.
-3. **User Approval:** Obtain user approval for the proposed plan.
-4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible.
-5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors.
-6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype.
-
-# Operational Guidelines
-
-## Shell tool output token efficiency:
-
-IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION.
-
-- Always prefer command flags that reduce output verbosity when using 'run_shell_command'.
-- Aim to minimize tool output tokens while still capturing necessary information.
-- If a command is expected to produce a lot of output, use quiet or silent flags where available and appropriate.
-- Always consider the trade-off between output verbosity and the need for information. If a command's full output is essential for understanding the result, avoid overly aggressive quieting that might obscure important details.
-- If a command does not have quiet/silent flags or for commands with potentially long output that may not be useful, redirect stdout and stderr to temp files in the project's temporary directory: /tmp/project-temp. For example: 'command > /tmp/project-temp/out.log 2> /tmp/project-temp/err.log'.
-- After the command runs, inspect the temp files (e.g. '/tmp/project-temp/out.log' and '/tmp/project-temp/err.log') using commands like 'grep', 'tail', 'head', ... (or platform equivalents). Remove the temp files when done.
-
-
-## Tone and Style (CLI Interaction)
-- **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
-- **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. Focus strictly on the user's query.
-- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.
-- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes..."). Get straight to the action or answer.
-- **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace.
-- **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls or code blocks unless specifically part of the required code/command itself.
-- **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate.
-
-## Security and Safety Rules
-- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this).
-- **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information.
-
-## Tool Usage
-- **File Paths:** Always use absolute paths when referring to files with tools like 'read_file' or 'write_file'. Relative paths are not supported. You must provide an absolute path.
-- **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase).
-- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first.
-- **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user.
-- **Interactive Commands:** Prefer non-interactive commands when it makes sense; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`ctrl + f\` to focus into the shell to provide input.
-- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?"
-- **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward.
-
-## Interaction Details
-- **Help Command:** The user can use '/help' to display help information.
-- **Feedback:** To report a bug or provide feedback, please use the /bug command.
-
-
-# Outside of Sandbox
-You are running outside of a sandbox container, directly on the user's system. For critical commands that are particularly likely to modify the user's system outside of the project directory or system temp directory, as you explain the command to the user (per the Explain Critical Commands rule above), also remind the user to consider enabling sandboxing.
-
-
-
-
-# Final Reminder
-Your core function is efficient and safe assistance. Balance extreme conciseness with the crucial need for clarity, especially regarding safety and potential system modifications. Always prioritize user control and project conventions. Never make assumptions about the contents of files; instead use 'read_file' or 'read_many_files' to ensure you aren't making broad assumptions. Finally, you are an agent - please keep going until the user's query is completely resolved."
-`;
-
-exports[`Core System Prompt (prompts.ts) > should return the base prompt when no userMemory is provided 1`] = `
-"You are an interactive CLI agent specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.
-
-# Core Mandates
-
-- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
-- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
-- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
-- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
-- **Comments:** Add code comments sparingly. Focus on *why* something is done, especially for complex logic, rather than *what* is done. Only add high-value comments if necessary for clarity or if requested by the user. Do not edit comments that are separate from the code you are changing. *NEVER* talk to the user or describe your changes through comments.
-- **Proactiveness:** Fulfill the user's request thoroughly. When adding features or fixing bugs, this includes adding tests to ensure quality. Consider all created files, especially tests, to be permanent artifacts unless the user says otherwise.
-- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.
-- **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked.
-- **Path Construction:** Before using any file system tool (e.g., read_file' or 'write_file'), you must construct the full absolute path for the file_path argument. Always combine the absolute path of the project's root directory with the file's path relative to the root. For example, if the project root is /path/to/project/ and the file is foo/bar/baz.txt, the final path you must use is /path/to/project/foo/bar/baz.txt. If the user provides a relative path, you must resolve it against the root directory to create an absolute path.
-- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.
-
-
-# Primary Workflows
-
-## Software Engineering Tasks
-When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have.
-2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
-4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
-5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
-6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
-
-## New Applications
-
-**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'.
-
-1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2D or 3D game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions.
-2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern, and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner.
-  - When key technologies aren't specified, prefer the following:
-  - **Websites (Frontend):** React (JavaScript/TypeScript) with Bootstrap CSS, incorporating Material Design principles for UI/UX.
-  - **Back-End APIs:** Node.js with Express.js (JavaScript/TypeScript) or Python with FastAPI.
-  - **Full-stack:** Next.js (React/Node.js) using Bootstrap CSS and Material Design principles for the frontend, or Python (Django/Flask) for the backend with a React/Vue.js frontend styled with Bootstrap CSS and Material Design principles.
-  - **CLIs:** Python or Go.
-  - **Mobile App:** Compose Multiplatform (Kotlin Multiplatform) or Flutter (Dart) using Material Design libraries and principles, when sharing code between Android and iOS. Jetpack Compose (Kotlin JVM) with Material Design principles or SwiftUI (Swift) for native apps targeted at either Android or iOS, respectively.
-  - **3d Games:** HTML/CSS/JavaScript with Three.js.
-  - **2d Games:** HTML/CSS/JavaScript.
-3. **User Approval:** Obtain user approval for the proposed plan.
-4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible.
-5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors.
-6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype.
-
-# Operational Guidelines
-
-## Shell tool output token efficiency:
-
-IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION.
-
-- Always prefer command flags that reduce output verbosity when using 'run_shell_command'.
-- Aim to minimize tool output tokens while still capturing necessary information.
-- If a command is expected to produce a lot of output, use quiet or silent flags where available and appropriate.
-- Always consider the trade-off between output verbosity and the need for information. If a command's full output is essential for understanding the result, avoid overly aggressive quieting that might obscure important details.
-- If a command does not have quiet/silent flags or for commands with potentially long output that may not be useful, redirect stdout and stderr to temp files in the project's temporary directory: /tmp/project-temp. For example: 'command > /tmp/project-temp/out.log 2> /tmp/project-temp/err.log'.
-- After the command runs, inspect the temp files (e.g. '/tmp/project-temp/out.log' and '/tmp/project-temp/err.log') using commands like 'grep', 'tail', 'head', ... (or platform equivalents). Remove the temp files when done.
-
-
-## Tone and Style (CLI Interaction)
-- **Concise & Direct:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
-- **Minimal Output:** Aim for fewer than 3 lines of text output (excluding tool use/code generation) per response whenever practical. Focus strictly on the user's query.
-- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.
-- **No Chitchat:** Avoid conversational filler, preambles ("Okay, I will now..."), or postambles ("I have finished the changes..."). Get straight to the action or answer.
-- **Formatting:** Use GitHub-flavored Markdown. Responses will be rendered in monospace.
-- **Tools vs. Text:** Use tools for actions, text output *only* for communication. Do not add explanatory comments within tool calls or code blocks unless specifically part of the required code/command itself.
-- **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate.
-
-## Security and Safety Rules
-- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this).
-- **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information.
-
-## Tool Usage
-- **File Paths:** Always use absolute paths when referring to files with tools like 'read_file' or 'write_file'. Relative paths are not supported. You must provide an absolute path.
-- **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase).
-- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first.
-- **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user.
-- **Interactive Commands:** Prefer non-interactive commands when it makes sense; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`ctrl + f\` to focus into the shell to provide input.
-- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?"
-- **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward.
-
-## Interaction Details
-- **Help Command:** The user can use '/help' to display help information.
-- **Feedback:** To report a bug or provide feedback, please use the /bug command.
-
-
-# Outside of Sandbox
-You are running outside of a sandbox container, directly on the user's system. For critical commands that are particularly likely to modify the user's system outside of the project directory or system temp directory, as you explain the command to the user (per the Explain Critical Commands rule above), also remind the user to consider enabling sandboxing.
-
-
-
-
 # Final Reminder
 Your core function is efficient and safe assistance. Balance extreme conciseness with the crucial need for clarity, especially regarding safety and potential system modifications. Always prioritize user control and project conventions. Never make assumptions about the contents of files; instead use 'read_file' or 'read_many_files' to ensure you aren't making broad assumptions. Finally, you are an agent - please keep going until the user's query is completely resolved."
 `;

From 601a639f95ee7df4d8c3f09966ce471c5e303b81 Mon Sep 17 00:00:00 2001
From: Victor May <mayvic@google.com>
Date: Tue, 28 Oct 2025 18:25:53 -0400
Subject: [PATCH 68/73] Disable model routing for oauth users (#11889)

Co-authored-by: matt korwel <matt.korwel@gmail.com>
---
 packages/core/src/config/config.test.ts | 77 +++++++++++++++++++++----
 packages/core/src/config/config.ts      | 22 ++++++-
 2 files changed, 86 insertions(+), 13 deletions(-)

diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index 5c49e50ec1..7bcf1ff941 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -556,29 +556,84 @@ describe('Server Config (config.ts)', () => {
     });
   });
 
-  describe('UseModelRouter Configuration', () => {
-    it('should default useModelRouter to false when not provided', () => {
-      const config = new Config(baseParams);
+  describe('Model Router with Auth', () => {
+    it('should disable model router by default for oauth-personal', async () => {
+      const config = new Config({
+        ...baseParams,
+        useModelRouter: true,
+      });
+      await config.refreshAuth(AuthType.LOGIN_WITH_GOOGLE);
       expect(config.getUseModelRouter()).toBe(false);
     });
 
-    it('should set useModelRouter to true when provided as true', () => {
-      const paramsWithModelRouter: ConfigParameters = {
+    it('should enable model router by default for other auth types', async () => {
+      const config = new Config({
         ...baseParams,
         useModelRouter: true,
-      };
-      const config = new Config(paramsWithModelRouter);
+      });
+      await config.refreshAuth(AuthType.USE_GEMINI);
       expect(config.getUseModelRouter()).toBe(true);
     });
 
-    it('should set useModelRouter to false when explicitly provided as false', () => {
-      const paramsWithModelRouter: ConfigParameters = {
+    it('should disable model router for specified auth type', async () => {
+      const config = new Config({
+        ...baseParams,
+        useModelRouter: true,
+        disableModelRouterForAuth: [AuthType.USE_GEMINI],
+      });
+      await config.refreshAuth(AuthType.USE_GEMINI);
+      expect(config.getUseModelRouter()).toBe(false);
+    });
+
+    it('should enable model router for other auth type', async () => {
+      const config = new Config({
+        ...baseParams,
+        useModelRouter: true,
+        disableModelRouterForAuth: [],
+      });
+      await config.refreshAuth(AuthType.LOGIN_WITH_GOOGLE);
+      expect(config.getUseModelRouter()).toBe(true);
+    });
+
+    it('should keep model router disabled when useModelRouter is false', async () => {
+      const config = new Config({
         ...baseParams,
         useModelRouter: false,
-      };
-      const config = new Config(paramsWithModelRouter);
+        disableModelRouterForAuth: [AuthType.USE_GEMINI],
+      });
+      await config.refreshAuth(AuthType.LOGIN_WITH_GOOGLE);
       expect(config.getUseModelRouter()).toBe(false);
     });
+
+    it('should keep the user-chosen model after refreshAuth, even when model router is disabled for the auth type', async () => {
+      const config = new Config({
+        ...baseParams,
+        useModelRouter: true,
+        disableModelRouterForAuth: [AuthType.USE_GEMINI],
+      });
+      const chosenModel = 'gemini-1.5-pro-latest';
+      config.setModel(chosenModel);
+
+      await config.refreshAuth(AuthType.USE_GEMINI);
+
+      expect(config.getUseModelRouter()).toBe(false);
+      expect(config.getModel()).toBe(chosenModel);
+    });
+
+    it('should keep the user-chosen model after refreshAuth, when model router is enabled for the auth type', async () => {
+      const config = new Config({
+        ...baseParams,
+        useModelRouter: true,
+        disableModelRouterForAuth: [AuthType.USE_GEMINI],
+      });
+      const chosenModel = 'gemini-1.5-pro-latest';
+      config.setModel(chosenModel);
+
+      await config.refreshAuth(AuthType.LOGIN_WITH_GOOGLE);
+
+      expect(config.getUseModelRouter()).toBe(true);
+      expect(config.getModel()).toBe(chosenModel);
+    });
   });
 
   describe('ContinueOnFailedApiCall Configuration', () => {
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 6b683ac5ac..878c2fe782 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -46,6 +46,7 @@ import {
   DEFAULT_GEMINI_EMBEDDING_MODEL,
   DEFAULT_GEMINI_FLASH_MODEL,
   DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
   DEFAULT_THINKING_MODE,
 } from './models.js';
 import { shouldAttemptBrowserLaunch } from '../utils/browser.js';
@@ -279,6 +280,7 @@ export interface ConfigParameters {
   output?: OutputSettings;
   useModelRouter?: boolean;
   enableMessageBusIntegration?: boolean;
+  disableModelRouterForAuth?: AuthType[];
   codebaseInvestigatorSettings?: CodebaseInvestigatorSettings;
   continueOnFailedApiCall?: boolean;
   retryFetchErrors?: boolean;
@@ -377,7 +379,9 @@ export class Config {
   private readonly messageBus: MessageBus;
   private readonly policyEngine: PolicyEngine;
   private readonly outputSettings: OutputSettings;
-  private readonly useModelRouter: boolean;
+  private useModelRouter: boolean;
+  private readonly initialUseModelRouter: boolean;
+  private readonly disableModelRouterForAuth?: AuthType[];
   private readonly enableMessageBusIntegration: boolean;
   private readonly codebaseInvestigatorSettings: CodebaseInvestigatorSettings;
   private readonly continueOnFailedApiCall: boolean;
@@ -477,7 +481,11 @@ export class Config {
     this.enableToolOutputTruncation = params.enableToolOutputTruncation ?? true;
     this.useSmartEdit = params.useSmartEdit ?? true;
     this.useWriteTodos = params.useWriteTodos ?? false;
-    this.useModelRouter = params.useModelRouter ?? false;
+    this.initialUseModelRouter = params.useModelRouter ?? false;
+    this.useModelRouter = this.initialUseModelRouter;
+    this.disableModelRouterForAuth = params.disableModelRouterForAuth ?? [
+      AuthType.LOGIN_WITH_GOOGLE,
+    ];
     this.enableMessageBusIntegration =
       params.enableMessageBusIntegration ?? false;
     this.codebaseInvestigatorSettings = {
@@ -551,6 +559,16 @@ export class Config {
   }
 
   async refreshAuth(authMethod: AuthType) {
+    this.useModelRouter = this.initialUseModelRouter;
+    if (this.disableModelRouterForAuth?.includes(authMethod)) {
+      this.useModelRouter = false;
+      if (this.model === DEFAULT_GEMINI_MODEL_AUTO) {
+        this.model = DEFAULT_GEMINI_MODEL;
+      }
+    } else if (this.useModelRouter && this.model === DEFAULT_GEMINI_MODEL) {
+      this.model = DEFAULT_GEMINI_MODEL_AUTO;
+    }
+
     // Vertex and Genai have incompatible encryption and sending history with
     // thoughtSignature from Genai to Vertex will fail, we need to strip them
     if (

From cca41edc6e09fc0b2972ebbf058540bfece58e67 Mon Sep 17 00:00:00 2001
From: Richie Foreman <richie.foreman@gmail.com>
Date: Tue, 28 Oct 2025 19:24:04 -0400
Subject: [PATCH 69/73] feat(docs): Symlink CONTRIBUTING.md in the docs folder
 so that the site can pick it up. (#12178)

---
 CONTRIBUTING.md      | 6 +++---
 docs/CONTRIBUTING.md | 1 +
 docs/sidebar.json    | 4 ++++
 3 files changed, 8 insertions(+), 3 deletions(-)
 create mode 120000 docs/CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 56263d51c2..5e425c410d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -243,7 +243,7 @@ npm run test:e2e
 ```
 
 For more detailed information on the integration testing framework, please see
-the [Integration Tests documentation](./docs/integration-tests.md).
+the [Integration Tests documentation](/docs/integration-tests.md).
 
 ### Linting and preflight checks
 
@@ -472,7 +472,7 @@ code.
 
 ### Documentation structure
 
-Our documentation is organized using [sidebar.json](docs/sidebar.json) as the
+Our documentation is organized using [sidebar.json](/docs/sidebar.json) as the
 table of contents. When adding new documentation:
 
 1. Create your markdown file **in the appropriate directory** under `/docs`.
@@ -524,7 +524,7 @@ Before submitting your documentation pull request, please:
 
 If you have questions about contributing documentation:
 
-- Check our [FAQ](docs/faq.md).
+- Check our [FAQ](/docs/faq.md).
 - Review existing documentation for examples.
 - Open [an issue](https://github.com/google-gemini/gemini-cli/issues) to discuss
   your proposed changes.
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
new file mode 120000
index 0000000000..44fcc63439
--- /dev/null
+++ b/docs/CONTRIBUTING.md
@@ -0,0 +1 @@
+../CONTRIBUTING.md
\ No newline at end of file
diff --git a/docs/sidebar.json b/docs/sidebar.json
index 80e2494e4b..c9fd3e16b1 100644
--- a/docs/sidebar.json
+++ b/docs/sidebar.json
@@ -9,6 +9,10 @@
       {
         "label": "Architecture Overview",
         "slug": "docs/architecture"
+      },
+      {
+        "label": "Contribution Guide",
+        "slug": "docs/contributing"
       }
     ]
   },

From 706834ecd3c6449266de412539294f16c68473ce Mon Sep 17 00:00:00 2001
From: lifefloating <imshuazi@126.com>
Date: Wed, 29 Oct 2025 10:13:04 +0800
Subject: [PATCH 70/73] fix: enhance path handling in handleAtCommand to
 support relative paths (#9065)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
---
 .../src/ui/hooks/atCommandProcessor.test.ts   | 257 +++++++++++++-----
 .../cli/src/ui/hooks/atCommandProcessor.ts    |  48 +++-
 2 files changed, 235 insertions(+), 70 deletions(-)

diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts
index 2ac006429c..a3676a34ed 100644
--- a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts
+++ b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts
@@ -38,6 +38,10 @@ describe('handleAtCommand', () => {
     return path.resolve(testRootDir, fullPath);
   }
 
+  function getRelativePath(absolutePath: string): string {
+    return path.relative(testRootDir, absolutePath);
+  }
+
   beforeEach(async () => {
     vi.resetAllMocks();
 
@@ -138,6 +142,7 @@ describe('handleAtCommand', () => {
       path.join(testRootDir, 'path', 'to', 'file.txt'),
       fileContent,
     );
+    const relativePath = getRelativePath(filePath);
     const query = `@${filePath}`;
 
     const result = await handleAtCommand({
@@ -151,9 +156,9 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: `@${filePath}` },
+        { text: `@${relativePath}` },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${filePath}:\n` },
+        { text: `\nContent from @${relativePath}:\n` },
         { text: fileContent },
         { text: '\n--- End of content ---' },
       ],
@@ -175,8 +180,10 @@ describe('handleAtCommand', () => {
       fileContent,
     );
     const dirPath = path.dirname(filePath);
+    const relativeDirPath = getRelativePath(dirPath);
+    const relativeFilePath = getRelativePath(filePath);
     const query = `@${dirPath}`;
-    const resolvedGlob = `${dirPath}/**`;
+    const resolvedGlob = `${relativeDirPath}/**`;
 
     const result = await handleAtCommand({
       query,
@@ -191,7 +198,7 @@ describe('handleAtCommand', () => {
       processedQuery: [
         { text: `@${resolvedGlob}` },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${filePath}:\n` },
+        { text: `\nContent from @${relativeFilePath}:\n` },
         { text: fileContent },
         { text: '\n--- End of content ---' },
       ],
@@ -208,6 +215,7 @@ describe('handleAtCommand', () => {
       path.join(testRootDir, 'doc.md'),
       fileContent,
     );
+    const relativePath = getRelativePath(filePath);
     const textBefore = 'Explain this: ';
     const textAfter = ' in detail.';
     const query = `${textBefore}@${filePath}${textAfter}`;
@@ -223,9 +231,9 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: `${textBefore}@${filePath}${textAfter}` },
+        { text: `${textBefore}@${relativePath}${textAfter}` },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${filePath}:\n` },
+        { text: `\nContent from @${relativePath}:\n` },
         { text: fileContent },
         { text: '\n--- End of content ---' },
       ],
@@ -253,9 +261,9 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: `@${filePath}` },
+        { text: `@${getRelativePath(filePath)}` },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${filePath}:\n` },
+        { text: `\nContent from @${getRelativePath(filePath)}:\n` },
         { text: fileContent },
         { text: '\n--- End of content ---' },
       ],
@@ -294,11 +302,13 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: query },
+        {
+          text: `@${getRelativePath(file1Path)} @${getRelativePath(file2Path)}`,
+        },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${file1Path}:\n` },
+        { text: `\nContent from @${getRelativePath(file1Path)}:\n` },
         { text: content1 },
-        { text: `\nContent from @${file2Path}:\n` },
+        { text: `\nContent from @${getRelativePath(file2Path)}:\n` },
         { text: content2 },
         { text: '\n--- End of content ---' },
       ],
@@ -333,11 +343,13 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: query },
+        {
+          text: `${text1}@${getRelativePath(file1Path)}${text2}@${getRelativePath(file2Path)}${text3}`,
+        },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${file1Path}:\n` },
+        { text: `\nContent from @${getRelativePath(file1Path)}:\n` },
         { text: content1 },
-        { text: `\nContent from @${file2Path}:\n` },
+        { text: `\nContent from @${getRelativePath(file2Path)}:\n` },
         { text: content2 },
         { text: '\n--- End of content ---' },
       ],
@@ -371,12 +383,12 @@ describe('handleAtCommand', () => {
     expect(result).toEqual({
       processedQuery: [
         {
-          text: `Look at @${file1Path} then @${invalidFile} and also just @ symbol, then @${file2Path}`,
+          text: `Look at @${getRelativePath(file1Path)} then @${invalidFile} and also just @ symbol, then @${getRelativePath(file2Path)}`,
         },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${file2Path}:\n` },
+        { text: `\nContent from @${getRelativePath(file2Path)}:\n` },
         { text: content2 },
-        { text: `\nContent from @${file1Path}:\n` },
+        { text: `\nContent from @${getRelativePath(file1Path)}:\n` },
         { text: content1 },
         { text: '\n--- End of content ---' },
       ],
@@ -474,9 +486,9 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `@${validFile}` },
+          { text: `@${getRelativePath(validFile)}` },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${validFile}:\n` },
+          { text: `\nContent from @${getRelativePath(validFile)}:\n` },
           { text: 'console.log("Hello world");' },
           { text: '\n--- End of content ---' },
         ],
@@ -507,9 +519,9 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `@${validFile} @${gitIgnoredFile}` },
+          { text: `@${getRelativePath(validFile)} @${gitIgnoredFile}` },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${validFile}:\n` },
+          { text: `\nContent from @${getRelativePath(validFile)}:\n` },
           { text: '# Project README' },
           { text: '\n--- End of content ---' },
         ],
@@ -633,9 +645,9 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: `@${validFile}` },
+        { text: `@${getRelativePath(validFile)}` },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${validFile}:\n` },
+        { text: `\nContent from @${getRelativePath(validFile)}:\n` },
         { text: 'console.log("Hello world");' },
         { text: '\n--- End of content ---' },
       ],
@@ -669,9 +681,9 @@ describe('handleAtCommand', () => {
 
     expect(result).toEqual({
       processedQuery: [
-        { text: `@${validFile} @${geminiIgnoredFile}` },
+        { text: `@${getRelativePath(validFile)} @${geminiIgnoredFile}` },
         { text: '\n--- Content from referenced files ---' },
-        { text: `\nContent from @${validFile}:\n` },
+        { text: `\nContent from @${getRelativePath(validFile)}:\n` },
         { text: '// Main application entry' },
         { text: '\n--- End of content ---' },
       ],
@@ -692,7 +704,7 @@ describe('handleAtCommand', () => {
         fileName: 'test.txt',
         fileContent: 'File content here',
         queryTemplate: (filePath: string) =>
-          `Look at @${filePath}, then explain it.`,
+          `Look at @${getRelativePath(filePath)}, then explain it.`,
         messageId: 400,
       },
       {
@@ -700,7 +712,7 @@ describe('handleAtCommand', () => {
         fileName: 'readme.md',
         fileContent: 'File content here',
         queryTemplate: (filePath: string) =>
-          `Check @${filePath}. What does it say?`,
+          `Check @${getRelativePath(filePath)}. What does it say?`,
         messageId: 401,
       },
       {
@@ -708,7 +720,7 @@ describe('handleAtCommand', () => {
         fileName: 'example.js',
         fileContent: 'Code example',
         queryTemplate: (filePath: string) =>
-          `Review @${filePath}; check for bugs.`,
+          `Review @${getRelativePath(filePath)}; check for bugs.`,
         messageId: 402,
       },
       {
@@ -716,7 +728,7 @@ describe('handleAtCommand', () => {
         fileName: 'important.txt',
         fileContent: 'Important content',
         queryTemplate: (filePath: string) =>
-          `Look at @${filePath}! This is critical.`,
+          `Look at @${getRelativePath(filePath)}! This is critical.`,
         messageId: 403,
       },
       {
@@ -724,7 +736,7 @@ describe('handleAtCommand', () => {
         fileName: 'config.json',
         fileContent: 'Config settings',
         queryTemplate: (filePath: string) =>
-          `What is in @${filePath}? Please explain.`,
+          `What is in @${getRelativePath(filePath)}? Please explain.`,
         messageId: 404,
       },
       {
@@ -732,7 +744,7 @@ describe('handleAtCommand', () => {
         fileName: 'func.ts',
         fileContent: 'Function definition',
         queryTemplate: (filePath: string) =>
-          `Analyze @${filePath}(the main function).`,
+          `Analyze @${getRelativePath(filePath)}(the main function).`,
         messageId: 405,
       },
       {
@@ -740,7 +752,7 @@ describe('handleAtCommand', () => {
         fileName: 'data.json',
         fileContent: 'Test data',
         queryTemplate: (filePath: string) =>
-          `Use data from @${filePath}) for testing.`,
+          `Use data from @${getRelativePath(filePath)}) for testing.`,
         messageId: 406,
       },
       {
@@ -748,7 +760,7 @@ describe('handleAtCommand', () => {
         fileName: 'array.js',
         fileContent: 'Array data',
         queryTemplate: (filePath: string) =>
-          `Check @${filePath}[0] for the first element.`,
+          `Check @${getRelativePath(filePath)}[0] for the first element.`,
         messageId: 407,
       },
       {
@@ -756,7 +768,7 @@ describe('handleAtCommand', () => {
         fileName: 'list.md',
         fileContent: 'List content',
         queryTemplate: (filePath: string) =>
-          `Review item @${filePath}] from the list.`,
+          `Review item @${getRelativePath(filePath)}] from the list.`,
         messageId: 408,
       },
       {
@@ -764,7 +776,7 @@ describe('handleAtCommand', () => {
         fileName: 'object.ts',
         fileContent: 'Object definition',
         queryTemplate: (filePath: string) =>
-          `Parse @${filePath}{prop1: value1}.`,
+          `Parse @${getRelativePath(filePath)}{prop1: value1}.`,
         messageId: 409,
       },
       {
@@ -772,7 +784,7 @@ describe('handleAtCommand', () => {
         fileName: 'config.yaml',
         fileContent: 'Configuration',
         queryTemplate: (filePath: string) =>
-          `Use settings from @${filePath}} for deployment.`,
+          `Use settings from @${getRelativePath(filePath)}} for deployment.`,
         messageId: 410,
       },
     ];
@@ -799,7 +811,7 @@ describe('handleAtCommand', () => {
           processedQuery: [
             { text: query },
             { text: '\n--- Content from referenced files ---' },
-            { text: `\nContent from @${filePath}:\n` },
+            { text: `\nContent from @${getRelativePath(filePath)}:\n` },
             { text: fileContent },
             { text: '\n--- End of content ---' },
           ],
@@ -832,11 +844,13 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Compare @${file1Path}, @${file2Path}; what's different?` },
+          {
+            text: `Compare @${getRelativePath(file1Path)}, @${getRelativePath(file2Path)}; what's different?`,
+          },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${file1Path}:\n` },
+          { text: `\nContent from @${getRelativePath(file1Path)}:\n` },
           { text: content1 },
-          { text: `\nContent from @${file2Path}:\n` },
+          { text: `\nContent from @${getRelativePath(file2Path)}:\n` },
           { text: content2 },
           { text: '\n--- End of content ---' },
         ],
@@ -864,9 +878,9 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Check @${filePath}, it has spaces.` },
+          { text: `Check @${getRelativePath(filePath)}, it has spaces.` },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -880,7 +894,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'example.d.ts'),
         fileContent,
       );
-      const query = `Analyze @${filePath} for type definitions.`;
+      const query = `Analyze @${getRelativePath(filePath)} for type definitions.`;
 
       const result = await handleAtCommand({
         query,
@@ -893,9 +907,11 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Analyze @${filePath} for type definitions.` },
+          {
+            text: `Analyze @${getRelativePath(filePath)} for type definitions.`,
+          },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -909,7 +925,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'config.json'),
         fileContent,
       );
-      const query = `Check @${filePath}. This file contains settings.`;
+      const query = `Check @${getRelativePath(filePath)}. This file contains settings.`;
 
       const result = await handleAtCommand({
         query,
@@ -922,9 +938,11 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Check @${filePath}. This file contains settings.` },
+          {
+            text: `Check @${getRelativePath(filePath)}. This file contains settings.`,
+          },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -938,7 +956,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'package.json'),
         fileContent,
       );
-      const query = `Review @${filePath}, then check dependencies.`;
+      const query = `Review @${getRelativePath(filePath)}, then check dependencies.`;
 
       const result = await handleAtCommand({
         query,
@@ -951,9 +969,11 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Review @${filePath}, then check dependencies.` },
+          {
+            text: `Review @${getRelativePath(filePath)}, then check dependencies.`,
+          },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -967,7 +987,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'version.1.2.3.txt'),
         fileContent,
       );
-      const query = `Check @${filePath} contains version information.`;
+      const query = `Check @${getRelativePath(filePath)} contains version information.`;
 
       const result = await handleAtCommand({
         query,
@@ -980,9 +1000,11 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Check @${filePath} contains version information.` },
+          {
+            text: `Check @${getRelativePath(filePath)} contains version information.`,
+          },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -996,7 +1018,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'end.txt'),
         fileContent,
       );
-      const query = `Show me @${filePath}.`;
+      const query = `Show me @${getRelativePath(filePath)}.`;
 
       const result = await handleAtCommand({
         query,
@@ -1009,9 +1031,9 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Show me @${filePath}.` },
+          { text: `Show me @${getRelativePath(filePath)}.` },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -1025,7 +1047,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'file$with&special#chars.txt'),
         fileContent,
       );
-      const query = `Check @${filePath} for content.`;
+      const query = `Check @${getRelativePath(filePath)} for content.`;
 
       const result = await handleAtCommand({
         query,
@@ -1038,9 +1060,9 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Check @${filePath} for content.` },
+          { text: `Check @${getRelativePath(filePath)} for content.` },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -1054,7 +1076,7 @@ describe('handleAtCommand', () => {
         path.join(testRootDir, 'basicfile.txt'),
         fileContent,
       );
-      const query = `Check @${filePath} please.`;
+      const query = `Check @${getRelativePath(filePath)} please.`;
 
       const result = await handleAtCommand({
         query,
@@ -1067,9 +1089,9 @@ describe('handleAtCommand', () => {
 
       expect(result).toEqual({
         processedQuery: [
-          { text: `Check @${filePath} please.` },
+          { text: `Check @${getRelativePath(filePath)} please.` },
           { text: '\n--- Content from referenced files ---' },
-          { text: `\nContent from @${filePath}:\n` },
+          { text: `\nContent from @${getRelativePath(filePath)}:\n` },
           { text: fileContent },
           { text: '\n--- End of content ---' },
         ],
@@ -1078,6 +1100,113 @@ describe('handleAtCommand', () => {
     });
   });
 
+  describe('absolute path handling', () => {
+    it('should handle absolute file paths correctly', async () => {
+      const fileContent = 'console.log("This is an absolute path test");';
+      const relativePath = path.join('src', 'absolute-test.ts');
+      const absolutePath = await createTestFile(
+        path.join(testRootDir, relativePath),
+        fileContent,
+      );
+      const query = `Check @${absolutePath} please.`;
+
+      const result = await handleAtCommand({
+        query,
+        config: mockConfig,
+        addItem: mockAddItem,
+        onDebugMessage: mockOnDebugMessage,
+        messageId: 500,
+        signal: abortController.signal,
+      });
+
+      expect(result).toEqual({
+        processedQuery: [
+          { text: `Check @${relativePath} please.` },
+          { text: '\n--- Content from referenced files ---' },
+          { text: `\nContent from @${relativePath}:\n` },
+          { text: fileContent },
+          { text: '\n--- End of content ---' },
+        ],
+        shouldProceed: true,
+      });
+
+      expect(mockOnDebugMessage).toHaveBeenCalledWith(
+        expect.stringContaining(`using relative path: ${relativePath}`),
+      );
+    });
+
+    it('should handle absolute directory paths correctly', async () => {
+      const fileContent =
+        'export default function test() { return "absolute dir test"; }';
+      const subDirPath = 'src/utils';
+      const fileName = 'helper.ts';
+      await createTestFile(
+        path.join(testRootDir, subDirPath, fileName),
+        fileContent,
+      );
+      const absoluteDirPath = path.join(testRootDir, subDirPath);
+      const query = `Check @${absoluteDirPath} please.`;
+
+      const result = await handleAtCommand({
+        query,
+        config: mockConfig,
+        addItem: mockAddItem,
+        onDebugMessage: mockOnDebugMessage,
+        messageId: 501,
+        signal: abortController.signal,
+      });
+
+      expect(result.shouldProceed).toBe(true);
+      expect(result.processedQuery).toEqual(
+        expect.arrayContaining([
+          { text: `Check @${subDirPath}/** please.` },
+          expect.objectContaining({
+            text: '\n--- Content from referenced files ---',
+          }),
+        ]),
+      );
+
+      expect(mockOnDebugMessage).toHaveBeenCalledWith(
+        expect.stringContaining(`using glob: ${subDirPath}/**`),
+      );
+    });
+
+    it('should skip absolute paths outside workspace', async () => {
+      const outsidePath = '/tmp/outside-workspace.txt';
+      const query = `Check @${outsidePath} please.`;
+
+      const mockWorkspaceContext = {
+        isPathWithinWorkspace: vi.fn((path: string) =>
+          path.startsWith(testRootDir),
+        ),
+        getDirectories: () => [testRootDir],
+        addDirectory: vi.fn(),
+        getInitialDirectories: () => [testRootDir],
+        setDirectories: vi.fn(),
+        onDirectoriesChanged: vi.fn(() => () => {}),
+      } as unknown as ReturnType<typeof mockConfig.getWorkspaceContext>;
+      mockConfig.getWorkspaceContext = () => mockWorkspaceContext;
+
+      const result = await handleAtCommand({
+        query,
+        config: mockConfig,
+        addItem: mockAddItem,
+        onDebugMessage: mockOnDebugMessage,
+        messageId: 502,
+        signal: abortController.signal,
+      });
+
+      expect(result).toEqual({
+        processedQuery: [{ text: `Check @${outsidePath} please.` }],
+        shouldProceed: true,
+      });
+
+      expect(mockOnDebugMessage).toHaveBeenCalledWith(
+        `Path ${outsidePath} is not in the workspace and will be skipped.`,
+      );
+    });
+  });
+
   it("should not add the user's turn to history, as that is the caller's responsibility", async () => {
     // Arrange
     const fileContent = 'This is the file content.';
@@ -1085,7 +1214,7 @@ describe('handleAtCommand', () => {
       path.join(testRootDir, 'path', 'to', 'another-file.txt'),
       fileContent,
     );
-    const query = `A query with @${filePath}`;
+    const query = `A query with @${getRelativePath(filePath)}`;
 
     // Act
     await handleAtCommand({
diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.ts b/packages/cli/src/ui/hooks/atCommandProcessor.ts
index da2eb3eb56..18ec46a7a4 100644
--- a/packages/cli/src/ui/hooks/atCommandProcessor.ts
+++ b/packages/cli/src/ui/hooks/atCommandProcessor.ts
@@ -145,6 +145,7 @@ export async function handleAtCommand({
   const pathSpecsToRead: string[] = [];
   const atPathToResolvedSpecMap = new Map<string, string>();
   const contentLabelsForDisplay: string[] = [];
+  const absoluteToRelativePathMap = new Map<string, string>();
   const ignoredByReason: Record<string, string[]> = {
     git: [],
     gemini: [],
@@ -229,17 +230,30 @@ export async function handleAtCommand({
     for (const dir of config.getWorkspaceContext().getDirectories()) {
       let currentPathSpec = pathName;
       let resolvedSuccessfully = false;
+      let relativePath = pathName;
       try {
-        const absolutePath = path.resolve(dir, pathName);
+        const absolutePath = path.isAbsolute(pathName)
+          ? pathName
+          : path.resolve(dir, pathName);
         const stats = await fs.stat(absolutePath);
+
+        // Convert absolute path to relative path
+        relativePath = path.isAbsolute(pathName)
+          ? path.relative(dir, absolutePath)
+          : pathName;
+
         if (stats.isDirectory()) {
           currentPathSpec =
-            pathName + (pathName.endsWith(path.sep) ? `**` : `/**`);
+            relativePath + (relativePath.endsWith(path.sep) ? `**` : `/**`);
           onDebugMessage(
             `Path ${pathName} resolved to directory, using glob: ${currentPathSpec}`,
           );
         } else {
-          onDebugMessage(`Path ${pathName} resolved to file: ${absolutePath}`);
+          currentPathSpec = relativePath;
+          absoluteToRelativePathMap.set(absolutePath, relativePath);
+          onDebugMessage(
+            `Path ${pathName} resolved to file: ${absolutePath}, using relative path: ${relativePath}`,
+          );
         }
         resolvedSuccessfully = true;
       } catch (error) {
@@ -266,6 +280,10 @@ export async function handleAtCommand({
                 if (lines.length > 1 && lines[1]) {
                   const firstMatchAbsolute = lines[1].trim();
                   currentPathSpec = path.relative(dir, firstMatchAbsolute);
+                  absoluteToRelativePathMap.set(
+                    firstMatchAbsolute,
+                    currentPathSpec,
+                  );
                   onDebugMessage(
                     `Glob search for ${pathName} found ${firstMatchAbsolute}, using relative path: ${currentPathSpec}`,
                   );
@@ -305,7 +323,8 @@ export async function handleAtCommand({
       if (resolvedSuccessfully) {
         pathSpecsToRead.push(currentPathSpec);
         atPathToResolvedSpecMap.set(originalAtPath, currentPathSpec);
-        contentLabelsForDisplay.push(pathName);
+        const displayPath = path.isAbsolute(pathName) ? relativePath : pathName;
+        contentLabelsForDisplay.push(displayPath);
         break;
       }
     }
@@ -430,10 +449,27 @@ export async function handleAtCommand({
         if (typeof part === 'string') {
           const match = fileContentRegex.exec(part);
           if (match) {
-            const filePathSpecInContent = match[1]; // This is a resolved pathSpec
+            const filePathSpecInContent = match[1];
             const fileActualContent = match[2].trim();
+
+            let displayPath = absoluteToRelativePathMap.get(
+              filePathSpecInContent,
+            );
+
+            // Fallback: if no mapping found, try to convert absolute path to relative
+            if (!displayPath) {
+              for (const dir of config.getWorkspaceContext().getDirectories()) {
+                if (filePathSpecInContent.startsWith(dir)) {
+                  displayPath = path.relative(dir, filePathSpecInContent);
+                  break;
+                }
+              }
+            }
+
+            displayPath = displayPath || filePathSpecInContent;
+
             processedQueryParts.push({
-              text: `\nContent from @${filePathSpecInContent}:\n`,
+              text: `\nContent from @${displayPath}:\n`,
             });
             processedQueryParts.push({ text: fileActualContent });
           } else {

From 5d87a7f9c7105343ee52193ebfc4284a3d4d040b Mon Sep 17 00:00:00 2001
From: Tommaso Sciortino <sciortino@gmail.com>
Date: Tue, 28 Oct 2025 20:48:38 -0700
Subject: [PATCH 71/73] Remove Todo Icon (#12190)

---
 packages/cli/src/ui/components/messages/Todo.tsx   |  2 +-
 .../messages/__snapshots__/Todo.test.tsx.snap      | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/packages/cli/src/ui/components/messages/Todo.tsx b/packages/cli/src/ui/components/messages/Todo.tsx
index a7d630a93d..73ba528972 100644
--- a/packages/cli/src/ui/components/messages/Todo.tsx
+++ b/packages/cli/src/ui/components/messages/Todo.tsx
@@ -34,7 +34,7 @@ const TodoTitleDisplay: React.FC<{ todos: TodoList }> = ({ todos }) => {
   return (
     <Box flexDirection="row" columnGap={2} height={1}>
       <Text color={theme.text.primary} bold aria-label="Todo list">
-        📝 Todo
+        Todo
       </Text>
       <Text color={theme.text.secondary}>{score} (ctrl+t to toggle)</Text>
     </Box>
diff --git a/packages/cli/src/ui/components/messages/__snapshots__/Todo.test.tsx.snap b/packages/cli/src/ui/components/messages/__snapshots__/Todo.test.tsx.snap
index a7001a4eda..873bf4d57b 100644
--- a/packages/cli/src/ui/components/messages/__snapshots__/Todo.test.tsx.snap
+++ b/packages/cli/src/ui/components/messages/__snapshots__/Todo.test.tsx.snap
@@ -2,7 +2,7 @@
 
 exports[`<TodoTray /> > renders a todo list with long descriptions that wrap when full view is on 1`] = `
 "──────────────────────────────────────────────────
- 📝 Todo  1/2 (ctrl+t to toggle)
+ Todo  1/2 (ctrl+t to toggle)
 
  » This is a very long description for a pending
    task that should wrap around multiple lines
@@ -13,7 +13,7 @@ exports[`<TodoTray /> > renders a todo list with long descriptions that wrap whe
 
 exports[`<TodoTray /> > renders a todo list with long descriptions that wrap when full view is on 2`] = `
 "──────────────────────────────────────────────────
- 📝 Todo  1/2 (ctrl+t to toggle) » This is a ver…"
+ Todo  1/2 (ctrl+t to toggle) » This is a very l…"
 `;
 
 exports[`<TodoTray /> > renders null when no todos are in the history 1`] = `""`;
@@ -26,7 +26,7 @@ exports[`<TodoTray /> > renders null when todo list is empty 2`] = `""`;
 
 exports[`<TodoTray /> > renders the most recent todo list when multiple write_todos calls are in history 1`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
- 📝 Todo  0/2 (ctrl+t to toggle)
+ Todo  0/2 (ctrl+t to toggle)
 
  ☐ Newer Task 1
  » Newer Task 2"
@@ -34,7 +34,7 @@ exports[`<TodoTray /> > renders the most recent todo list when multiple write_to
 
 exports[`<TodoTray /> > renders when todos exist and one is in progress 1`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
- 📝 Todo  1/3 (ctrl+t to toggle)
+ Todo  1/3 (ctrl+t to toggle)
 
  ☐ Pending Task
  » Task 2
@@ -44,12 +44,12 @@ exports[`<TodoTray /> > renders when todos exist and one is in progress 1`] = `
 
 exports[`<TodoTray /> > renders when todos exist and one is in progress 2`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
- 📝 Todo  1/3 (ctrl+t to toggle) » Task 2"
+ Todo  1/3 (ctrl+t to toggle) » Task 2"
 `;
 
 exports[`<TodoTray /> > renders when todos exist but none are in progress 1`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
- 📝 Todo  1/2 (ctrl+t to toggle)
+ Todo  1/2 (ctrl+t to toggle)
 
  ☐ Pending Task
  ✗ In Progress Task
@@ -58,5 +58,5 @@ exports[`<TodoTray /> > renders when todos exist but none are in progress 1`] =
 
 exports[`<TodoTray /> > renders when todos exist but none are in progress 2`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
- 📝 Todo  1/2 (ctrl+t to toggle)"
+ Todo  1/2 (ctrl+t to toggle)"
 `;

From 372b5887789d92eff52e6c4e5ffb60cb2476ba8b Mon Sep 17 00:00:00 2001
From: gemini-cli-robot <gemini-cli-robot@google.com>
Date: Tue, 28 Oct 2025 21:53:10 -0700
Subject: [PATCH 72/73] chore(release): bump version to
 0.13.0-nightly.20251029.cca41edc (#12191)

Co-authored-by: matt korwel <matt.korwel@gmail.com>
---
 package-lock.json                          | 14 +++++++-------
 package.json                               |  4 ++--
 packages/a2a-server/package.json           |  2 +-
 packages/cli/package.json                  |  4 ++--
 packages/core/package.json                 |  2 +-
 packages/test-utils/package.json           |  2 +-
 packages/vscode-ide-companion/package.json |  2 +-
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index a0e554676c..5ef0dfe882 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@google/gemini-cli",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@google/gemini-cli",
-      "version": "0.12.0-nightly.20251022.0542de95",
+      "version": "0.13.0-nightly.20251029.cca41edc",
       "workspaces": [
         "packages/*"
       ],
@@ -17434,7 +17434,7 @@
     },
     "packages/a2a-server": {
       "name": "@google/gemini-cli-a2a-server",
-      "version": "0.12.0-nightly.20251022.0542de95",
+      "version": "0.13.0-nightly.20251029.cca41edc",
       "dependencies": {
         "@a2a-js/sdk": "^0.3.2",
         "@google-cloud/storage": "^7.16.0",
@@ -17708,7 +17708,7 @@
     },
     "packages/cli": {
       "name": "@google/gemini-cli",
-      "version": "0.12.0-nightly.20251022.0542de95",
+      "version": "0.13.0-nightly.20251029.cca41edc",
       "dependencies": {
         "@google/gemini-cli-core": "file:../core",
         "@google/genai": "1.16.0",
@@ -17828,7 +17828,7 @@
     },
     "packages/core": {
       "name": "@google/gemini-cli-core",
-      "version": "0.12.0-nightly.20251022.0542de95",
+      "version": "0.13.0-nightly.20251029.cca41edc",
       "dependencies": {
         "@google-cloud/logging": "^11.2.1",
         "@google-cloud/opentelemetry-cloud-monitoring-exporter": "^0.21.0",
@@ -17969,7 +17969,7 @@
     },
     "packages/test-utils": {
       "name": "@google/gemini-cli-test-utils",
-      "version": "0.12.0-nightly.20251022.0542de95",
+      "version": "0.13.0-nightly.20251029.cca41edc",
       "license": "Apache-2.0",
       "devDependencies": {
         "typescript": "^5.3.3"
@@ -17980,7 +17980,7 @@
     },
     "packages/vscode-ide-companion": {
       "name": "gemini-cli-vscode-ide-companion",
-      "version": "0.12.0-nightly.20251022.0542de95",
+      "version": "0.13.0-nightly.20251029.cca41edc",
       "license": "LICENSE",
       "dependencies": {
         "@modelcontextprotocol/sdk": "^1.15.1",
diff --git a/package.json b/package.json
index b283480a3f..3275a0f360 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@google/gemini-cli",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "engines": {
     "node": ">=20.0.0"
   },
@@ -14,7 +14,7 @@
     "url": "git+https://github.com/google-gemini/gemini-cli.git"
   },
   "config": {
-    "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.12.0-nightly.20251022.0542de95"
+    "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.13.0-nightly.20251029.cca41edc"
   },
   "scripts": {
     "start": "cross-env NODE_ENV=development node scripts/start.js",
diff --git a/packages/a2a-server/package.json b/packages/a2a-server/package.json
index ec897db298..049fa23b48 100644
--- a/packages/a2a-server/package.json
+++ b/packages/a2a-server/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@google/gemini-cli-a2a-server",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "description": "Gemini CLI A2A Server",
   "repository": {
     "type": "git",
diff --git a/packages/cli/package.json b/packages/cli/package.json
index df73c1496b..32d38f0977 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@google/gemini-cli",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "description": "Gemini CLI",
   "repository": {
     "type": "git",
@@ -25,7 +25,7 @@
     "dist"
   ],
   "config": {
-    "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.12.0-nightly.20251022.0542de95"
+    "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.13.0-nightly.20251029.cca41edc"
   },
   "dependencies": {
     "@google/gemini-cli-core": "file:../core",
diff --git a/packages/core/package.json b/packages/core/package.json
index 10adf56555..307d2defa2 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@google/gemini-cli-core",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "description": "Gemini CLI Core",
   "repository": {
     "type": "git",
diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json
index 7c3e5ba2d2..3200c967cf 100644
--- a/packages/test-utils/package.json
+++ b/packages/test-utils/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@google/gemini-cli-test-utils",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "private": true,
   "main": "src/index.ts",
   "license": "Apache-2.0",
diff --git a/packages/vscode-ide-companion/package.json b/packages/vscode-ide-companion/package.json
index 82c54e1601..e0374e8239 100644
--- a/packages/vscode-ide-companion/package.json
+++ b/packages/vscode-ide-companion/package.json
@@ -2,7 +2,7 @@
   "name": "gemini-cli-vscode-ide-companion",
   "displayName": "Gemini CLI Companion",
   "description": "Enable Gemini CLI with direct access to your IDE workspace.",
-  "version": "0.12.0-nightly.20251022.0542de95",
+  "version": "0.13.0-nightly.20251029.cca41edc",
   "publisher": "google",
   "icon": "assets/icon.png",
   "repository": {

From 6e026bd9500d0ce5045b2e952daedf8c4af60324 Mon Sep 17 00:00:00 2001
From: shishu314 <shishu_1998@yahoo.com>
Date: Wed, 29 Oct 2025 10:49:46 -0400
Subject: [PATCH 73/73] fix(security) - Use emitFeedback instead of console
 error (#11954)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
---
 .../oauth-credential-storage.test.ts          | 32 ++++++++++++++++---
 .../code_assist/oauth-credential-storage.ts   | 17 +++++++---
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/packages/core/src/code_assist/oauth-credential-storage.test.ts b/packages/core/src/code_assist/oauth-credential-storage.test.ts
index c555b923e6..f588742495 100644
--- a/packages/core/src/code_assist/oauth-credential-storage.test.ts
+++ b/packages/core/src/code_assist/oauth-credential-storage.test.ts
@@ -8,6 +8,7 @@ import { type Credentials } from 'google-auth-library';
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { OAuthCredentialStorage } from './oauth-credential-storage.js';
 import type { OAuthCredentials } from '../mcp/token-storage/types.js';
+import { coreEvents } from '@google/gemini-cli-core';
 
 import * as path from 'node:path';
 import * as os from 'node:os';
@@ -30,6 +31,11 @@ vi.mock('node:fs', () => ({
 }));
 vi.mock('node:os');
 vi.mock('node:path');
+vi.mock('@google/gemini-cli-core', () => ({
+  coreEvents: {
+    emitFeedback: vi.fn(),
+  },
+}));
 
 describe('OAuthCredentialStorage', () => {
   const mockCredentials: Credentials = {
@@ -119,26 +125,36 @@ describe('OAuthCredentialStorage', () => {
     });
 
     it('should throw an error if loading fails', async () => {
+      const mockError = new Error('HybridTokenStorage error');
       vi.spyOn(mockHybridTokenStorage, 'getCredentials').mockRejectedValue(
-        new Error('Loading error'),
+        mockError,
       );
 
       await expect(OAuthCredentialStorage.loadCredentials()).rejects.toThrow(
         'Failed to load OAuth credentials',
       );
+      expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+        'error',
+        'Failed to load OAuth credentials',
+        mockError,
+      );
     });
 
     it('should throw an error if read file fails', async () => {
+      const mockError = new Error('Permission denied');
       vi.spyOn(mockHybridTokenStorage, 'getCredentials').mockResolvedValue(
         null,
       );
-      vi.spyOn(fs, 'readFile').mockRejectedValue(
-        new Error('Permission denied'),
-      );
+      vi.spyOn(fs, 'readFile').mockRejectedValue(mockError);
 
       await expect(OAuthCredentialStorage.loadCredentials()).rejects.toThrow(
         'Failed to load OAuth credentials',
       );
+      expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+        'error',
+        'Failed to load OAuth credentials',
+        mockError,
+      );
     });
 
     it('should not throw error if migration file removal failed', async () => {
@@ -205,13 +221,19 @@ describe('OAuthCredentialStorage', () => {
     });
 
     it('should throw an error if clearing from HybridTokenStorage fails', async () => {
+      const mockError = new Error('Deletion error');
       vi.spyOn(mockHybridTokenStorage, 'deleteCredentials').mockRejectedValue(
-        new Error('Deletion error'),
+        mockError,
       );
 
       await expect(OAuthCredentialStorage.clearCredentials()).rejects.toThrow(
         'Failed to clear OAuth credentials',
       );
+      expect(coreEvents.emitFeedback).toHaveBeenCalledWith(
+        'error',
+        'Failed to clear OAuth credentials',
+        mockError,
+      );
     });
   });
 });
diff --git a/packages/core/src/code_assist/oauth-credential-storage.ts b/packages/core/src/code_assist/oauth-credential-storage.ts
index b1ed9b0e82..0165a5647e 100644
--- a/packages/core/src/code_assist/oauth-credential-storage.ts
+++ b/packages/core/src/code_assist/oauth-credential-storage.ts
@@ -12,6 +12,7 @@ import * as path from 'node:path';
 import * as os from 'node:os';
 import { promises as fs } from 'node:fs';
 import { GEMINI_DIR } from '../utils/paths.js';
+import { coreEvents } from '@google/gemini-cli-core';
 
 const KEYCHAIN_SERVICE_NAME = 'gemini-cli-oauth';
 const MAIN_ACCOUNT_KEY = 'main-account';
@@ -49,8 +50,12 @@ export class OAuthCredentialStorage {
       // Fallback: Try to migrate from old file-based storage
       return await this.migrateFromFileStorage();
     } catch (error: unknown) {
-      console.error(error);
-      throw new Error('Failed to load OAuth credentials');
+      coreEvents.emitFeedback(
+        'error',
+        'Failed to load OAuth credentials',
+        error,
+      );
+      throw new Error('Failed to load OAuth credentials', { cause: error });
     }
   }
 
@@ -89,8 +94,12 @@ export class OAuthCredentialStorage {
       const oldFilePath = path.join(os.homedir(), GEMINI_DIR, OAUTH_FILE);
       await fs.rm(oldFilePath, { force: true }).catch(() => {});
     } catch (error: unknown) {
-      console.error(error);
-      throw new Error('Failed to clear OAuth credentials');
+      coreEvents.emitFeedback(
+        'error',
+        'Failed to clear OAuth credentials',
+        error,
+      );
+      throw new Error('Failed to clear OAuth credentials', { cause: error });
     }
   }