From 2e0641c83b012042ccbc012d420cfe6a5d46fdd7 Mon Sep 17 00:00:00 2001 From: Abhijit Balaji Date: Fri, 24 Apr 2026 14:29:38 -0700 Subject: [PATCH] feat(voice): implement real-time voice mode with cloud and local backends (#24174) --- .gemini/settings.json | 5 +- Dockerfile | 4 +- docs/cli/settings.md | 33 +- docs/reference/configuration.md | 26 ++ docs/reference/keyboard-shortcuts.md | 1 + integration-tests/voice-mode.test.ts | 76 ++++ package-lock.json | 1 + packages/cli/src/config/config.ts | 1 + packages/cli/src/config/settingsSchema.ts | 81 ++++ .../src/services/BuiltinCommandLoader.test.ts | 2 + .../cli/src/services/BuiltinCommandLoader.ts | 2 + packages/cli/src/test-utils/render.tsx | 3 + packages/cli/src/ui/AppContainer.tsx | 24 + packages/cli/src/ui/commands/types.ts | 2 + packages/cli/src/ui/commands/voiceCommand.ts | 30 ++ .../cli/src/ui/components/DialogManager.tsx | 4 + .../src/ui/components/InputPrompt.test.tsx | 407 +++++++++++++++++ .../cli/src/ui/components/InputPrompt.tsx | 69 ++- .../src/ui/components/VoiceModelDialog.tsx | 236 ++++++++++ .../__snapshots__/InputPrompt.test.tsx.snap | 7 - .../cli/src/ui/contexts/UIActionsContext.tsx | 3 + .../cli/src/ui/contexts/UIStateContext.tsx | 2 + .../ui/hooks/slashCommandProcessor.test.tsx | 2 + .../cli/src/ui/hooks/slashCommandProcessor.ts | 6 + packages/cli/src/ui/hooks/useVoiceMode.ts | 429 ++++++++++++++++++ .../cli/src/ui/hooks/useVoiceModelCommand.ts | 31 ++ packages/cli/src/ui/key/keyBindings.ts | 11 +- .../src/ui/noninteractive/nonInteractiveUi.ts | 1 + packages/core/package.json | 1 + packages/core/src/config/config.ts | 7 + packages/core/src/index.ts | 9 + packages/core/src/utils/binaryCheck.ts | 14 + packages/core/src/voice/audioRecorder.ts | 115 +++++ .../voice/geminiLiveTranscriptionProvider.ts | 178 ++++++++ .../core/src/voice/transcriptionFactory.ts | 41 ++ .../core/src/voice/transcriptionProvider.ts | 33 ++ .../core/src/voice/whisperModelManager.ts | 107 +++++ .../whisperTranscriptionProvider.test.ts | 31 ++ .../src/voice/whisperTranscriptionProvider.ts | 199 ++++++++ schemas/settings.schema.json | 53 +++ 40 files changed, 2244 insertions(+), 43 deletions(-) create mode 100644 integration-tests/voice-mode.test.ts create mode 100644 packages/cli/src/ui/commands/voiceCommand.ts create mode 100644 packages/cli/src/ui/components/VoiceModelDialog.tsx create mode 100644 packages/cli/src/ui/hooks/useVoiceMode.ts create mode 100644 packages/cli/src/ui/hooks/useVoiceModelCommand.ts create mode 100644 packages/core/src/utils/binaryCheck.ts create mode 100644 packages/core/src/voice/audioRecorder.ts create mode 100644 packages/core/src/voice/geminiLiveTranscriptionProvider.ts create mode 100644 packages/core/src/voice/transcriptionFactory.ts create mode 100644 packages/core/src/voice/transcriptionProvider.ts create mode 100644 packages/core/src/voice/whisperModelManager.ts create mode 100644 packages/core/src/voice/whisperTranscriptionProvider.test.ts create mode 100644 packages/core/src/voice/whisperTranscriptionProvider.ts diff --git a/.gemini/settings.json b/.gemini/settings.json index 4ad7bc3ed6..e7ff785b7c 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -3,7 +3,10 @@ "extensionReloading": true, "modelSteering": true, "autoMemory": true, - "gemma": true + "gemma": true, + "memoryManager": true, + "topicUpdateNarration": true, + "voiceMode": true }, "general": { "devtools": true diff --git a/Dockerfile b/Dockerfile index 25d27d46c6..44ba343902 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,8 +40,8 @@ ENV 
PATH=$PATH:/usr/local/share/npm-global/bin USER node # install gemini-cli and clean up -COPY packages/cli/dist/google-gemini-cli-*.tgz /tmp/gemini-cli.tgz -COPY packages/core/dist/google-gemini-cli-core-*.tgz /tmp/gemini-core.tgz +COPY --chown=node:node packages/cli/dist/google-gemini-cli-*.tgz /tmp/gemini-cli.tgz +COPY --chown=node:node packages/core/dist/google-gemini-cli-core-*.tgz /tmp/gemini-core.tgz RUN npm install -g /tmp/gemini-core.tgz \ && npm install -g /tmp/gemini-cli.tgz \ && node -e "const fs=require('node:fs'); JSON.parse(fs.readFileSync('/usr/local/share/npm-global/lib/node_modules/@google/gemini-cli/package.json','utf8')); JSON.parse(fs.readFileSync('/usr/local/share/npm-global/lib/node_modules/@google/gemini-cli-core/package.json','utf8'));" \ diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 10bfee644f..834750fdf9 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -161,20 +161,25 @@ they appear in the UI. ### Experimental -| UI Label | Setting | Description | Default | -| ---------------------------------------------------- | ----------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| Gemma Models | `experimental.gemma` | Enable access to Gemma 4 models (experimental). | `false` | -| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | -| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | -| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | -| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. | `false` | -| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` | -| Memory v2 | `experimental.memoryV2` | Disable the built-in save_memory tool and let the main agent persist project context by editing markdown files directly with edit/write_file. 
Route facts across four tiers: team-shared conventions go to project GEMINI.md files, project-specific personal notes go to the per-project private memory folder (MEMORY.md as index + sibling .md files for detail), and cross-project personal preferences go to the global ~/.gemini/GEMINI.md (the only file under ~/.gemini/ that the agent can edit — settings, credentials, etc. remain off-limits). Set to false to fall back to the legacy save_memory tool. | `true` | -| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` | -| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | -| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | +| UI Label | Setting | Description | Default | +| ---------------------------------------------------- | ----------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------- | +| Gemma Models | `experimental.gemma` | Enable access to Gemma 4 models (experimental). | `false` | +| Voice Mode | `experimental.voiceMode` | Enable experimental voice dictation and commands (/voice, /voice model). | `false` | +| Voice Activation Mode | `experimental.voice.activationMode` | How to trigger voice recording with the Space key. | `"push-to-talk"` | +| Voice Transcription Backend | `experimental.voice.backend` | The backend to use for voice transcription. | `"gemini-live"` | +| Whisper Model | `experimental.voice.whisperModel` | The Whisper model to use for local transcription. | `"ggml-base.en.bin"` | +| Voice Stop Grace Period (ms) | `experimental.voice.stopGracePeriodMs` | How long to wait for final transcription after stopping recording. | `1000` | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | +| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | +| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Enable Gemma Model Router | `experimental.gemmaModelRouter.enabled` | Enable the Gemma Model Router (experimental). Requires a local endpoint serving Gemma via the Gemini API using LiteRT-LM shim. 
| `false` | +| Auto-start LiteRT Server | `experimental.gemmaModelRouter.autoStartServer` | Automatically start the LiteRT-LM server when Gemini CLI starts and the Gemma router is enabled. | `false` | +| Memory v2 | `experimental.memoryV2` | Disable the built-in save_memory tool and let the main agent persist project context by editing markdown files directly with edit/write_file. Route facts across four tiers: team-shared conventions go to project GEMINI.md files, project-specific personal notes go to the per-project private memory folder (MEMORY.md as index + sibling .md files for detail), and cross-project personal preferences go to the global ~/.gemini/GEMINI.md (the only file under ~/.gemini/ that the agent can edit — settings, credentials, etc. remain off-limits). Set to false to fall back to the legacy save_memory tool. | `true` | +| Auto Memory | `experimental.autoMemory` | Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox. | `false` | +| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | +| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | ### Skills diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index b2d8955d5f..94a64cc197 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1691,6 +1691,32 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`experimental.voiceMode`** (boolean): + - **Description:** Enable experimental voice dictation and commands (/voice, + /voice model). + - **Default:** `false` + +- **`experimental.voice.activationMode`** (enum): + - **Description:** How to trigger voice recording with the Space key. + - **Default:** `"push-to-talk"` + - **Values:** `"push-to-talk"`, `"toggle"` + +- **`experimental.voice.backend`** (enum): + - **Description:** The backend to use for voice transcription. + - **Default:** `"gemini-live"` + - **Values:** `"gemini-live"`, `"whisper"` + +- **`experimental.voice.whisperModel`** (enum): + - **Description:** The Whisper model to use for local transcription. + - **Default:** `"ggml-base.en.bin"` + - **Values:** `"ggml-tiny.en.bin"`, `"ggml-base.en.bin"`, + `"ggml-large-v3-turbo-q5_0.bin"`, `"ggml-large-v3-turbo-q8_0.bin"` + +- **`experimental.voice.stopGracePeriodMs`** (number): + - **Description:** How long to wait for final transcription after stopping + recording. + - **Default:** `1000` + - **`experimental.adk.agentSessionNoninteractiveEnabled`** (boolean): - **Description:** Enable non-interactive agent sessions. - **Default:** `false` diff --git a/docs/reference/keyboard-shortcuts.md b/docs/reference/keyboard-shortcuts.md index 98d31c0ae2..6f7a8cce4a 100644 --- a/docs/reference/keyboard-shortcuts.md +++ b/docs/reference/keyboard-shortcuts.md @@ -115,6 +115,7 @@ available combinations. | `app.restart` | Restart the application. | `R`
`Shift+R` | | `app.suspend` | Suspend the CLI and move it to the background. | `Ctrl+Z` | | `app.showShellUnfocusWarning` | Show warning when trying to move focus away from shell input. | `Tab` | +| `app.voiceModePTT` | Hold to speak in Voice Mode. | `Space` | #### Background Shell Controls diff --git a/integration-tests/voice-mode.test.ts b/integration-tests/voice-mode.test.ts new file mode 100644 index 0000000000..49844494a8 --- /dev/null +++ b/integration-tests/voice-mode.test.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import { + WhisperModelManager, + WhisperTranscriptionProvider, +} from '@google/gemini-cli-core'; +import * as fs from 'node:fs'; +import commandExists from 'command-exists'; + +describe('Voice Mode Integration', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should be able to download tiny whisper model', async () => { + // This test doesn't require the binary, only network access. + // However, it's slow and downloads 75MB. We'll keep it for now but + // wrap it in a try-catch to avoid failing on network flakiness in CI. + const manager = new WhisperModelManager(); + const modelName = 'ggml-tiny.en.bin'; + + try { + // Cleanup if already exists to ensure we actually test download + const modelPath = manager.getModelPath(modelName); + if (fs.existsSync(modelPath)) { + fs.unlinkSync(modelPath); + } + + await manager.downloadModel(modelName); + expect(fs.existsSync(modelPath)).toBe(true); + expect(fs.statSync(modelPath).size).toBeGreaterThan(70 * 1024 * 1024); // ~75MB + } catch (e) { + console.warn( + 'Skipping whisper model download test due to error (possibly network):', + e, + ); + } + }, 300000); // 5 min timeout for download + + it('should initialize WhisperTranscriptionProvider and handle process', async () => { + // Skip this test if whisper-stream is not installed (typical for CI) + try { + await commandExists('whisper-stream'); + } catch { + console.log( + 'Skipping Whisper transcription test: whisper-stream not found', + ); + return; + } + + const manager = new WhisperModelManager(); + const modelName = 'ggml-tiny.en.bin'; + if (!manager.isModelInstalled(modelName)) { + await manager.downloadModel(modelName); + } + + const provider = new WhisperTranscriptionProvider({ + modelPath: manager.getModelPath(modelName), + }); + + // Since we can't easily provide real mic input in CI, + // we just verify it can start and be disconnected. + await provider.connect(); + provider.disconnect(); + }); +}); diff --git a/package-lock.json b/package-lock.json index 89a358ef9e..d3b8904735 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18390,6 +18390,7 @@ "ajv-formats": "^3.0.0", "chardet": "^2.1.0", "chokidar": "^5.0.0", + "command-exists": "^1.2.9", "diff": "^8.0.3", "dotenv": "^17.2.4", "dotenv-expand": "^12.0.3", diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 6b99a3606d..1ce93cf9ff 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1000,6 +1000,7 @@ export async function loadCliConfig( enableExtensionReloading: settings.experimental?.extensionReloading, enableAgents: settings.experimental?.enableAgents, plan: settings.general?.plan?.enabled ?? 
true, + voiceMode: settings.experimental?.voiceMode, tracker: settings.experimental?.taskTracker, directWebFetch: settings.experimental?.directWebFetch, planSettings: settings.general?.plan?.directory diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 2b6c959397..08edbd66f9 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2061,6 +2061,87 @@ const SETTINGS_SCHEMA = { description: 'Enable access to Gemma 4 models (experimental).', showInDialog: true, }, + voiceMode: { + type: 'boolean', + label: 'Voice Mode', + category: 'Experimental', + requiresRestart: false, + default: false, + description: + 'Enable experimental voice dictation and commands (/voice, /voice model).', + showInDialog: true, + }, + voice: { + type: 'object', + label: 'Voice', + category: 'Experimental', + requiresRestart: false, + default: {}, + description: 'Settings for voice mode and transcription.', + showInDialog: false, + properties: { + activationMode: { + type: 'enum', + label: 'Voice Activation Mode', + category: 'Experimental', + requiresRestart: false, + default: 'push-to-talk', + description: 'How to trigger voice recording with the Space key.', + showInDialog: true, + options: [ + { value: 'push-to-talk', label: 'Push-To-Talk (Hold Space)' }, + { value: 'toggle', label: 'Toggle (Press Space to start/stop)' }, + ], + }, + backend: { + type: 'enum', + label: 'Voice Transcription Backend', + category: 'Experimental', + requiresRestart: false, + default: 'gemini-live', + description: 'The backend to use for voice transcription.', + showInDialog: true, + options: [ + { value: 'gemini-live', label: 'Gemini Live API (Cloud)' }, + { value: 'whisper', label: 'Whisper (Local)' }, + ], + }, + whisperModel: { + type: 'enum', + label: 'Whisper Model', + category: 'Experimental', + requiresRestart: false, + default: 'ggml-base.en.bin', + description: 'The Whisper model to use for local transcription.', + showInDialog: true, + options: [ + { value: 'ggml-tiny.en.bin', label: 'Tiny (EN) - Fast (~75MB)' }, + { + value: 'ggml-base.en.bin', + label: 'Base (EN) - Balanced (~142MB)', + }, + { + value: 'ggml-large-v3-turbo-q5_0.bin', + label: 'Large v3 Turbo (Q5_0) - High Accuracy (~547MB)', + }, + { + value: 'ggml-large-v3-turbo-q8_0.bin', + label: 'Large v3 Turbo (Q8_0) - Max Accuracy (~834MB)', + }, + ], + }, + stopGracePeriodMs: { + type: 'number', + label: 'Voice Stop Grace Period (ms)', + category: 'Experimental', + requiresRestart: false, + default: 1000, + description: + 'How long to wait for final transcription after stopping recording.', + showInDialog: true, + }, + }, + }, adk: { type: 'object', label: 'ADK', diff --git a/packages/cli/src/services/BuiltinCommandLoader.test.ts b/packages/cli/src/services/BuiltinCommandLoader.test.ts index f166c161cd..d53273134c 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.test.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.test.ts @@ -170,6 +170,7 @@ describe('BuiltinCommandLoader', () => { getAllSkills: vi.fn().mockReturnValue([]), isAdminEnabled: vi.fn().mockReturnValue(true), }), + isVoiceModeEnabled: vi.fn().mockReturnValue(true), getContentGeneratorConfig: vi.fn().mockReturnValue({ authType: 'other', }), @@ -396,6 +397,7 @@ describe('BuiltinCommandLoader profile', () => { getAllSkills: vi.fn().mockReturnValue([]), isAdminEnabled: vi.fn().mockReturnValue(true), }), + isVoiceModeEnabled: vi.fn().mockReturnValue(true), getContentGeneratorConfig: 
vi.fn().mockReturnValue({ authType: 'other', }), diff --git a/packages/cli/src/services/BuiltinCommandLoader.ts b/packages/cli/src/services/BuiltinCommandLoader.ts index 94b5986eb3..1c5288707c 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.ts @@ -62,6 +62,7 @@ import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js'; import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js'; import { upgradeCommand } from '../ui/commands/upgradeCommand.js'; import { gemmaStatusCommand } from '../ui/commands/gemmaStatusCommand.js'; +import { voiceCommand } from '../ui/commands/voiceCommand.js'; /** * Loads the core, hard-coded slash commands that are an integral part @@ -227,6 +228,7 @@ export class BuiltinCommandLoader implements ICommandLoader { vimCommand, setupGithubCommand, terminalSetupCommand, + ...(this.config?.isVoiceModeEnabled() ? [voiceCommand] : []), ...(this.config?.getContentGeneratorConfig()?.authType === AuthType.LOGIN_WITH_GOOGLE ? [upgradeCommand] diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index a9f786f11c..83e69d6663 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -552,6 +552,8 @@ const mockUIActions: UIActions = { exitPrivacyNotice: vi.fn(), closeSettingsDialog: vi.fn(), closeModelDialog: vi.fn(), + openVoiceModelDialog: vi.fn(), + closeVoiceModelDialog: vi.fn(), openAgentConfigDialog: vi.fn(), closeAgentConfigDialog: vi.fn(), openPermissionsDialog: vi.fn(), @@ -598,6 +600,7 @@ const mockUIActions: UIActions = { handleNewAgentsSelect: vi.fn(), getPreferredEditor: vi.fn(), clearAccountSuspension: vi.fn(), + setVoiceModeEnabled: vi.fn(), }; import { type TextBuffer } from '../ui/components/shared/text-buffer.js'; diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index fdbaf57fbe..f5294ae23e 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -103,6 +103,7 @@ import { useQuotaAndFallback } from './hooks/useQuotaAndFallback.js'; import { useEditorSettings } from './hooks/useEditorSettings.js'; import { useSettingsCommand } from './hooks/useSettingsCommand.js'; import { useModelCommand } from './hooks/useModelCommand.js'; +import { useVoiceModelCommand } from './hooks/useVoiceModelCommand.js'; import { useSlashCommandProcessor } from './hooks/slashCommandProcessor.js'; import { useVimMode } from './contexts/VimModeContext.js'; import { @@ -312,6 +313,7 @@ export const AppContainer = (props: AppContainerProps) => { ); const [shellModeActive, setShellModeActive] = useState(false); + const [isVoiceModeEnabled, setVoiceModeEnabled] = useState(false); const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] = useState(false); const [historyRemountKey, setHistoryRemountKey] = useState(0); @@ -946,6 +948,12 @@ Logging in with Google... Restarting Gemini CLI to continue. const { isModelDialogOpen, openModelDialog, closeModelDialog } = useModelCommand(); + const { + isVoiceModelDialogOpen, + openVoiceModelDialog, + closeVoiceModelDialog, + } = useVoiceModelCommand(); + const { toggleVimEnabled } = useVimMode(); const setIsBackgroundTaskListOpenRef = useRef<(open: boolean) => void>( @@ -969,6 +977,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
openSettingsDialog, openSessionBrowser, openModelDialog, + openVoiceModelDialog, openAgentConfigDialog, openPermissionsDialog, quit: (messages: HistoryItem[]) => { @@ -981,6 +990,7 @@ Logging in with Google... Restarting Gemini CLI to continue. }, setDebugMessage, toggleCorgiMode: () => setCorgiMode((prev) => !prev), + toggleVoiceMode: () => setVoiceModeEnabled((prev) => !prev), toggleDebugProfiler, dispatchExtensionStateUpdate, addConfirmUpdateExtensionRequest, @@ -1006,6 +1016,7 @@ Logging in with Google... Restarting Gemini CLI to continue. openSettingsDialog, openSessionBrowser, openModelDialog, + openVoiceModelDialog, openAgentConfigDialog, setQuittingMessages, setDebugMessage, @@ -2191,6 +2202,7 @@ Logging in with Google... Restarting Gemini CLI to continue. isThemeDialogOpen || isSettingsDialogOpen || isModelDialogOpen || + isVoiceModelDialogOpen || isAgentConfigDialogOpen || isPermissionsDialogOpen || isAuthenticating || @@ -2448,6 +2460,7 @@ Logging in with Google... Restarting Gemini CLI to continue. isSettingsDialogOpen, isSessionBrowserOpen, isModelDialogOpen, + isVoiceModelDialogOpen, isAgentConfigDialogOpen, selectedAgentName, selectedAgentDisplayName, @@ -2468,6 +2481,7 @@ Logging in with Google... Restarting Gemini CLI to continue. pendingGeminiHistoryItems, thought, isInputActive, + isVoiceModeEnabled, isResuming, shouldShowIdePrompt, isFolderTrustDialogOpen: isFolderTrustDialogOpen ?? false, @@ -2559,6 +2573,7 @@ Logging in with Google... Restarting Gemini CLI to continue. isSettingsDialogOpen, isSessionBrowserOpen, isModelDialogOpen, + isVoiceModelDialogOpen, isAgentConfigDialogOpen, selectedAgentName, selectedAgentDisplayName, @@ -2579,6 +2594,7 @@ Logging in with Google... Restarting Gemini CLI to continue. pendingGeminiHistoryItems, thought, isInputActive, + isVoiceModeEnabled, isResuming, shouldShowIdePrompt, isFolderTrustDialogOpen, @@ -2671,6 +2687,8 @@ Logging in with Google... Restarting Gemini CLI to continue. exitPrivacyNotice, closeSettingsDialog, closeModelDialog, + openVoiceModelDialog, + closeVoiceModelDialog, openAgentConfigDialog, closeAgentConfigDialog, openPermissionsDialog, @@ -2751,6 +2769,9 @@ Logging in with Google... Restarting Gemini CLI to continue. setAccountSuspensionInfo(null); setAuthState(AuthState.Updating); }, + setVoiceModeEnabled: (value: boolean) => { + setVoiceModeEnabled(value); + }, }), [ handleThemeSelect, @@ -2764,6 +2785,8 @@ Logging in with Google... Restarting Gemini CLI to continue. exitPrivacyNotice, closeSettingsDialog, closeModelDialog, + openVoiceModelDialog, + closeVoiceModelDialog, openAgentConfigDialog, closeAgentConfigDialog, openPermissionsDialog, @@ -2807,6 +2830,7 @@ Logging in with Google... Restarting Gemini CLI to continue. config, historyManager, getPreferredEditor, + setVoiceModeEnabled, ], ); diff --git a/packages/cli/src/ui/commands/types.ts b/packages/cli/src/ui/commands/types.ts index 466e70c994..f0805f4426 100644 --- a/packages/cli/src/ui/commands/types.ts +++ b/packages/cli/src/ui/commands/types.ts @@ -72,6 +72,7 @@ export interface CommandContext { loadHistory: (history: HistoryItem[], postLoadInput?: string) => void; /** Toggles a special display mode. 
*/ toggleCorgiMode: () => void; + toggleVoiceMode: () => void; toggleDebugProfiler: () => void; toggleVimEnabled: () => Promise; reloadCommands: () => void; @@ -125,6 +126,7 @@ export interface OpenDialogActionReturn { | 'settings' | 'sessionBrowser' | 'model' + | 'voice-model' | 'agentConfig' | 'permissions'; } diff --git a/packages/cli/src/ui/commands/voiceCommand.ts b/packages/cli/src/ui/commands/voiceCommand.ts new file mode 100644 index 0000000000..b9df28ca27 --- /dev/null +++ b/packages/cli/src/ui/commands/voiceCommand.ts @@ -0,0 +1,30 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { CommandKind, type SlashCommand } from './types.js'; + +export const voiceCommand: SlashCommand = { + name: 'voice', + altNames: [], + description: 'Toggle voice dictation mode', + kind: CommandKind.BUILT_IN, + autoExecute: true, + action: (context) => { + context.ui.toggleVoiceMode(); + }, + subCommands: [ + { + name: 'model', + description: 'Manage voice transcription models', + kind: CommandKind.BUILT_IN, + autoExecute: true, + action: async () => ({ + type: 'dialog', + dialog: 'voice-model', + }), + }, + ], +}; diff --git a/packages/cli/src/ui/components/DialogManager.tsx b/packages/cli/src/ui/components/DialogManager.tsx index b231a62db5..40f0b06138 100644 --- a/packages/cli/src/ui/components/DialogManager.tsx +++ b/packages/cli/src/ui/components/DialogManager.tsx @@ -25,6 +25,7 @@ import { relaunchApp } from '../../utils/processUtils.js'; import { SessionBrowser } from './SessionBrowser.js'; import { PermissionsModifyTrustDialog } from './PermissionsModifyTrustDialog.js'; import { ModelDialog } from './ModelDialog.js'; +import { VoiceModelDialog } from './VoiceModelDialog.js'; import { theme } from '../semantic-colors.js'; import { useUIState } from '../contexts/UIStateContext.js'; import { useQuotaState } from '../contexts/QuotaContext.js'; @@ -238,6 +239,9 @@ export const DialogManager = ({ if (uiState.isModelDialogOpen) { return ; } + if (uiState.isVoiceModelDialogOpen) { + return ; + } if ( uiState.isAgentConfigDialogOpen && uiState.selectedAgentName && diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx index e50a2f1d81..5be237a15f 100644 --- a/packages/cli/src/ui/components/InputPrompt.test.tsx +++ b/packages/cli/src/ui/components/InputPrompt.test.tsx @@ -9,12 +9,41 @@ import { createMockSettings } from '../../test-utils/settings.js'; import { makeFakeConfig } from '@google/gemini-cli-core'; import { waitFor } from '../../test-utils/async.js'; import { act, useState, useMemo } from 'react'; +import type { EventEmitter } from 'node:events'; + +const { fakeTranscriptionProvider } = vi.hoisted(() => { + // Use require within hoisted block for immediate synchronous access + // eslint-disable-next-line @typescript-eslint/no-require-imports, no-restricted-syntax + const { EventEmitter } = require('node:events'); + class FakeTranscriptionProvider extends EventEmitter { + connect = vi.fn().mockResolvedValue(undefined); + disconnect = vi.fn(); + sendAudioChunk = vi.fn(); + getTranscription = vi.fn().mockReturnValue(''); + } + return { + fakeTranscriptionProvider: new FakeTranscriptionProvider(), + }; +}); + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const actual = (await importOriginal()) as any; + return { + ...actual, + TranscriptionFactory: { + createProvider: vi.fn(() => 
fakeTranscriptionProvider),
+    },
+  };
+});
+
 import {
   InputPrompt,
   tryTogglePasteExpansion,
   type InputPromptProps,
 } from './InputPrompt.js';
 import { InputContext } from '../contexts/InputContext.js';
+import { type UIState } from '../contexts/UIStateContext.js';
 import {
   calculateTransformationsForLine,
   calculateTransformedLine,
@@ -417,6 +446,7 @@
       getWorkspaceContext: () => ({
         getDirectories: () => ['/test/project/src'],
       }),
+      getContentGeneratorConfig: () => ({ apiKey: 'test-api-key' }),
     } as unknown as Config,
     slashCommands: mockSlashCommands,
     commandContext: mockCommandContext,
@@ -4925,6 +4955,383 @@
       },
     );
   });
+
+  describe('Voice Mode', () => {
+    beforeEach(() => {
+      (
+        fakeTranscriptionProvider as unknown as EventEmitter
+      ).removeAllListeners();
+      vi.clearAllMocks();
+    });
+
+    it('should start recording when space is pressed and voice mode is enabled (toggle)', async () => {
+      await act(async () => {
+        mockBuffer.setText('');
+      });
+      const { stdin, unmount, lastFrame } = await renderWithProviders(
+        <InputPrompt {...props} />,
+        {
+          uiState: { isVoiceModeEnabled: true } as UIState,
+          settings: createMockSettings({
+            experimental: { voice: { activationMode: 'toggle' } },
+          }),
+        },
+      );
+
+      // Initially not recording
+      expect(lastFrame()).not.toContain('🎙️ Listening...');
+      expect(lastFrame()).toContain(
+        'Voice mode: Space to start/stop recording',
+      );
+
+      // Press space to start
+      await act(async () => {
+        stdin.write(' ');
+      });
+
+      // Now should show listening
+      await waitFor(() => {
+        expect(lastFrame()).toContain('🎙️ Listening...');
+      });
+
+      unmount();
+    });
+
+    it('should toggle recording off when space is pressed again (toggle)', async () => {
+      await act(async () => {
+        mockBuffer.setText('');
+      });
+      const { stdin, unmount, lastFrame } = await renderWithProviders(
+        <InputPrompt {...props} />,
+        {
+          uiState: { isVoiceModeEnabled: true } as UIState,
+          settings: createMockSettings({
+            experimental: { voice: { activationMode: 'toggle' } },
+          }),
+        },
+      );
+
+      // Start recording
+      await act(async () => {
+        stdin.write(' ');
+      });
+      await waitFor(() => {
+        expect(lastFrame()).toContain('🎙️ Listening...');
+      });
+
+      // Stop recording
+      await act(async () => {
+        stdin.write(' ');
+      });
+      await waitFor(() => {
+        expect(lastFrame()).not.toContain('🎙️ Listening...');
+        expect(lastFrame()).toContain(
+          'Voice mode: Space to start/stop recording',
+        );
+      });
+
+      unmount();
+    });
+
+    it('should resume recording when space is pressed even if buffer is not empty (toggle)', async () => {
+      await act(async () => {
+        mockBuffer.setText('some existing text');
+      });
+      const { stdin, unmount, lastFrame } = await renderWithProviders(
+        <InputPrompt {...props} />,
+        {
+          uiState: { isVoiceModeEnabled: true } as UIState,
+          settings: createMockSettings({
+            experimental: { voice: { activationMode: 'toggle' } },
+          }),
+        },
+      );
+
+      // Should show voice mode hint even if buffer is not empty (new behavior)
+      expect(lastFrame()).toContain(
+        'Voice mode: Space to start/stop recording',
+      );
+      expect(lastFrame()).toContain('some existing text');
+
+      // Press space to start recording again
+      await act(async () => {
+        stdin.write(' ');
+      });
+
+      await waitFor(() => {
+        expect(lastFrame()).toContain('🎙️ Listening...');
+      });
+
+      unmount();
+    });
+
+    it('should not start recording if voice mode is disabled (toggle)', async () => {
+      await act(async () => {
+        mockBuffer.setText('');
+      });
+      const { stdin, unmount, lastFrame } = await renderWithProviders(
+        <InputPrompt {...props} />,
+        {
+          uiState: { isVoiceModeEnabled: false } as UIState,
+          settings: createMockSettings({
+            experimental: { voice: { activationMode: 'toggle' } },
+          }),
+        },
+      );
+
+      // Press space
+      await act(async () => {
+        stdin.write(' ');
+      });
+
+      // Should NOT show listening, instead should call handleInput which handles space
+      expect(lastFrame()).not.toContain('🎙️ Listening...');
+      expect(mockBuffer.handleInput).toHaveBeenCalled();
+      unmount();
+    });
+
+    it('should append transcription correctly across multiple turn updates (toggle)', async () => {
+      await act(async () => {
+        mockBuffer.setText('initial');
+      });
+      const { stdin, unmount } = await renderWithProviders(
+        <InputPrompt {...props} />,
+        {
+          uiState: { isVoiceModeEnabled: true } as UIState,
+          settings: createMockSettings({
+            experimental: { voice: { activationMode: 'toggle' } },
+          }),
+        },
+      );
+
+      // Start recording
+      await act(async () => {
+        stdin.write(' ');
+      });
+
+      // Emit first transcription
+      await act(async () => {
+        (fakeTranscriptionProvider as unknown as EventEmitter).emit(
+          'transcription',
+          'hello',
+        );
+      });
+      await waitFor(() => {
+        expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 'end');
+      });
+
+      // Emit turnComplete (Gemini Live starts over after this)
+      await act(async () => {
+        (fakeTranscriptionProvider as unknown as EventEmitter).emit(
+          'turnComplete',
+        );
+      });
+
+      // Emit second part (Gemini Live sends new turn text starting from empty)
+      await act(async () => {
+        (fakeTranscriptionProvider as unknown as EventEmitter).emit(
+          'transcription',
+          'world',
+        );
+      });
+      await waitFor(() => {
+        // Should have appended 'world' to the baseline 'initial hello'
+        expect(mockBuffer.setText).toHaveBeenCalledWith(
+          'initial hello world',
+          'end',
+        );
+      });
+
+      unmount();
+    });
+
+    it('should append transcription correctly when resuming voice mode (toggle)', async () => {
+      await act(async () => {
+        mockBuffer.setText('First turn.');
+      });
+      const { stdin, unmount } = await renderWithProviders(
+        <InputPrompt {...props} />,
+        {
+          uiState: { isVoiceModeEnabled: true } as UIState,
+          settings: createMockSettings({
+            experimental: { voice: { activationMode: 'toggle' } },
+          }),
+        },
+      );
+
+      // Start recording (resumed)
+      await act(async () => {
+        stdin.write(' ');
+      });
+
+      // Emit transcription
+      await act(async () => {
+        (fakeTranscriptionProvider as unknown as EventEmitter).emit(
+          'transcription',
+          'Second turn.',
+        );
+      });
+
+      await waitFor(() => {
+        expect(mockBuffer.setText).toHaveBeenCalledWith(
+          'First turn. Second turn.',
+          'end',
+        );
+      });
+
+      unmount();
+    });
+
+    describe('push-to-talk', () => {
+      beforeEach(() => {
+        vi.useFakeTimers();
+      });
+
+      afterEach(() => {
+        vi.useRealTimers();
+      });
+
+      it('should insert a space on a single tap', async () => {
+        const { stdin, unmount, lastFrame } = await renderWithProviders(
+          <InputPrompt {...props} />,
+          {
+            uiState: { isVoiceModeEnabled: true } as UIState,
+            settings: createMockSettings({
+              experimental: { voice: { activationMode: 'push-to-talk' } },
+            }),
+          },
+        );
+
+        expect(lastFrame()).toContain('Voice mode: Hold Space to record');
+
+        // Press space once
+        await act(async () => {
+          stdin.write(' ');
+        });
+
+        // Should insert space optimistically
+        expect(mockBuffer.insert).toHaveBeenCalledWith(' ');
+        expect(lastFrame()).not.toContain('🎙️ Listening...');
+
+        // Advance timer past HOLD_DELAY_MS
+        await act(async () => {
+          vi.advanceTimersByTime(700);
+        });
+
+        expect(lastFrame()).not.toContain('🎙️ Listening...');
+        unmount();
+      });
+
+      it('should start recording on hold (simulated by repeat spaces)', async () => {
+        const { stdin, unmount, lastFrame } = await renderWithProviders(
+          <InputPrompt {...props} />,
+          {
+            uiState: { isVoiceModeEnabled: true } as UIState,
+            settings: createMockSettings({
+              experimental: { voice: { activationMode: 'push-to-talk' } },
+            }),
+          },
+        );
+
+        // First space
+        await act(async () => {
+          stdin.write(' ');
+        });
+        expect(mockBuffer.insert).toHaveBeenCalledWith(' ');
+
+        // Second space (repeat)
+        await act(async () => {
+          stdin.write(' ');
+        });
+
+        await waitFor(() => {
+          // Should have backspaced the optimistic space
+          expect(mockBuffer.backspace).toHaveBeenCalled();
+          // Should show listening
+          expect(lastFrame()).toContain('🎙️ Listening...');
+        });
+
+        unmount();
+      });
+
+      it('should stop recording when space heartbeat stops (release)', async () => {
+        const { stdin, unmount, lastFrame } = await renderWithProviders(
+          <InputPrompt {...props} />,
+          {
+            uiState: { isVoiceModeEnabled: true } as UIState,
+            settings: createMockSettings({
+              experimental: { voice: { activationMode: 'push-to-talk' } },
+            }),
+          },
+        );
+
+        // Start hold
+        await act(async () => {
+          stdin.write(' ');
+          stdin.write(' ');
+        });
+
+        // Use a short interval in waitFor to prevent advancing fake timers past the 300ms RELEASE_DELAY_MS
+        await waitFor(
+          () => {
+            expect(lastFrame()).toContain('🎙️ Listening...');
+          },
+          { interval: 10 },
+        );
+
+        // Simulate heartbeat (held key) - send space first to reset timer, then advance
+        await act(async () => {
+          stdin.write(' ');
+          vi.advanceTimersByTime(100);
+        });
+        expect(lastFrame()).toContain('🎙️ Listening...');
+
+        // Stop heartbeat (release)
+        await act(async () => {
+          vi.advanceTimersByTime(400); // Past RELEASE_DELAY_MS
+        });
+
+        await waitFor(() => {
+          expect(lastFrame()).not.toContain('🎙️ Listening...');
+        });
+
+        unmount();
+      });
+
+      it('should cancel hold state if non-space key is pressed after first space', async () => {
+        const { stdin, unmount } = await renderWithProviders(
+          <InputPrompt {...props} />,
+          {
+            uiState: { isVoiceModeEnabled: true } as UIState,
+            settings: createMockSettings({
+              experimental: { voice: { activationMode: 'push-to-talk' } },
+            }),
+          },
+        );
+
+        // First space
+        await act(async () => {
+          stdin.write(' ');
+        });
+
+        // Type 'a'
+        await act(async () => {
+          stdin.write('a');
+        });
+
+        // Should NOT start recording on next space even if fast
+        await act(async () => {
+          stdin.write(' ');
+        });
+
+        expect(mockBuffer.insert).toHaveBeenCalledTimes(2); // Two spaces inserted
+        expect(mockBuffer.handleInput).toHaveBeenCalledWith(
+          expect.objectContaining({ name:
'a' }), + ); + unmount(); + }); + }); + }); }); function clean(str: string | undefined): string { diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index c9f75c740b..f69138c8c7 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -56,6 +56,7 @@ import { debugLogger, type Config, } from '@google/gemini-cli-core'; +import { useVoiceMode } from '../hooks/useVoiceMode.js'; import { parseInputForHighlighting, parseSegmentsFromTokens, @@ -159,7 +160,6 @@ export function isLargePaste(text: string): boolean { } const DOUBLE_TAB_CLEAN_UI_TOGGLE_WINDOW_MS = 350; - /** * Attempt to toggle expansion of a paste placeholder in the buffer. * Returns true if a toggle action was performed or hint was shown, false otherwise. @@ -238,6 +238,7 @@ export const InputPrompt: React.FC = ({ setEmbeddedShellFocused, setShortcutsHelpVisible, toggleCleanUiDetailsVisible, + setVoiceModeEnabled, } = useUIActions(); const { terminalWidth, @@ -246,6 +247,7 @@ export const InputPrompt: React.FC = ({ backgroundTasks, backgroundTaskHeight, shortcutsHelpVisible, + isVoiceModeEnabled, } = useUIState(); const [suppressCompletion, setSuppressCompletion] = useState(false); const { handlePress: registerPlainTabPress, resetCount: resetPlainTabPress } = @@ -263,6 +265,7 @@ export const InputPrompt: React.FC = ({ resetEscapeState(); if (buffer.text.length > 0) { buffer.setText(''); + resetTurnBaseline(); resetCompletionState(); } else if (history.length > 0) { onSubmit('/rewind'); @@ -281,6 +284,16 @@ export const InputPrompt: React.FC = ({ const hasUserNavigatedSuggestions = useRef(false); const listRef = useRef>(null); + const { isRecording, handleVoiceInput, resetTurnBaseline } = useVoiceMode({ + buffer, + config, + settings, + setQueueErrorMessage, + isVoiceModeEnabled, + setVoiceModeEnabled, + keyMatchers, + }); + const [reverseSearchActive, setReverseSearchActive] = useState(false); const [commandSearchActive, setCommandSearchActive] = useState(false); const [textBeforeReverseSearch, setTextBeforeReverseSearch] = useState(''); @@ -387,6 +400,7 @@ export const InputPrompt: React.FC = ({ // Clear the buffer *before* calling onSubmit to prevent potential re-submission // if onSubmit triggers a re-render while the buffer still holds the old value. buffer.setText(''); + resetTurnBaseline(); onSubmit(processedValue); resetCompletionState(); resetReverseSearchCompletionState(); @@ -398,6 +412,7 @@ export const InputPrompt: React.FC = ({ shellModeActive, shellHistory, resetReverseSearchCompletionState, + resetTurnBaseline, ], ); @@ -647,6 +662,8 @@ export const InputPrompt: React.FC = ({ const handleInput = useCallback( (key: Key) => { + if (handleVoiceInput(key)) return true; + // Determine if this keypress is a history navigation command const isHistoryUp = !shellModeActive && @@ -873,9 +890,9 @@ export const InputPrompt: React.FC = ({ ) { setShellModeActive(!shellModeActive); buffer.setText(''); // Clear the '!' 
from input + resetTurnBaseline(); return true; } - if (keyMatchers[Command.ESCAPE](key)) { const cancelSearch = ( setActive: (active: boolean) => void, @@ -1360,6 +1377,7 @@ export const InputPrompt: React.FC = ({ backgroundTaskHeight, streamingState, handleEscPress, + resetTurnBaseline, registerPlainTabPress, resetPlainTabPress, toggleCleanUiDetailsVisible, @@ -1369,9 +1387,9 @@ export const InputPrompt: React.FC = ({ keyMatchers, isHelpDismissKey, settings, + handleVoiceInput, ], ); - useKeypress(handleInput, { isActive: !isEmbeddedShellFocused && !copyModeEnabled, priority: true, @@ -1792,20 +1810,39 @@ export const InputPrompt: React.FC = ({ )}{' '} - {buffer.text.length === 0 && placeholder ? ( - showCursor ? ( - - {chalk.inverse(placeholder.slice(0, 1))} - - {placeholder.slice(1)} - + {isRecording && ( + + 🎙️ Listening... + + )} + {isVoiceModeEnabled && !isRecording && ( + + + > Voice mode:{' '} + {(settings.experimental.voice?.activationMode ?? + 'push-to-talk') === 'push-to-talk' + ? 'Hold Space to record' + : 'Space to start/stop recording'}{' '} + (Esc to exit) - ) : ( - {placeholder} - ) + + )} + {buffer.text.length === 0 && !isRecording ? ( + !isVoiceModeEnabled && placeholder ? ( + showCursor ? ( + + {chalk.inverse(placeholder.slice(0, 1))} + + {placeholder.slice(1)} + + + ) : ( + {placeholder} + ) + ) : null ) : ( void; +} + +type DialogView = 'backend' | 'whisper-models'; + +const WHISPER_MODELS = [ + { + value: 'ggml-tiny.en.bin', + label: 'Tiny (EN)', + description: 'Fastest, lower accuracy (~75MB)', + }, + { + value: 'ggml-base.en.bin', + label: 'Base (EN)', + description: 'Balanced speed and accuracy (~142MB)', + }, + { + value: 'ggml-large-v3-turbo-q5_0.bin', + label: 'Large v3 Turbo (Q5_0)', + description: 'High accuracy, quantized (~547MB)', + }, + { + value: 'ggml-large-v3-turbo-q8_0.bin', + label: 'Large v3 Turbo (Q8_0)', + description: 'Maximum accuracy, high memory (~834MB)', + }, +]; + +export function VoiceModelDialog({ + onClose, +}: VoiceModelDialogProps): React.JSX.Element { + const { settings, setSetting } = useSettingsStore(); + const [view, setView] = useState('backend'); + const [downloadProgress, setDownloadProgress] = + useState(null); + const [error, setError] = useState(null); + + const whisperInstalled = useMemo( + () => isBinaryAvailable('whisper-stream'), + [], + ); + const modelManager = useMemo(() => new WhisperModelManager(), []); + + const currentBackend = + settings.merged.experimental.voice?.backend ?? 'gemini-live'; + const currentWhisperModel = + settings.merged.experimental.voice?.whisperModel ?? 
'ggml-base.en.bin'; + + const handleKeypress = useCallback( + (key: Key) => { + if (key.name === 'escape') { + if (view === 'whisper-models') { + setView('backend'); + } else { + onClose(); + } + return true; + } + return false; + }, + [view, onClose], + ); + + useKeypress(handleKeypress, { isActive: true }); + + const handleBackendSelect = useCallback( + (value: string) => { + if (value === 'whisper') { + setView('whisper-models'); + } else { + setSetting( + SettingScope.User, + 'experimental.voice.backend', + 'gemini-live', + ); + onClose(); + } + }, + [setSetting, onClose], + ); + + const handleWhisperModelSelect = useCallback( + async (modelName: string) => { + if (modelManager.isModelInstalled(modelName)) { + setSetting(SettingScope.User, 'experimental.voice.backend', 'whisper'); + setSetting( + SettingScope.User, + 'experimental.voice.whisperModel', + modelName, + ); + onClose(); + } else { + setError(null); + const onProgress = (p: WhisperModelProgress) => setDownloadProgress(p); + modelManager.on('progress', onProgress); + + try { + await modelManager.downloadModel(modelName); + + setSetting( + SettingScope.User, + 'experimental.voice.backend', + 'whisper', + ); + setSetting( + SettingScope.User, + 'experimental.voice.whisperModel', + modelName, + ); + onClose(); + } catch (err) { + setError( + `Failed to download: ${err instanceof Error ? err.message : String(err)}`, + ); + } finally { + modelManager.off('progress', onProgress); + setDownloadProgress(null); + } + } + }, + [modelManager, setSetting, onClose], + ); + + const backendOptions = useMemo( + () => [ + { + value: 'gemini-live', + title: 'Gemini Live API (Cloud)', + description: 'Real-time cloud transcription via Gemini Live API.', + key: 'gemini-live', + }, + { + value: 'whisper', + title: 'Whisper (Local)', + description: whisperInstalled + ? 'Local transcription using whisper.cpp.' + : 'Local transcription (Requires: brew install whisper-cpp)', + key: 'whisper', + }, + ], + [whisperInstalled], + ); + + const whisperOptions = useMemo( + () => + WHISPER_MODELS.map((m) => ({ + value: m.value, + title: `${m.label}${modelManager.isModelInstalled(m.value) ? ' (Installed)' : ' (Download)'}`, + description: m.description, + key: m.value, + })), + [modelManager], + ); + + return ( + + + {view === 'backend' + ? 'Select Voice Transcription Backend' + : 'Select Whisper Model'} + + + {error && ( + + {error} + + )} + + {downloadProgress ? ( + + + Downloading {downloadProgress.modelName}... + + {Math.round(downloadProgress.percentage * 100)}% + + + ) : ( + + {view === 'backend' ? ( + + ) : ( + o.value === currentWhisperModel, + )} + showNumbers={true} + /> + )} + + )} + + + + {view === 'whisper-models' + ? 
'(Press Esc to go back)' + : '(Press Esc to close)'} + + + + ); +} diff --git a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap index 4830e90db1..db449ce4d7 100644 --- a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap @@ -168,13 +168,6 @@ exports[`InputPrompt > mouse interaction > should toggle paste expansion on doub " `; -exports[`InputPrompt > mouse interaction > should toggle paste expansion on double-click 4`] = ` -"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ - > [Pasted Text: 10 lines] -▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ -" -`; - exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = ` "──────────────────────────────────────────────────────────────────────────────────────────────────── > hello diff --git a/packages/cli/src/ui/contexts/UIActionsContext.tsx b/packages/cli/src/ui/contexts/UIActionsContext.tsx index f1959c0173..fb979e7c17 100644 --- a/packages/cli/src/ui/contexts/UIActionsContext.tsx +++ b/packages/cli/src/ui/contexts/UIActionsContext.tsx @@ -41,6 +41,8 @@ export interface UIActions { exitPrivacyNotice: () => void; closeSettingsDialog: () => void; closeModelDialog: () => void; + openVoiceModelDialog: () => void; + closeVoiceModelDialog: () => void; openAgentConfigDialog: ( name: string, displayName: string, @@ -93,6 +95,7 @@ export interface UIActions { handleNewAgentsSelect: (choice: NewAgentsChoice) => Promise; getPreferredEditor: () => EditorType | undefined; clearAccountSuspension: () => void; + setVoiceModeEnabled: (value: boolean) => void; } export const UIActionsContext = createContext(null); diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index ed33c21ee5..2150218d44 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -112,6 +112,7 @@ export interface UIState { isSettingsDialogOpen: boolean; isSessionBrowserOpen: boolean; isModelDialogOpen: boolean; + isVoiceModelDialogOpen: boolean; isAgentConfigDialogOpen: boolean; selectedAgentName?: string; selectedAgentDisplayName?: string; @@ -132,6 +133,7 @@ export interface UIState { pendingGeminiHistoryItems: HistoryItemWithoutId[]; thought: ThoughtSummary | null; isInputActive: boolean; + isVoiceModeEnabled: boolean; isResuming: boolean; shouldShowIdePrompt: boolean; isFolderTrustDialogOpen: boolean; diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx b/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx index 3e521a6627..f4b18d5bbf 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.test.tsx @@ -205,11 +205,13 @@ describe('useSlashCommandProcessor', () => { openSettingsDialog: vi.fn(), openSessionBrowser: vi.fn(), openModelDialog: mockOpenModelDialog, + openVoiceModelDialog: vi.fn(), openAgentConfigDialog, openPermissionsDialog: vi.fn(), quit: mockSetQuittingMessages, setDebugMessage: vi.fn(), toggleCorgiMode: vi.fn(), + toggleVoiceMode: vi.fn(), toggleDebugProfiler: vi.fn(), dispatchExtensionStateUpdate: vi.fn(), addConfirmUpdateExtensionRequest: vi.fn(), diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts 
b/packages/cli/src/ui/hooks/slashCommandProcessor.ts index 20de86002c..3007a96a73 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts @@ -72,6 +72,7 @@ interface SlashCommandProcessorActions { openSettingsDialog: () => void; openSessionBrowser: () => void; openModelDialog: () => void; + openVoiceModelDialog: () => void; openAgentConfigDialog: ( name: string, displayName: string, @@ -81,6 +82,7 @@ interface SlashCommandProcessorActions { quit: (messages: HistoryItem[]) => void; setDebugMessage: (message: string) => void; toggleCorgiMode: () => void; + toggleVoiceMode: () => void; toggleDebugProfiler: () => void; dispatchExtensionStateUpdate: (action: ExtensionUpdateAction) => void; addConfirmUpdateExtensionRequest: (request: ConfirmationRequest) => void; @@ -232,6 +234,7 @@ export const useSlashCommandProcessor = ( pendingItem, setPendingItem, toggleCorgiMode: actions.toggleCorgiMode, + toggleVoiceMode: actions.toggleVoiceMode, toggleDebugProfiler: actions.toggleDebugProfiler, toggleVimEnabled, reloadCommands, @@ -503,6 +506,9 @@ export const useSlashCommandProcessor = ( case 'model': actions.openModelDialog(); return { type: 'handled' }; + case 'voice-model': + actions.openVoiceModelDialog(); + return { type: 'handled' }; case 'agentConfig': { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const props = result.props as Record; diff --git a/packages/cli/src/ui/hooks/useVoiceMode.ts b/packages/cli/src/ui/hooks/useVoiceMode.ts new file mode 100644 index 0000000000..0f37c66357 --- /dev/null +++ b/packages/cli/src/ui/hooks/useVoiceMode.ts @@ -0,0 +1,429 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { useState, useRef, useCallback, useEffect } from 'react'; +import { + AudioRecorder, + TranscriptionFactory, + debugLogger, + type Config, + type TranscriptionProvider, +} from '@google/gemini-cli-core'; +import type { TextBuffer } from '../components/shared/text-buffer.js'; +import type { MergedSettings } from '../../config/settingsSchema.js'; +import type { Key } from './useKeypress.js'; +import { Command } from '../key/keyMatchers.js'; + +interface UseVoiceModeProps { + buffer: TextBuffer; + config: Config; + settings: MergedSettings; + setQueueErrorMessage: (message: string | null) => void; + isVoiceModeEnabled: boolean; + setVoiceModeEnabled: (enabled: boolean) => void; + keyMatchers: Record boolean>; +} + +const HOLD_DELAY_MS = 600; +const RELEASE_DELAY_MS = 300; + +export function useVoiceMode({ + buffer, + config, + settings, + setQueueErrorMessage, + isVoiceModeEnabled, + setVoiceModeEnabled, + keyMatchers, +}: UseVoiceModeProps) { + const [isRecording, setIsRecording] = useState(false); + const [isConnecting, setIsConnecting] = useState(false); + + const liveTranscriptionRef = useRef(''); + const stopRequestedRef = useRef(false); + const isRecordingRef = useRef(false); + const lastFailureTimeRef = useRef(0); + const recordingInProgressRef = useRef(false); + const voiceTimeoutRef = useRef(null); + const recorderRef = useRef(null); + const transcriptionServiceRef = useRef(null); + const turnBaselineRef = useRef(null); + + const pttStateRef = useRef<'idle' | 'possible-hold' | 'recording'>('idle'); + const pttTimerRef = useRef(null); + const disconnectTimerRef = useRef(null); + + const bufferRef = useRef(buffer); + bufferRef.current = buffer; + + const stopVoiceRecording = useCallback(() => { + if (stopRequestedRef.current) return; + 
debugLogger.debug('[Voice] Stop requested'); + stopRequestedRef.current = true; + + setIsRecording(false); + isRecordingRef.current = false; + setIsConnecting(false); + + if (recorderRef.current) { + recorderRef.current.stop(); + recorderRef.current = null; + } + + const serviceToDisconnect = transcriptionServiceRef.current; + transcriptionServiceRef.current = null; + + if (serviceToDisconnect) { + const isLive = settings.experimental.voice?.backend === 'gemini-live'; + const gracePeriodMs = + settings.experimental.voice?.stopGracePeriodMs ?? + (isLive ? 2000 : 1000); + debugLogger.debug( + `[Voice] Draining transcription for ${gracePeriodMs}ms`, + ); + + if (disconnectTimerRef.current) clearTimeout(disconnectTimerRef.current); + disconnectTimerRef.current = setTimeout(() => { + debugLogger.debug('[Voice] Grace period ended, disconnecting service'); + serviceToDisconnect.disconnect(); + disconnectTimerRef.current = null; + }, gracePeriodMs); + } + + liveTranscriptionRef.current = ''; + pttStateRef.current = 'idle'; + }, [settings.experimental.voice]); + + const startVoiceRecording = useCallback(() => { + if ( + isRecordingRef.current || + Date.now() - lastFailureTimeRef.current < 2000 + ) { + return; + } + + if (disconnectTimerRef.current) { + clearTimeout(disconnectTimerRef.current); + disconnectTimerRef.current = null; + } + + recordingInProgressRef.current = true; + turnBaselineRef.current = bufferRef.current.text; + + setIsConnecting(true); + setIsRecording(true); + isRecordingRef.current = true; + + liveTranscriptionRef.current = ''; + stopRequestedRef.current = false; + + const apiKey = + config.getContentGeneratorConfig()?.apiKey || + process.env['GEMINI_API_KEY'] || + ''; + + const startAsync = async () => { + // If there's an active draining service, disconnect it immediately + // before starting a new one to prevent orphaned event collisions. + if (disconnectTimerRef.current) { + clearTimeout(disconnectTimerRef.current); + disconnectTimerRef.current = null; + } + if (transcriptionServiceRef.current) { + transcriptionServiceRef.current.disconnect(); + transcriptionServiceRef.current = null; + } + + const cleanupIfStopped = () => { + if (stopRequestedRef.current) { + if (recorderRef.current) { + recorderRef.current.stop(); + recorderRef.current = null; + } + if (transcriptionServiceRef.current) { + transcriptionServiceRef.current.disconnect(); + transcriptionServiceRef.current = null; + } + setIsRecording(false); + isRecordingRef.current = false; + setIsConnecting(false); + recordingInProgressRef.current = false; + return true; + } + return false; + }; + + if (cleanupIfStopped()) return; + + const voiceBackend = + settings.experimental.voice?.backend ?? 'gemini-live'; + + if (!apiKey && voiceBackend === 'gemini-live') { + setQueueErrorMessage( + 'Cloud voice mode requires a GEMINI_API_KEY. 
Please set it in your environment or ~/.gemini/.env.', + ); + setIsRecording(false); + isRecordingRef.current = false; + setIsConnecting(false); + recordingInProgressRef.current = false; + lastFailureTimeRef.current = Date.now(); + return; + } + + if (voiceBackend === 'gemini-live') { + recorderRef.current = new AudioRecorder(); + } + + const currentService = TranscriptionFactory.createProvider( + settings.experimental.voice, + apiKey, + ); + transcriptionServiceRef.current = currentService; + + currentService.on('transcription', (text) => { + if ( + transcriptionServiceRef.current !== currentService && + stopRequestedRef.current + ) { + // If this is an orphaned service that was replaced by a new session, ignore its events + return; + } + + if (text) { + const currentBufferText = bufferRef.current.text; + const previousTranscription = liveTranscriptionRef.current; + + let newTotalText = currentBufferText; + + if ( + previousTranscription && + currentBufferText.endsWith(previousTranscription) + ) { + newTotalText = currentBufferText.slice( + 0, + -previousTranscription.length, + ); + } else if ( + currentBufferText && + !currentBufferText.endsWith(' ') && + !currentBufferText.endsWith('\n') + ) { + newTotalText += ' '; + } + + newTotalText += text; + bufferRef.current.setText(newTotalText, 'end'); + } + liveTranscriptionRef.current = text; + }); + + currentService.on('turnComplete', () => { + if ( + transcriptionServiceRef.current !== currentService && + stopRequestedRef.current + ) + return; + liveTranscriptionRef.current = ''; + }); + + currentService.on('error', (err) => { + if (transcriptionServiceRef.current !== currentService) return; + debugLogger.error('[Voice] Transcription error:', err); + lastFailureTimeRef.current = Date.now(); + recordingInProgressRef.current = false; + }); + + currentService.on('close', () => { + if (transcriptionServiceRef.current !== currentService) return; + if (!stopRequestedRef.current) { + setIsRecording(false); + isRecordingRef.current = false; + setIsConnecting(false); + recordingInProgressRef.current = false; + lastFailureTimeRef.current = Date.now(); + } + }); + + try { + await currentService.connect(); + if (cleanupIfStopped()) return; + + await recorderRef.current?.start(); + if (cleanupIfStopped()) return; + + setIsConnecting(false); + + const currentVoiceBackend = + settings.experimental.voice?.backend ?? 'gemini-live'; + + recorderRef.current?.on('data', (chunk) => { + if (currentVoiceBackend === 'gemini-live') { + currentService.sendAudioChunk(chunk); + } + }); + recorderRef.current?.on('error', (err) => { + debugLogger.error('[Voice] Recorder error:', err); + stopVoiceRecording(); + lastFailureTimeRef.current = Date.now(); + }); + } catch (err: unknown) { + if (transcriptionServiceRef.current !== currentService) return; + const message = err instanceof Error ? 
err.message : String(err); + setQueueErrorMessage(`Voice mode failure: ${message}`); + setIsRecording(false); + isRecordingRef.current = false; + setIsConnecting(false); + recordingInProgressRef.current = false; + lastFailureTimeRef.current = Date.now(); + + if (recorderRef.current) { + recorderRef.current.stop(); + recorderRef.current = null; + } + if (transcriptionServiceRef.current) { + transcriptionServiceRef.current.disconnect(); + transcriptionServiceRef.current = null; + } + } + }; + + void startAsync(); + }, [ + config, + settings.experimental.voice, + setQueueErrorMessage, + stopVoiceRecording, + ]); + + useEffect( + () => () => { + if (voiceTimeoutRef.current) clearTimeout(voiceTimeoutRef.current); + if (recorderRef.current) { + recorderRef.current.stop(); + recorderRef.current = null; + } + if (transcriptionServiceRef.current) { + transcriptionServiceRef.current.disconnect(); + transcriptionServiceRef.current = null; + } + if (pttTimerRef.current) clearTimeout(pttTimerRef.current); + if (disconnectTimerRef.current) clearTimeout(disconnectTimerRef.current); + }, + [], + ); + + const handleVoiceInput = useCallback( + (key: Key): boolean => { + const activeRecording = isRecording || isRecordingRef.current; + + if (activeRecording) { + const activationMode = + settings.experimental.voice?.activationMode ?? 'push-to-talk'; + + if (keyMatchers[Command.ESCAPE](key)) { + stopVoiceRecording(); + return true; + } + + if (keyMatchers[Command.VOICE_MODE_PTT](key)) { + if (activationMode === 'push-to-talk') { + if (pttTimerRef.current) { + clearTimeout(pttTimerRef.current); + } + pttTimerRef.current = setTimeout(() => { + stopVoiceRecording(); + pttTimerRef.current = null; + }, RELEASE_DELAY_MS); + return true; + } else { + stopVoiceRecording(); + return true; + } + } + return true; + } + + if (isVoiceModeEnabled) { + const activationMode = + settings.experimental.voice?.activationMode ?? 
'push-to-talk'; + + if (keyMatchers[Command.ESCAPE](key) && buffer.text === '') { + setVoiceModeEnabled(false); + return true; + } + + if (keyMatchers[Command.VOICE_MODE_PTT](key)) { + if ( + key.name === 'space' && + !key.ctrl && + !key.alt && + !key.shift && + !key.cmd + ) { + if (activationMode === 'toggle') { + startVoiceRecording(); + return true; + } else { + if (pttStateRef.current === 'idle') { + buffer.insert(' '); + pttStateRef.current = 'possible-hold'; + + if (pttTimerRef.current) clearTimeout(pttTimerRef.current); + pttTimerRef.current = setTimeout(() => { + pttStateRef.current = 'idle'; + pttTimerRef.current = null; + }, HOLD_DELAY_MS); + return true; + } else if (pttStateRef.current === 'possible-hold') { + if (pttTimerRef.current) clearTimeout(pttTimerRef.current); + buffer.backspace(); + pttStateRef.current = 'recording'; + startVoiceRecording(); + + pttTimerRef.current = setTimeout(() => { + stopVoiceRecording(); + pttTimerRef.current = null; + }, RELEASE_DELAY_MS); + return true; + } + } + } + } + + if (pttStateRef.current === 'possible-hold') { + pttStateRef.current = 'idle'; + if (pttTimerRef.current) { + clearTimeout(pttTimerRef.current); + pttTimerRef.current = null; + } + } + } + + return false; + }, + [ + isRecording, + isVoiceModeEnabled, + settings.experimental.voice, + keyMatchers, + stopVoiceRecording, + startVoiceRecording, + buffer, + setVoiceModeEnabled, + ], + ); + + return { + isRecording, + isConnecting, + startVoiceRecording, + stopVoiceRecording, + handleVoiceInput, + resetTurnBaseline: () => { + turnBaselineRef.current = null; + }, + }; +} diff --git a/packages/cli/src/ui/hooks/useVoiceModelCommand.ts b/packages/cli/src/ui/hooks/useVoiceModelCommand.ts new file mode 100644 index 0000000000..943c65ce30 --- /dev/null +++ b/packages/cli/src/ui/hooks/useVoiceModelCommand.ts @@ -0,0 +1,31 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { useState, useCallback } from 'react'; + +interface UseVoiceModelCommandReturn { + isVoiceModelDialogOpen: boolean; + openVoiceModelDialog: () => void; + closeVoiceModelDialog: () => void; +} + +export const useVoiceModelCommand = (): UseVoiceModelCommandReturn => { + const [isVoiceModelDialogOpen, setIsVoiceModelDialogOpen] = useState(false); + + const openVoiceModelDialog = useCallback(() => { + setIsVoiceModelDialogOpen(true); + }, []); + + const closeVoiceModelDialog = useCallback(() => { + setIsVoiceModelDialogOpen(false); + }, []); + + return { + isVoiceModelDialogOpen, + openVoiceModelDialog, + closeVoiceModelDialog, + }; +}; diff --git a/packages/cli/src/ui/key/keyBindings.ts b/packages/cli/src/ui/key/keyBindings.ts index e3fbcd8262..a038f6173c 100644 --- a/packages/cli/src/ui/key/keyBindings.ts +++ b/packages/cli/src/ui/key/keyBindings.ts @@ -97,6 +97,7 @@ export enum Command { RESTART_APP = 'app.restart', SUSPEND_APP = 'app.suspend', SHOW_SHELL_INPUT_UNFOCUS_WARNING = 'app.showShellUnfocusWarning', + VOICE_MODE_PTT = 'app.voiceModePTT', // Background Shell Controls BACKGROUND_SHELL_ESCAPE = 'background.escape', @@ -407,9 +408,7 @@ export const defaultKeyBindingConfig: KeyBindingConfig = new Map([ [Command.RESTART_APP, [new KeyBinding('r'), new KeyBinding('shift+r')]], [Command.SUSPEND_APP, [new KeyBinding('ctrl+z')]], [Command.SHOW_SHELL_INPUT_UNFOCUS_WARNING, [new KeyBinding('tab')]], - [Command.DUMP_FRAME, [new KeyBinding('f8')]], - [Command.START_RECORDING, [new KeyBinding('f6')]], - [Command.STOP_RECORDING, [new KeyBinding('f7')]], + 
[Command.VOICE_MODE_PTT, [new KeyBinding('space')]], // Background Shell Controls [Command.BACKGROUND_SHELL_ESCAPE, [new KeyBinding('escape')]], @@ -424,6 +423,10 @@ export const defaultKeyBindingConfig: KeyBindingConfig = new Map([ // Extension Controls [Command.UPDATE_EXTENSION, [new KeyBinding('i')]], [Command.LINK_EXTENSION, [new KeyBinding('l')]], + + [Command.DUMP_FRAME, [new KeyBinding('f8')]], + [Command.START_RECORDING, [new KeyBinding('f6')]], + [Command.STOP_RECORDING, [new KeyBinding('f7')]], ]); interface CommandCategory { @@ -538,6 +541,7 @@ export const commandCategories: readonly CommandCategory[] = [ Command.RESTART_APP, Command.SUSPEND_APP, Command.SHOW_SHELL_INPUT_UNFOCUS_WARNING, + Command.VOICE_MODE_PTT, ], }, { @@ -658,6 +662,7 @@ export const commandDescriptions: Readonly> = { [Command.SUSPEND_APP]: 'Suspend the CLI and move it to the background.', [Command.SHOW_SHELL_INPUT_UNFOCUS_WARNING]: 'Show warning when trying to move focus away from shell input.', + [Command.VOICE_MODE_PTT]: 'Hold to speak in Voice Mode.', // Background Shell Controls [Command.BACKGROUND_SHELL_ESCAPE]: 'Dismiss background shell list.', diff --git a/packages/cli/src/ui/noninteractive/nonInteractiveUi.ts b/packages/cli/src/ui/noninteractive/nonInteractiveUi.ts index 3aff41d2de..9118518455 100644 --- a/packages/cli/src/ui/noninteractive/nonInteractiveUi.ts +++ b/packages/cli/src/ui/noninteractive/nonInteractiveUi.ts @@ -43,5 +43,6 @@ export function createNonInteractiveUI(): CommandContext['ui'] { removeComponent: () => {}, toggleBackgroundTasks: () => {}, toggleShortcutsHelp: () => {}, + toggleVoiceMode: () => {}, }; } diff --git a/packages/core/package.json b/packages/core/package.json index eda0e1e5fe..48412418c4 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -56,6 +56,7 @@ "ajv-formats": "^3.0.0", "chardet": "^2.1.0", "chokidar": "^5.0.0", + "command-exists": "^1.2.9", "diff": "^8.0.3", "dotenv": "^17.2.4", "dotenv-expand": "^12.0.3", diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 939fa77d70..11f7a24841 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -691,6 +691,7 @@ export interface ConfigParameters { ptyInfo?: string; disableYoloMode?: boolean; disableAlwaysAllow?: boolean; + voiceMode?: boolean; rawOutput?: boolean; acceptRawOutputRisk?: boolean; dynamicModelConfiguration?: boolean; @@ -963,6 +964,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly topicUpdateNarration: boolean; private readonly disableLLMCorrection: boolean; private readonly planEnabled: boolean; + private readonly voiceMode: boolean; private readonly trackerEnabled: boolean; private readonly planModeRoutingEnabled: boolean; private readonly modelSteering: boolean; @@ -1117,6 +1119,7 @@ export class Config implements McpContext, AgentLoopContext { this.agents = params.agents ?? {}; this.disableLLMCorrection = params.disableLLMCorrection ?? true; this.planEnabled = params.plan ?? true; + this.voiceMode = params.voiceMode ?? false; this.trackerEnabled = params.tracker ?? false; this.planModeRoutingEnabled = params.planSettings?.modelRouting ?? true; this.enableEventDrivenScheduler = params.enableEventDrivenScheduler ?? 
true; @@ -2969,6 +2972,10 @@ export class Config implements McpContext, AgentLoopContext { return this.planEnabled; } + isVoiceModeEnabled(): boolean { + return this.voiceMode; + } + isTrackerEnabled(): boolean { return this.trackerEnabled; } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3123dd9096..86b36a6b0e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -297,3 +297,12 @@ export * from './context/profiles.js'; // Export trust utility export * from './utils/trust.js'; + +// Export voice utilities +export * from './voice/audioRecorder.js'; +export * from './voice/transcriptionProvider.js'; +export * from './voice/geminiLiveTranscriptionProvider.js'; +export * from './voice/whisperTranscriptionProvider.js'; +export * from './voice/transcriptionFactory.js'; +export * from './voice/whisperModelManager.js'; +export { isBinaryAvailable } from './utils/binaryCheck.js'; diff --git a/packages/core/src/utils/binaryCheck.ts b/packages/core/src/utils/binaryCheck.ts new file mode 100644 index 0000000000..8d37f0def4 --- /dev/null +++ b/packages/core/src/utils/binaryCheck.ts @@ -0,0 +1,14 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { sync as commandExistsSync } from 'command-exists'; + +/** + * Checks if a binary is available in the system PATH. + */ +export function isBinaryAvailable(binaryName: string): boolean { + return commandExistsSync(binaryName); +} diff --git a/packages/core/src/voice/audioRecorder.ts b/packages/core/src/voice/audioRecorder.ts new file mode 100644 index 0000000000..c1217e5d7a --- /dev/null +++ b/packages/core/src/voice/audioRecorder.ts @@ -0,0 +1,115 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process'; +import { EventEmitter } from 'node:events'; +import commandExists from 'command-exists'; + +export interface AudioRecorderEvents { + data: [Buffer]; + start: []; + stop: []; + error: [Error]; +} + +/** + * Captures audio from the microphone using `sox` (`rec`). + * Emits 16kHz, 16-bit, mono PCM chunks. + */ +export class AudioRecorder extends EventEmitter { + private recProcess: ChildProcessWithoutNullStreams | null = null; + private isRecordingInternal = false; + + get isRecording(): boolean { + return this.isRecordingInternal; + } + + /** + * Checks if `rec` (sox) is available on the system. + */ + static async isAvailable(): Promise { + try { + await commandExists('rec'); + return true; + } catch { + return false; + } + } + + async start(): Promise { + if (this.isRecordingInternal) return; + this.isRecordingInternal = true; + + try { + const available = await AudioRecorder.isAvailable(); + if (!this.isRecordingInternal) return; // Check if stopped while checking availability + + if (!available) { + throw new Error( + 'The `rec` command (provided by SoX) is required for voice mode. 
Please install SoX (e.g., `brew install sox` on macOS or `sudo apt install sox libsox-fmt-all` on Linux).', + ); + } + + // rec -q -V0 -e signed -c 1 -b 16 -r 16000 -t raw - + this.recProcess = spawn('rec', [ + '-q', + '-V0', + '-e', + 'signed', + '-c', + '1', + '-b', + '16', + '-r', + '16000', + '-t', + 'raw', + '-', + ]); + + if (!this.isRecordingInternal) { + this.recProcess.kill('SIGTERM'); + this.recProcess = null; + return; + } + + this.recProcess.stdout.on('data', (data: Buffer) => { + this.emit('data', data); + }); + + this.recProcess.stderr.on('data', (_data: Buffer) => { + // rec might print warnings to stderr, we could log them or ignore + // console.warn(`rec stderr: ${data.toString()}`); + }); + + this.recProcess.on('error', (err) => { + this.emit('error', err); + this.stop(); + }); + + this.recProcess.on('close', () => { + this.stop(); + }); + + this.emit('start'); + } catch (err) { + this.isRecordingInternal = false; + throw err; + } + } + + stop(): void { + if (!this.isRecordingInternal) return; + this.isRecordingInternal = false; + + if (this.recProcess) { + this.recProcess.kill('SIGTERM'); + this.recProcess = null; + } + + this.emit('stop'); + } +} diff --git a/packages/core/src/voice/geminiLiveTranscriptionProvider.ts b/packages/core/src/voice/geminiLiveTranscriptionProvider.ts new file mode 100644 index 0000000000..4895a60e56 --- /dev/null +++ b/packages/core/src/voice/geminiLiveTranscriptionProvider.ts @@ -0,0 +1,178 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import WebSocket from 'ws'; +import { EventEmitter, once } from 'node:events'; +import { debugLogger } from '../utils/debugLogger.js'; +import type { + TranscriptionProvider, + TranscriptionEvents, +} from './transcriptionProvider.js'; + +import { z } from 'zod'; + +const LiveAPIResponseSchema = z.object({ + setupComplete: z.record(z.unknown()).optional(), + serverContent: z + .object({ + turnComplete: z.boolean().optional(), + inputTranscription: z + .object({ + text: z.string().optional(), + }) + .optional(), + outputTranscription: z + .object({ + text: z.string().optional(), + }) + .optional(), + modelTurn: z + .object({ + parts: z + .array( + z.object({ + text: z.string().optional(), + inlineData: z + .object({ + data: z.string(), + }) + .optional(), + }), + ) + .optional(), + }) + .optional(), + }) + .optional(), +}); + +/** + * Connects to the Gemini Live API using raw WebSockets to support API Key authentication. + */ +export class GeminiLiveTranscriptionProvider + extends EventEmitter + implements TranscriptionProvider +{ + private ws: WebSocket | null = null; + private currentTranscription = ''; + + constructor(private readonly apiKey: string) { + super(); + } + + async connect(): Promise { + const modelName = 'gemini-3.1-flash-live-preview'; + const baseUrl = + 'wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent'; + + if (!this.apiKey) { + throw new Error('No API key provided'); + } + + // NOTE: The Generative Language WebSocket API requires the API key to be passed via the 'key' query parameter. 
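+      // The resulting endpoint is the BidiGenerateContent WebSocket URL from
+      // `baseUrl` above with the key appended, e.g.
+      // `wss://generativelanguage.googleapis.com/ws/...BidiGenerateContent?key=<API key>`.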
+ const url = `${baseUrl}?key=${this.apiKey}`; + debugLogger.debug( + `[GeminiLiveTranscription] Connecting to model ${modelName} via raw WebSocket with API Key...`, + ); + + try { + this.ws = new WebSocket(url, { + maxPayload: 1 << 20, // 1MB limit for safety + }); + + this.ws.on('message', (data) => { + try { + const parsedData: unknown = JSON.parse(data.toString()); + const result = LiveAPIResponseSchema.safeParse(parsedData); + + if (result.success) { + const response = result.data; + if (response.serverContent) { + const content = response.serverContent; + + if (content.turnComplete) { + this.emit('turnComplete'); + } + + if (content.inputTranscription?.text) { + const text = content.inputTranscription.text; + debugLogger.debug( + `[GeminiLiveTranscription] Transcription received (Cloud): "${text}"`, + ); + this.currentTranscription = text; + this.emit('transcription', this.currentTranscription); + } + } + } + } catch (e) { + debugLogger.error( + '[GeminiLiveTranscription] Error parsing message:', + e, + ); + } + }); + + this.ws.on('error', (error) => { + debugLogger.error('[GeminiLiveTranscription] WebSocket Error:', error); + this.emit('error', error); + }); + + this.ws.on('close', (code, reason) => { + debugLogger.debug( + `[GeminiLiveTranscription] Connection Closed. Code: ${code}, Reason: ${reason}`, + ); + this.emit('close'); + this.ws = null; + }); + + await once(this.ws, 'open'); + + const setupMessage = { + setup: { + model: `models/${modelName}`, + generation_config: { + response_modalities: ['audio'], + }, + input_audio_transcription: {}, + }, + }; + + this.ws.send(JSON.stringify(setupMessage)); + this.currentTranscription = ''; + } catch (err) { + debugLogger.error( + '[GeminiLiveTranscription] Failed to establish connection:', + err, + ); + throw err; + } + } + + sendAudioChunk(chunk: Buffer): void { + if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return; + + const audioMessage = { + realtime_input: { + audio: { + data: chunk.toString('base64'), + mime_type: 'audio/pcm;rate=16000', + }, + }, + }; + this.ws.send(JSON.stringify(audioMessage)); + } + + getTranscription(): string { + return this.currentTranscription; + } + + disconnect(): void { + if (this.ws) { + this.ws.close(); + this.ws = null; + } + } +} diff --git a/packages/core/src/voice/transcriptionFactory.ts b/packages/core/src/voice/transcriptionFactory.ts new file mode 100644 index 0000000000..ee0ac64700 --- /dev/null +++ b/packages/core/src/voice/transcriptionFactory.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import { homedir, GEMINI_DIR } from '../utils/paths.js'; +import { GeminiLiveTranscriptionProvider } from './geminiLiveTranscriptionProvider.js'; +import { WhisperTranscriptionProvider } from './whisperTranscriptionProvider.js'; +import type { TranscriptionProvider } from './transcriptionProvider.js'; + +export class TranscriptionFactory { + static createProvider( + voiceConfig: { backend?: string; whisperModel?: string } | undefined, + apiKey: string, + ): TranscriptionProvider { + const backend = voiceConfig?.backend ?? 'gemini-live'; + + if (backend === 'whisper') { + const modelsDir = path.join(homedir(), GEMINI_DIR, 'whisper_models'); + if (!fs.existsSync(modelsDir)) { + fs.mkdirSync(modelsDir, { recursive: true }); + } + + const modelName = voiceConfig?.whisperModel ?? 
'ggml-base.en.bin'; + const modelPath = path.join(modelsDir, modelName); + + return new WhisperTranscriptionProvider({ + modelPath, + threads: 4, + step: 0, + length: 5000, + }); + } + + // Default to Gemini Live + return new GeminiLiveTranscriptionProvider(apiKey); + } +} diff --git a/packages/core/src/voice/transcriptionProvider.ts b/packages/core/src/voice/transcriptionProvider.ts new file mode 100644 index 0000000000..6b3bf20c0a --- /dev/null +++ b/packages/core/src/voice/transcriptionProvider.ts @@ -0,0 +1,33 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { EventEmitter } from 'node:events'; + +export interface TranscriptionEvents { + /** Emitted when partial or full transcription text is available. */ + transcription: [string]; + /** Emitted when a speaking turn is considered complete. */ + turnComplete: []; + /** Emitted when an error occurs during transcription. */ + error: [Error]; + /** Emitted when the transcription service connection is closed. */ + close: []; +} + +/** + * Common interface for all transcription backends (Cloud or Local). + */ +export interface TranscriptionProvider + extends EventEmitter { + /** Establish connection to the transcription service. */ + connect(): Promise; + /** Send a chunk of raw audio data to the service. */ + sendAudioChunk(chunk: Buffer): void; + /** Disconnect from the transcription service. */ + disconnect(): void; + /** Get the current full transcription for the session. */ + getTranscription(): string; +} diff --git a/packages/core/src/voice/whisperModelManager.ts b/packages/core/src/voice/whisperModelManager.ts new file mode 100644 index 0000000000..64988eb48d --- /dev/null +++ b/packages/core/src/voice/whisperModelManager.ts @@ -0,0 +1,107 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import { EventEmitter } from 'node:events'; +import { homedir, GEMINI_DIR } from '../utils/paths.js'; +import { debugLogger } from '../utils/debugLogger.js'; + +export interface WhisperModelProgress { + modelName: string; + transferred: number; + total: number; + percentage: number; +} + +export interface WhisperModelManagerEvents { + progress: [WhisperModelProgress]; +} + +const ALLOWED_MODELS = [ + 'ggml-tiny.en.bin', + 'ggml-base.en.bin', + 'ggml-large-v3-turbo-q5_0.bin', + 'ggml-large-v3-turbo-q8_0.bin', +]; + +/** + * Manages Whisper models (checking existence, downloading). 
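+ * Models are stored in the `whisper_models` directory inside the Gemini config
+ * directory and are fetched on demand from the ggerganov/whisper.cpp repository
+ * on Hugging Face; only names listed in ALLOWED_MODELS are accepted.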
+ */ +export class WhisperModelManager extends EventEmitter { + private readonly modelsDir: string; + + constructor() { + super(); + this.modelsDir = path.join(homedir(), GEMINI_DIR, 'whisper_models'); + } + + isModelInstalled(modelName: string): boolean { + this.validateModelName(modelName); + return fs.existsSync(path.join(this.modelsDir, modelName)); + } + + getModelPath(modelName: string): string { + this.validateModelName(modelName); + return path.join(this.modelsDir, modelName); + } + + async downloadModel(modelName: string): Promise { + this.validateModelName(modelName); + + if (!fs.existsSync(this.modelsDir)) { + fs.mkdirSync(this.modelsDir, { recursive: true }); + } + + const destination = path.join(this.modelsDir, modelName); + const url = `https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${modelName}`; + + debugLogger.debug( + `[WhisperModelManager] Downloading ${modelName} from ${url}`, + ); + + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to download model: ${response.statusText}`); + } + + const total = parseInt(response.headers.get('content-length') || '0', 10); + let transferred = 0; + + const reader = response.body?.getReader(); + if (!reader) { + throw new Error('Response body is not readable'); + } + + const writer = fs.createWriteStream(destination); + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + transferred += value.length; + writer.write(value); + + const percentage = total > 0 ? transferred / total : 0; + this.emit('progress', { + modelName, + transferred, + total, + percentage, + }); + } + } finally { + writer.end(); + } + } + + private validateModelName(modelName: string): void { + if (!ALLOWED_MODELS.includes(modelName)) { + throw new Error(`Unauthorized model name: ${modelName}`); + } + } +} diff --git a/packages/core/src/voice/whisperTranscriptionProvider.test.ts b/packages/core/src/voice/whisperTranscriptionProvider.test.ts new file mode 100644 index 0000000000..69b48fbb02 --- /dev/null +++ b/packages/core/src/voice/whisperTranscriptionProvider.test.ts @@ -0,0 +1,31 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { WhisperTranscriptionProvider } from './whisperTranscriptionProvider.js'; +import commandExists from 'command-exists'; + +vi.mock('command-exists', () => ({ + default: vi.fn(), +})); + +describe('WhisperTranscriptionProvider', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should throw a friendly error if whisper-stream is not available', async () => { + vi.mocked(commandExists).mockRejectedValue(new Error('not found')); + + const provider = new WhisperTranscriptionProvider({ + modelPath: 'test-model.bin', + }); + + await expect(provider.connect()).rejects.toThrow( + 'The `whisper-stream` command is required for local voice mode. 
Please install it (e.g., `brew install whisper-cpp` on macOS).', + ); + }); +}); diff --git a/packages/core/src/voice/whisperTranscriptionProvider.ts b/packages/core/src/voice/whisperTranscriptionProvider.ts new file mode 100644 index 0000000000..b5b871df83 --- /dev/null +++ b/packages/core/src/voice/whisperTranscriptionProvider.ts @@ -0,0 +1,199 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process'; +import { EventEmitter } from 'node:events'; +import commandExists from 'command-exists'; +import { debugLogger } from '../utils/debugLogger.js'; +import type { + TranscriptionProvider, + TranscriptionEvents, +} from './transcriptionProvider.js'; + +export interface WhisperProviderOptions { + modelPath: string; + threads?: number; + step?: number; + length?: number; +} + +/** + * Local transcription provider using `whisper-stream` from whisper.cpp. + * + * Uses the Sliding Window Mode with VAD (--step 0) for stable, + * non-overlapping transcription blocks that can be appended directly. + */ +export class WhisperTranscriptionProvider + extends EventEmitter + implements TranscriptionProvider +{ + private process: ChildProcessWithoutNullStreams | null = null; + private currentTranscription = ''; + + constructor(private readonly options: WhisperProviderOptions) { + super(); + } + + /** + * Checks if `whisper-stream` is available on the system. + */ + static async isAvailable(): Promise { + try { + await commandExists('whisper-stream'); + return true; + } catch { + return false; + } + } + + async connect(): Promise { + const { modelPath, threads = 4, step = 0, length = 5000 } = this.options; + + this.currentTranscription = ''; + + const available = await WhisperTranscriptionProvider.isAvailable(); + if (!available) { + return Promise.reject( + new Error( + 'The `whisper-stream` command is required for local voice mode. Please install it (e.g., `brew install whisper-cpp` on macOS).', + ), + ); + } + + debugLogger.debug( + `[WhisperTranscription] Starting whisper-stream with model: ${modelPath} (VAD mode: step=${step}, length=${length})`, + ); + + return new Promise((resolve, reject) => { + let isResolved = false; + + try { + // whisper-stream -m -t --step 0 --length -vth 0.6 + // Setting step == 0 enables sliding window mode with VAD, which outputs + // non-overlapping transcription blocks suitable for appending. + this.process = spawn('whisper-stream', [ + '-m', + modelPath, + '-t', + threads.toString(), + '--step', + step.toString(), + '--length', + length.toString(), + '-vth', + '0.6', + ]); + + this.process.stdout.on('data', (data: Buffer) => { + const output = data.toString(); + this.parseOutput(output); + }); + + this.process.stderr.on('data', (data: Buffer) => { + const msg = data.toString(); + if (msg.includes('error')) { + debugLogger.error(`[WhisperTranscription] stderr: ${msg}`); + if (!isResolved) { + isResolved = true; + reject(new Error(msg)); + } + } + + // whisper-stream prints "whisper_init_from_file_with_params_no_state: loading model from..." + // and finally "main: processing, press Ctrl+C to stop" when ready. 
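+          // Readiness is detected on stderr; stdout only carries the timestamped
+          // transcription lines once processing has started. If the banner never
+          // appears, the fallback timeout below resolves after 10 seconds.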
+ if (!isResolved && msg.includes('main: processing')) { + debugLogger.debug('[WhisperTranscription] whisper-stream is ready'); + isResolved = true; + resolve(); + } + }); + + this.process.on('error', (err) => { + debugLogger.error('[WhisperTranscription] Process error:', err); + this.emit('error', err); + if (!isResolved) { + isResolved = true; + reject(err); + } + }); + + this.process.on('close', (code) => { + debugLogger.debug( + `[WhisperTranscription] Process closed with code ${code}`, + ); + this.emit('close'); + this.process = null; + }); + + // Fallback timeout in case "main: processing" is never seen + setTimeout(() => { + if (!isResolved) { + debugLogger.warn( + '[WhisperTranscription] Connection timeout (fallback resolve)', + ); + isResolved = true; + resolve(); + } + }, 10000); + } catch (err) { + debugLogger.error( + '[WhisperTranscription] Failed to spawn process:', + err, + ); + if (!isResolved) { + isResolved = true; + reject(err); + } + } + }); + } + + private parseOutput(output: string): void { + // whisper-stream output format: "[00:00:00.000 --> 00:00:02.000] Hello world." + const lines = output.split('\n'); + + for (const line of lines) { + const match = line.match(/\[.* --> .*\]\s+(.*)/); + if (match && match[1]) { + let text = match[1].trim(); + + // Filter out [Silence], [music], (laughter), etc. + text = text + .replace(/\[[^\]]*\]/g, '') + .replace(/\([^)]*\)/g, '') + .trim(); + + if (text) { + // In VAD mode (step=0), each line is a completed speech block. + // Append it to the buffer to ensure it doesn't disappear. + this.currentTranscription = this.currentTranscription + ? `${this.currentTranscription} ${text}` + : text; + + debugLogger.debug( + `[WhisperTranscription] Transcription updated (Local-VAD): "${this.currentTranscription}"`, + ); + this.emit('transcription', this.currentTranscription); + } + } + } + } + + sendAudioChunk(_chunk: Buffer): void { + // whisper-stream handles its own audio capture. 
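+    // Unlike the Gemini Live provider, this backend needs no AudioRecorder chunks:
+    // whisper-stream captures from the microphone itself, so data passed in is ignored.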
+ } + + getTranscription(): string { + return this.currentTranscription; + } + + disconnect(): void { + if (this.process) { + this.process.kill('SIGTERM'); + this.process = null; + } + } +} diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index f4263fcc3e..a883150d66 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2910,6 +2910,59 @@ "default": false, "type": "boolean" }, + "voiceMode": { + "title": "Voice Mode", + "description": "Enable experimental voice dictation and commands (/voice, /voice model).", + "markdownDescription": "Enable experimental voice dictation and commands (/voice, /voice model).\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `false`", + "default": false, + "type": "boolean" + }, + "voice": { + "title": "Voice", + "description": "Settings for voice mode and transcription.", + "markdownDescription": "Settings for voice mode and transcription.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `{}`", + "default": {}, + "type": "object", + "properties": { + "activationMode": { + "title": "Voice Activation Mode", + "description": "How to trigger voice recording with the Space key.", + "markdownDescription": "How to trigger voice recording with the Space key.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `push-to-talk`", + "default": "push-to-talk", + "type": "string", + "enum": ["push-to-talk", "toggle"] + }, + "backend": { + "title": "Voice Transcription Backend", + "description": "The backend to use for voice transcription.", + "markdownDescription": "The backend to use for voice transcription.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `gemini-live`", + "default": "gemini-live", + "type": "string", + "enum": ["gemini-live", "whisper"] + }, + "whisperModel": { + "title": "Whisper Model", + "description": "The Whisper model to use for local transcription.", + "markdownDescription": "The Whisper model to use for local transcription.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `ggml-base.en.bin`", + "default": "ggml-base.en.bin", + "type": "string", + "enum": [ + "ggml-tiny.en.bin", + "ggml-base.en.bin", + "ggml-large-v3-turbo-q5_0.bin", + "ggml-large-v3-turbo-q8_0.bin" + ] + }, + "stopGracePeriodMs": { + "title": "Voice Stop Grace Period (ms)", + "description": "How long to wait for final transcription after stopping recording.", + "markdownDescription": "How long to wait for final transcription after stopping recording.\n\n- Category: `Experimental`\n- Requires restart: `no`\n- Default: `1000`", + "default": 1000, + "type": "number" + } + }, + "additionalProperties": false + }, "adk": { "title": "ADK", "description": "Settings for the Agent Development Kit (ADK).",