feat(voice): implement real-time voice mode with cloud and local backends (#24174)

2026-07-14 12:00:18 -07:00 · 2026-04-24 14:29:38 -07:00
parent 048bf6e514
commit 2e0641c83b
40 changed files with 2244 additions and 43 deletions
@@ -1000,6 +1000,7 @@ export async function loadCliConfig(
    enableExtensionReloading: settings.experimental?.extensionReloading,
    enableAgents: settings.experimental?.enableAgents,
    plan: settings.general?.plan?.enabled ?? true,
+    voiceMode: settings.experimental?.voiceMode,
    tracker: settings.experimental?.taskTracker,
    directWebFetch: settings.experimental?.directWebFetch,
    planSettings: settings.general?.plan?.directory
@@ -2061,6 +2061,87 @@ const SETTINGS_SCHEMA = {
        description: 'Enable access to Gemma 4 models (experimental).',
        showInDialog: true,
      },
+      voiceMode: { // Boolean on/off flag for the experimental voice feature; off by default.
+        type: 'boolean',
+        label: 'Voice Mode',
+        category: 'Experimental',
+        requiresRestart: false,
+        default: false,
+        description:
+          'Enable experimental voice dictation and commands (/voice, /voice model).',
+        showInDialog: true,
+      },
+      voice: { // Object grouping the voice sub-settings declared under `properties`.
+        type: 'object',
+        label: 'Voice',
+        category: 'Experimental',
+        requiresRestart: false,
+        default: {}, // Empty object; each property below declares its own default.
+        description: 'Settings for voice mode and transcription.',
+        showInDialog: false, // The group itself is hidden; every child below sets showInDialog: true.
+        properties: {
+          activationMode: { // Enum: 'push-to-talk' (default) or 'toggle'.
+            type: 'enum',
+            label: 'Voice Activation Mode',
+            category: 'Experimental',
+            requiresRestart: false,
+            default: 'push-to-talk',
+            description: 'How to trigger voice recording with the Space key.',
+            showInDialog: true,
+            options: [
+              { value: 'push-to-talk', label: 'Push-To-Talk (Hold Space)' },
+              { value: 'toggle', label: 'Toggle (Press Space to start/stop)' },
+            ],
+          },
+          backend: { // Enum: 'gemini-live' (default) or 'whisper'.
+            type: 'enum',
+            label: 'Voice Transcription Backend',
+            category: 'Experimental',
+            requiresRestart: false,
+            default: 'gemini-live',
+            description: 'The backend to use for voice transcription.',
+            showInDialog: true,
+            options: [
+              { value: 'gemini-live', label: 'Gemini Live API (Cloud)' },
+              { value: 'whisper', label: 'Whisper (Local)' },
+            ],
+          },
+          whisperModel: { // Enum of model file names; presumably only consulted when backend is 'whisper' — confirm.
+            type: 'enum',
+            label: 'Whisper Model',
+            category: 'Experimental',
+            requiresRestart: false,
+            default: 'ggml-base.en.bin',
+            description: 'The Whisper model to use for local transcription.',
+            showInDialog: true,
+            options: [
+              { value: 'ggml-tiny.en.bin', label: 'Tiny (EN) - Fast (~75MB)' },
+              {
+                value: 'ggml-base.en.bin',
+                label: 'Base (EN) - Balanced (~142MB)',
+              },
+              {
+                value: 'ggml-large-v3-turbo-q5_0.bin',
+                label: 'Large v3 Turbo (Q5_0) - High Accuracy (~547MB)',
+              },
+              {
+                value: 'ggml-large-v3-turbo-q8_0.bin',
+                label: 'Large v3 Turbo (Q8_0) - Max Accuracy (~834MB)',
+              },
+            ],
+          },
+          stopGracePeriodMs: { // Number, in milliseconds per the label; default 1000. No min/max is declared here.
+            type: 'number',
+            label: 'Voice Stop Grace Period (ms)',
+            category: 'Experimental',
+            requiresRestart: false,
+            default: 1000,
+            description:
+              'How long to wait for final transcription after stopping recording.',
+            showInDialog: true,
+          },
+        },
+      },
      adk: {
        type: 'object',
        label: 'ADK',