feat(voice): implement real-time voice mode with cloud and local backends (#24174)

This commit is contained in:
Abhijit Balaji
2026-04-24 14:29:38 -07:00
committed by GitHub
parent 048bf6e514
commit 2e0641c83b
40 changed files with 2244 additions and 43 deletions
+1
View File
@@ -1000,6 +1000,7 @@ export async function loadCliConfig(
enableExtensionReloading: settings.experimental?.extensionReloading,
enableAgents: settings.experimental?.enableAgents,
plan: settings.general?.plan?.enabled ?? true,
voiceMode: settings.experimental?.voiceMode,
tracker: settings.experimental?.taskTracker,
directWebFetch: settings.experimental?.directWebFetch,
planSettings: settings.general?.plan?.directory
+81
View File
@@ -2061,6 +2061,87 @@ const SETTINGS_SCHEMA = {
description: 'Enable access to Gemma 4 models (experimental).',
showInDialog: true,
},
// Schema entry for the experimental voice-mode feature flag: a boolean that
// defaults to false (feature off) and is flagged showInDialog: true.
// NOTE(review): declared with requiresRestart: false — confirm that whatever
// consumes this flag re-reads it at runtime rather than only once at startup.
voiceMode: {
type: 'boolean',
label: 'Voice Mode',
category: 'Experimental',
requiresRestart: false,
default: false,
description:
'Enable experimental voice dictation and commands (/voice, /voice model).',
showInDialog: true,
},
// Container object grouping the voice-mode sub-settings (activation mode,
// transcription backend, Whisper model, stop grace period). Defaults to an
// empty object, so each child falls back to its own declared default.
// NOTE(review): the container sets showInDialog: false while every child sets
// showInDialog: true — confirm how the settings dialog treats children of a
// hidden parent.
voice: {
type: 'object',
label: 'Voice',
category: 'Experimental',
requiresRestart: false,
default: {},
description: 'Settings for voice mode and transcription.',
showInDialog: false,
properties: {
// Enum with two options, 'push-to-talk' (the default) and 'toggle'; the
// option labels describe holding Space versus pressing Space to start/stop.
activationMode: {
type: 'enum',
label: 'Voice Activation Mode',
category: 'Experimental',
requiresRestart: false,
default: 'push-to-talk',
description: 'How to trigger voice recording with the Space key.',
showInDialog: true,
options: [
{ value: 'push-to-talk', label: 'Push-To-Talk (Hold Space)' },
{ value: 'toggle', label: 'Toggle (Press Space to start/stop)' },
],
},
// Enum selecting the transcription backend: 'gemini-live' (the default,
// labelled as cloud) or 'whisper' (labelled as local).
backend: {
type: 'enum',
label: 'Voice Transcription Backend',
category: 'Experimental',
requiresRestart: false,
default: 'gemini-live',
description: 'The backend to use for voice transcription.',
showInDialog: true,
options: [
{ value: 'gemini-live', label: 'Gemini Live API (Cloud)' },
{ value: 'whisper', label: 'Whisper (Local)' },
],
},
// Enum of Whisper model file names; the default is 'ggml-base.en.bin'.
// The sizes quoted in the labels are approximate, as written by the author.
// NOTE(review): presumably only meaningful when backend is 'whisper' —
// nothing in this entry ties its visibility to the backend choice; confirm.
whisperModel: {
type: 'enum',
label: 'Whisper Model',
category: 'Experimental',
requiresRestart: false,
default: 'ggml-base.en.bin',
description: 'The Whisper model to use for local transcription.',
showInDialog: true,
options: [
{ value: 'ggml-tiny.en.bin', label: 'Tiny (EN) - Fast (~75MB)' },
{
value: 'ggml-base.en.bin',
label: 'Base (EN) - Balanced (~142MB)',
},
{
value: 'ggml-large-v3-turbo-q5_0.bin',
label: 'Large v3 Turbo (Q5_0) - High Accuracy (~547MB)',
},
{
value: 'ggml-large-v3-turbo-q8_0.bin',
label: 'Large v3 Turbo (Q8_0) - Max Accuracy (~834MB)',
},
],
},
// Numeric setting, default 1000; the label states the unit is milliseconds.
// NOTE(review): no minimum/maximum is declared in this entry — confirm that
// consumers guard against negative or non-finite values.
stopGracePeriodMs: {
type: 'number',
label: 'Voice Stop Grace Period (ms)',
category: 'Experimental',
requiresRestart: false,
default: 1000,
description:
'How long to wait for final transcription after stopping recording.',
showInDialog: true,
},
},
},
adk: {
type: 'object',
label: 'ADK',