From 60a6a47d56ecdf4c9b5cb9ffbdc1e6a26efb2040 Mon Sep 17 00:00:00 2001 From: Coco Sheng Date: Mon, 4 May 2026 14:32:15 -0400 Subject: [PATCH] feat(voice): add privacy and compliance UX warning for Gemini Live backend (#26454) --- docs/cli/settings.md | 2 +- docs/reference/configuration.md | 4 +- packages/cli/src/config/settingsSchema.ts | 6 +- .../ui/components/VoiceModelDialog.test.tsx | 92 +++++++++++++++++++ .../src/ui/components/VoiceModelDialog.tsx | 30 ++++-- schemas/settings.schema.json | 4 +- 6 files changed, 126 insertions(+), 12 deletions(-) create mode 100644 packages/cli/src/ui/components/VoiceModelDialog.test.tsx diff --git a/docs/cli/settings.md b/docs/cli/settings.md index a5c7ecae87..d39a0e18f7 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -166,7 +166,7 @@ they appear in the UI. | Gemma Models | `experimental.gemma` | Enable access to Gemma 4 models via Gemini API. | `true` | | Voice Mode | `experimental.voiceMode` | Enable experimental voice dictation and commands (/voice, /voice model). | `false` | | Voice Activation Mode | `experimental.voice.activationMode` | How to trigger voice recording with the Space key. | `"push-to-talk"` | -| Voice Transcription Backend | `experimental.voice.backend` | The backend to use for voice transcription. | `"gemini-live"` | +| Voice Transcription Backend | `experimental.voice.backend` | The backend to use for voice transcription. Note: When using the Gemini Live backend, voice recordings are sent to Google Cloud for transcription. | `"gemini-live"` | | Whisper Model | `experimental.voice.whisperModel` | The Whisper model to use for local transcription. | `"ggml-base.en.bin"` | | Voice Stop Grace Period (ms) | `experimental.voice.stopGracePeriodMs` | How long to wait for final transcription after stopping recording. | `1000` | | Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index f0eaafc27c..3498634dd1 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1774,7 +1774,9 @@ their corresponding top-level category object in your `settings.json` file. - **Values:** `"push-to-talk"`, `"toggle"` - **`experimental.voice.backend`** (enum): - - **Description:** The backend to use for voice transcription. + - **Description:** The backend to use for voice transcription. Note: When + using the Gemini Live backend, voice recordings are sent to Google Cloud for + transcription. - **Default:** `"gemini-live"` - **Values:** `"gemini-live"`, `"whisper"` diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 225d3d8ac0..fa941c9a01 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2099,7 +2099,11 @@ const SETTINGS_SCHEMA = { category: 'Experimental', requiresRestart: false, default: 'gemini-live', - description: 'The backend to use for voice transcription.', + description: oneLine` + The backend to use for voice transcription. Note: When using the + Gemini Live backend, voice recordings are sent to Google Cloud for + transcription. + `, showInDialog: true, options: [ { value: 'gemini-live', label: 'Gemini Live API (Cloud)' }, diff --git a/packages/cli/src/ui/components/VoiceModelDialog.test.tsx b/packages/cli/src/ui/components/VoiceModelDialog.test.tsx new file mode 100644 index 0000000000..7ec081b032 --- /dev/null +++ b/packages/cli/src/ui/components/VoiceModelDialog.test.tsx @@ -0,0 +1,92 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { renderWithProviders } from '../../test-utils/render.js'; +import { createMockSettings } from '../../test-utils/settings.js'; +import { VoiceModelDialog } from './VoiceModelDialog.js'; +import { act } from 'react'; +import { waitFor } from '../../test-utils/async.js'; +import { SettingScope } from '../../config/settings.js'; + +vi.mock('@google/gemini-cli-core', async () => { + const actual = await vi.importActual('@google/gemini-cli-core'); + return { + ...actual, + isBinaryAvailable: vi.fn().mockReturnValue(true), + WhisperModelManager: vi.fn().mockImplementation(() => ({ + isModelInstalled: vi.fn().mockReturnValue(false), + on: vi.fn(), + off: vi.fn(), + downloadModel: vi.fn(), + })), + }; +}); + +describe('VoiceModelDialog', () => { + it('should display a privacy warning when Gemini Live API (Cloud) is selected', async () => { + const onClose = vi.fn(); + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + ); + + await waitUntilReady(); + + const frame = lastFrame(); + expect(frame).toContain('Gemini Live API (Cloud)'); + expect(frame).toContain('When using the Gemini Live backend'); + }); + + it('should NOT display a privacy warning when Whisper (Local) is highlighted', async () => { + const onClose = vi.fn(); + const { lastFrame, waitUntilReady, stdin } = await renderWithProviders( + , + ); + + await waitUntilReady(); + + // Verify warning is present for default (Gemini Live) + expect(lastFrame()).toContain('When using the Gemini Live backend'); + + // Arrow Down to highlight Whisper + await act(async () => { + stdin.write('\u001b[B'); + }); + + await waitFor(() => { + const frame = lastFrame(); + expect(frame).toContain('Whisper (Local)'); + expect(frame).not.toContain('When using the Gemini Live backend'); + }); + }); + + it('should update settings and close dialog when a backend is selected', async () => { + const onClose = vi.fn(); + const settings = createMockSettings(); + const setValueSpy = vi.spyOn(settings, 'setValue'); + + const { waitUntilReady, stdin } = await renderWithProviders( + , + { settings }, + ); + + await waitUntilReady(); + + // Select Gemini Live (it's already highlighted, just press Enter) + await act(async () => { + stdin.write('\r'); + }); + + await waitFor(() => { + expect(setValueSpy).toHaveBeenCalledWith( + SettingScope.User, + 'experimental.voice.backend', + 'gemini-live', + ); + expect(onClose).toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/cli/src/ui/components/VoiceModelDialog.tsx b/packages/cli/src/ui/components/VoiceModelDialog.tsx index f340a5ccf4..e882c89235 100644 --- a/packages/cli/src/ui/components/VoiceModelDialog.tsx +++ b/packages/cli/src/ui/components/VoiceModelDialog.tsx @@ -18,6 +18,7 @@ import { type WhisperModelProgress, } from '@google/gemini-cli-core'; import { CliSpinner } from './CliSpinner.js'; +import { WarningMessage } from './messages/WarningMessage.js'; interface VoiceModelDialogProps { onClose: () => void; @@ -68,6 +69,9 @@ export function VoiceModelDialog({ const currentWhisperModel = settings.merged.experimental.voice?.whisperModel ?? 'ggml-base.en.bin'; + const [highlightedBackend, setHighlightedBackend] = + useState(currentBackend); + const handleKeypress = useCallback( (key: Key) => { if (key.name === 'escape') { @@ -101,6 +105,10 @@ export function VoiceModelDialog({ [setSetting, onClose], ); + const handleBackendHighlight = useCallback((value: string) => { + setHighlightedBackend(value); + }, []); + const handleWhisperModelSelect = useCallback( async (modelName: string) => { if (modelManager.isModelInstalled(modelName)) { @@ -203,14 +211,22 @@ export function VoiceModelDialog({ ) : ( - + {view === 'backend' ? ( - + <> + + {highlightedBackend === 'gemini-live' && ( + + + + )} + ) : (