mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-26 13:04:49 -07:00
feat(voice): implement real-time voice mode with cloud and local backends (#24174)
This commit is contained in:
@@ -56,6 +56,7 @@ import {
|
||||
debugLogger,
|
||||
type Config,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { useVoiceMode } from '../hooks/useVoiceMode.js';
|
||||
import {
|
||||
parseInputForHighlighting,
|
||||
parseSegmentsFromTokens,
|
||||
@@ -159,7 +160,6 @@ export function isLargePaste(text: string): boolean {
|
||||
}
|
||||
|
||||
const DOUBLE_TAB_CLEAN_UI_TOGGLE_WINDOW_MS = 350;
|
||||
|
||||
/**
|
||||
* Attempt to toggle expansion of a paste placeholder in the buffer.
|
||||
* Returns true if a toggle action was performed or hint was shown, false otherwise.
|
||||
@@ -238,6 +238,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
setEmbeddedShellFocused,
|
||||
setShortcutsHelpVisible,
|
||||
toggleCleanUiDetailsVisible,
|
||||
setVoiceModeEnabled,
|
||||
} = useUIActions();
|
||||
const {
|
||||
terminalWidth,
|
||||
@@ -246,6 +247,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
backgroundTasks,
|
||||
backgroundTaskHeight,
|
||||
shortcutsHelpVisible,
|
||||
isVoiceModeEnabled,
|
||||
} = useUIState();
|
||||
const [suppressCompletion, setSuppressCompletion] = useState(false);
|
||||
const { handlePress: registerPlainTabPress, resetCount: resetPlainTabPress } =
|
||||
@@ -263,6 +265,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
resetEscapeState();
|
||||
if (buffer.text.length > 0) {
|
||||
buffer.setText('');
|
||||
resetTurnBaseline();
|
||||
resetCompletionState();
|
||||
} else if (history.length > 0) {
|
||||
onSubmit('/rewind');
|
||||
@@ -281,6 +284,16 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
const hasUserNavigatedSuggestions = useRef(false);
|
||||
const listRef = useRef<ScrollableListRef<ScrollableItem>>(null);
|
||||
|
||||
const { isRecording, handleVoiceInput, resetTurnBaseline } = useVoiceMode({
|
||||
buffer,
|
||||
config,
|
||||
settings,
|
||||
setQueueErrorMessage,
|
||||
isVoiceModeEnabled,
|
||||
setVoiceModeEnabled,
|
||||
keyMatchers,
|
||||
});
|
||||
|
||||
const [reverseSearchActive, setReverseSearchActive] = useState(false);
|
||||
const [commandSearchActive, setCommandSearchActive] = useState(false);
|
||||
const [textBeforeReverseSearch, setTextBeforeReverseSearch] = useState('');
|
||||
@@ -387,6 +400,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
// Clear the buffer *before* calling onSubmit to prevent potential re-submission
|
||||
// if onSubmit triggers a re-render while the buffer still holds the old value.
|
||||
buffer.setText('');
|
||||
resetTurnBaseline();
|
||||
onSubmit(processedValue);
|
||||
resetCompletionState();
|
||||
resetReverseSearchCompletionState();
|
||||
@@ -398,6 +412,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
shellModeActive,
|
||||
shellHistory,
|
||||
resetReverseSearchCompletionState,
|
||||
resetTurnBaseline,
|
||||
],
|
||||
);
|
||||
|
||||
@@ -647,6 +662,8 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
|
||||
const handleInput = useCallback(
|
||||
(key: Key) => {
|
||||
if (handleVoiceInput(key)) return true;
|
||||
|
||||
// Determine if this keypress is a history navigation command
|
||||
const isHistoryUp =
|
||||
!shellModeActive &&
|
||||
@@ -873,9 +890,9 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
) {
|
||||
setShellModeActive(!shellModeActive);
|
||||
buffer.setText(''); // Clear the '!' from input
|
||||
resetTurnBaseline();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (keyMatchers[Command.ESCAPE](key)) {
|
||||
const cancelSearch = (
|
||||
setActive: (active: boolean) => void,
|
||||
@@ -1360,6 +1377,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
backgroundTaskHeight,
|
||||
streamingState,
|
||||
handleEscPress,
|
||||
resetTurnBaseline,
|
||||
registerPlainTabPress,
|
||||
resetPlainTabPress,
|
||||
toggleCleanUiDetailsVisible,
|
||||
@@ -1369,9 +1387,9 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
keyMatchers,
|
||||
isHelpDismissKey,
|
||||
settings,
|
||||
handleVoiceInput,
|
||||
],
|
||||
);
|
||||
|
||||
useKeypress(handleInput, {
|
||||
isActive: !isEmbeddedShellFocused && !copyModeEnabled,
|
||||
priority: true,
|
||||
@@ -1792,20 +1810,39 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
|
||||
)}{' '}
|
||||
</Text>
|
||||
<Box flexGrow={1} flexDirection="column" ref={innerBoxRef}>
|
||||
{buffer.text.length === 0 && placeholder ? (
|
||||
showCursor ? (
|
||||
<Text
|
||||
terminalCursorFocus={showCursor}
|
||||
terminalCursorPosition={0}
|
||||
>
|
||||
{chalk.inverse(placeholder.slice(0, 1))}
|
||||
<Text color={theme.text.secondary}>
|
||||
{placeholder.slice(1)}
|
||||
</Text>
|
||||
{isRecording && (
|
||||
<Box flexDirection="row" marginBottom={0}>
|
||||
<Text color={theme.status.success}>🎙️ Listening...</Text>
|
||||
</Box>
|
||||
)}
|
||||
{isVoiceModeEnabled && !isRecording && (
|
||||
<Box flexDirection="row" marginBottom={0}>
|
||||
<Text color={theme.text.secondary}>
|
||||
> Voice mode:{' '}
|
||||
{(settings.experimental.voice?.activationMode ??
|
||||
'push-to-talk') === 'push-to-talk'
|
||||
? 'Hold Space to record'
|
||||
: 'Space to start/stop recording'}{' '}
|
||||
(Esc to exit)
|
||||
</Text>
|
||||
) : (
|
||||
<Text color={theme.text.secondary}>{placeholder}</Text>
|
||||
)
|
||||
</Box>
|
||||
)}
|
||||
{buffer.text.length === 0 && !isRecording ? (
|
||||
!isVoiceModeEnabled && placeholder ? (
|
||||
showCursor ? (
|
||||
<Text
|
||||
terminalCursorFocus={showCursor}
|
||||
terminalCursorPosition={0}
|
||||
>
|
||||
{chalk.inverse(placeholder.slice(0, 1))}
|
||||
<Text color={theme.text.secondary}>
|
||||
{placeholder.slice(1)}
|
||||
</Text>
|
||||
</Text>
|
||||
) : (
|
||||
<Text color={theme.text.secondary}>{placeholder}</Text>
|
||||
)
|
||||
) : null
|
||||
) : (
|
||||
<Box
|
||||
flexDirection="column"
|
||||
|
||||
Reference in New Issue
Block a user