diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx index 3bf48259fe..d52897abed 100644 --- a/packages/cli/src/ui/components/InputPrompt.test.tsx +++ b/packages/cli/src/ui/components/InputPrompt.test.tsx @@ -348,7 +348,7 @@ describe('InputPrompt', () => { visualToLogicalMap: [[0, 0]], visualToTransformedMap: [0], transformationsByLine: [], - getOffset: vi.fn().mockReturnValue(0), + getOffset: vi.fn().mockImplementation(() => mockBuffer.cursor[1]), pastedContent: {}, } as unknown as TextBuffer; @@ -5114,17 +5114,15 @@ describe('InputPrompt', () => { ); }); await waitFor(() => { - expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 'end'); + expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 13); }); - // Emit turnComplete (Gemini Live starts over after this) + // turnComplete advances the baseline; next turn appends after it await act(async () => { (fakeTranscriptionProvider as unknown as EventEmitter).emit( 'turnComplete', ); }); - - // Emit second part (Gemini Live sends new turn text starting from empty) await act(async () => { (fakeTranscriptionProvider as unknown as EventEmitter).emit( 'transcription', @@ -5132,10 +5130,9 @@ describe('InputPrompt', () => { ); }); await waitFor(() => { - // Should have appended 'world' to the baseline 'initial hello' expect(mockBuffer.setText).toHaveBeenCalledWith( 'initial hello world', - 'end', + 19, ); }); @@ -5172,13 +5169,48 @@ describe('InputPrompt', () => { await waitFor(() => { expect(mockBuffer.setText).toHaveBeenCalledWith( 'First turn. Second turn.', - 'end', + 24, ); }); unmount(); }); + it('should insert transcription at cursor position when buffer has text before and after (toggle)', async () => { + await act(async () => { + mockBuffer.setText('hello world'); + mockBuffer.cursor = [0, 5]; // cursor after 'hello' + }); + const { stdin, unmount } = await renderWithProviders( + , + { + uiState: { isVoiceModeEnabled: true } as UIState, + settings: createMockSettings({ + experimental: { voice: { activationMode: 'toggle' } }, + }), + }, + ); + + await act(async () => { + stdin.write(' '); + }); + await act(async () => { + (fakeTranscriptionProvider as unknown as EventEmitter).emit( + 'transcription', + 'there', + ); + }); + + // 'hello'(5) + ' '(1) + 'there'(5) = cursor at 11; ' world' preserved after + await waitFor(() => { + expect(mockBuffer.setText).toHaveBeenCalledWith( + 'hello there world', + 11, + ); + }); + unmount(); + }); + describe('push-to-talk', () => { beforeEach(() => { vi.useFakeTimers(); diff --git a/packages/cli/src/ui/hooks/useVoiceMode.ts b/packages/cli/src/ui/hooks/useVoiceMode.ts index 0f37c66357..e2e61f76d2 100644 --- a/packages/cli/src/ui/hooks/useVoiceMode.ts +++ b/packages/cli/src/ui/hooks/useVoiceMode.ts @@ -51,6 +51,7 @@ export function useVoiceMode({ const recorderRef = useRef(null); const transcriptionServiceRef = useRef(null); const turnBaselineRef = useRef(null); + const turnBaselineCursorOffsetRef = useRef(0); const pttStateRef = useRef<'idle' | 'possible-hold' | 'recording'>('idle'); const pttTimerRef = useRef(null); @@ -112,6 +113,7 @@ export function useVoiceMode({ recordingInProgressRef.current = true; turnBaselineRef.current = bufferRef.current.text; + turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset(); setIsConnecting(true); setIsRecording(true); @@ -193,29 +195,23 @@ export function useVoiceMode({ } if (text) { - const currentBufferText = bufferRef.current.text; - const previousTranscription = liveTranscriptionRef.current; + const baseline = turnBaselineRef.current ?? ''; + const insertOffset = turnBaselineCursorOffsetRef.current; + const textBefore = baseline.slice(0, insertOffset); + const textAfter = baseline.slice(insertOffset); - let newTotalText = currentBufferText; + const prefix = + textBefore.length > 0 && !/\s$/.test(textBefore) + ? textBefore + ' ' + : textBefore; - if ( - previousTranscription && - currentBufferText.endsWith(previousTranscription) - ) { - newTotalText = currentBufferText.slice( - 0, - -previousTranscription.length, - ); - } else if ( - currentBufferText && - !currentBufferText.endsWith(' ') && - !currentBufferText.endsWith('\n') - ) { - newTotalText += ' '; - } + const suffix = + text.length > 0 && textAfter.length > 0 && !/^\s/.test(textAfter) + ? ' ' + : ''; - newTotalText += text; - bufferRef.current.setText(newTotalText, 'end'); + const newTotalText = prefix + text + suffix + textAfter; + bufferRef.current.setText(newTotalText, prefix.length + text.length); } liveTranscriptionRef.current = text; }); @@ -226,6 +222,9 @@ export function useVoiceMode({ stopRequestedRef.current ) return; + // Advance the baseline so subsequent turns append after this turn's text + turnBaselineRef.current = bufferRef.current.text; + turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset(); liveTranscriptionRef.current = ''; });