diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index 3bf48259fe..d52897abed 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -348,7 +348,7 @@ describe('InputPrompt', () => {
visualToLogicalMap: [[0, 0]],
visualToTransformedMap: [0],
transformationsByLine: [],
- getOffset: vi.fn().mockReturnValue(0),
+ getOffset: vi.fn().mockImplementation(() => mockBuffer.cursor[1]),
pastedContent: {},
} as unknown as TextBuffer;
@@ -5114,17 +5114,15 @@ describe('InputPrompt', () => {
);
});
await waitFor(() => {
- expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 'end');
+ expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 13);
});
- // Emit turnComplete (Gemini Live starts over after this)
+ // turnComplete advances the baseline; next turn appends after it
await act(async () => {
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
'turnComplete',
);
});
-
- // Emit second part (Gemini Live sends new turn text starting from empty)
await act(async () => {
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
'transcription',
@@ -5132,10 +5130,9 @@ describe('InputPrompt', () => {
);
});
await waitFor(() => {
- // Should have appended 'world' to the baseline 'initial hello'
expect(mockBuffer.setText).toHaveBeenCalledWith(
'initial hello world',
- 'end',
+ 19,
);
});
@@ -5172,13 +5169,48 @@ describe('InputPrompt', () => {
await waitFor(() => {
expect(mockBuffer.setText).toHaveBeenCalledWith(
'First turn. Second turn.',
- 'end',
+ 24,
);
});
unmount();
});
+ it('should insert transcription at cursor position when buffer has text before and after (toggle)', async () => {
+ // Seed the buffer with text on both sides of the cursor.
+ // NOTE(review): mockBuffer.setText is a mock here — assumes it also updates
+ // mockBuffer.text so the hook sees 'hello world' as its baseline; confirm.
+ await act(async () => {
+ mockBuffer.setText('hello world');
+ mockBuffer.cursor = [0, 5]; // cursor after 'hello'
+ });
+ const { stdin, unmount } = await renderWithProviders(
+ // NOTE(review): the element argument is missing before the comma below in
+ // the visible text — confirm which component is rendered here.
+ ,
+ {
+ uiState: { isVoiceModeEnabled: true } as UIState,
+ settings: createMockSettings({
+ experimental: { voice: { activationMode: 'toggle' } },
+ }),
+ },
+ );
+
+ // Presumably a space keypress starts recording in 'toggle' activation mode
+ // — TODO confirm against the key handling.
+ await act(async () => {
+ stdin.write(' ');
+ });
+ // Simulate the transcription provider emitting the text 'there'.
+ await act(async () => {
+ (fakeTranscriptionProvider as unknown as EventEmitter).emit(
+ 'transcription',
+ 'there',
+ );
+ });
+
+ // 'hello'(5) + ' '(1) + 'there'(5) = cursor at 11; ' world' preserved after
+ await waitFor(() => {
+ expect(mockBuffer.setText).toHaveBeenCalledWith(
+ 'hello there world',
+ 11,
+ );
+ });
+ unmount();
+ });
+
describe('push-to-talk', () => {
beforeEach(() => {
vi.useFakeTimers();
diff --git a/packages/cli/src/ui/hooks/useVoiceMode.ts b/packages/cli/src/ui/hooks/useVoiceMode.ts
index 0f37c66357..e2e61f76d2 100644
--- a/packages/cli/src/ui/hooks/useVoiceMode.ts
+++ b/packages/cli/src/ui/hooks/useVoiceMode.ts
@@ -51,6 +51,7 @@ export function useVoiceMode({
const recorderRef = useRef(null);
const transcriptionServiceRef = useRef(null);
const turnBaselineRef = useRef(null);
+ // Cursor offset captured together with turnBaselineRef; the transcription
+ // handler splits the baseline text at this offset to insert spoken text.
+ const turnBaselineCursorOffsetRef = useRef(0);
const pttStateRef = useRef<'idle' | 'possible-hold' | 'recording'>('idle');
const pttTimerRef = useRef(null);
@@ -112,6 +113,7 @@ export function useVoiceMode({
recordingInProgressRef.current = true;
+ // Snapshot the buffer text and the cursor offset as a pair: transcriptions
+ // are later spliced into this snapshot at the saved offset.
turnBaselineRef.current = bufferRef.current.text;
+ turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset();
setIsConnecting(true);
setIsRecording(true);
@@ -193,29 +195,23 @@ export function useVoiceMode({
}
if (text) {
- const currentBufferText = bufferRef.current.text;
- const previousTranscription = liveTranscriptionRef.current;
+ // Rebuild the buffer from the turn baseline (not the live buffer text):
+ // split the baseline at the saved cursor offset and place the
+ // transcription between the two halves.
+ // NOTE(review): insertOffset comes from getOffset() but is used as a
+ // String.prototype.slice index (UTF-16 code units) — confirm both use the
+ // same unit for text containing surrogate pairs.
+ const baseline = turnBaselineRef.current ?? '';
+ const insertOffset = turnBaselineCursorOffsetRef.current;
+ const textBefore = baseline.slice(0, insertOffset);
+ const textAfter = baseline.slice(insertOffset);
- let newTotalText = currentBufferText;
+ // Separate existing text from the transcription: append one space when
+ // textBefore is non-empty and does not already end in whitespace.
+ const prefix =
+ textBefore.length > 0 && !/\s$/.test(textBefore)
+ ? textBefore + ' '
+ : textBefore;
- if (
- previousTranscription &&
- currentBufferText.endsWith(previousTranscription)
- ) {
- newTotalText = currentBufferText.slice(
- 0,
- -previousTranscription.length,
- );
- } else if (
- currentBufferText &&
- !currentBufferText.endsWith(' ') &&
- !currentBufferText.endsWith('\n')
- ) {
- newTotalText += ' ';
- }
+ // Likewise add one space before textAfter unless it is empty or already
+ // begins with whitespace.
+ // NOTE(review): `text.length > 0` looks redundant inside `if (text)` when
+ // text is a string — confirm and consider dropping it.
+ const suffix =
+ text.length > 0 && textAfter.length > 0 && !/^\s/.test(textAfter)
+ ? ' '
+ : '';
- newTotalText += text;
- bufferRef.current.setText(newTotalText, 'end');
+ // The second argument is the offset just past the inserted transcription
+ // (before any separator added for textAfter); it replaces the former 'end'.
+ const newTotalText = prefix + text + suffix + textAfter;
+ bufferRef.current.setText(newTotalText, prefix.length + text.length);
}
liveTranscriptionRef.current = text;
});
@@ -226,6 +222,9 @@ export function useVoiceMode({
stopRequestedRef.current
)
return;
+ // Advance the baseline so subsequent turns append after this turn's text
+ turnBaselineRef.current = bufferRef.current.text;
+ turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset();
liveTranscriptionRef.current = '';
});