fix(cli): insert voice transcription at cursor position instead of appending (#26287)

Co-authored-by: Zheyuan <zlin252@emory.edu>
This commit is contained in:
Zheyuan Lin
2026-05-01 12:41:17 -04:00
committed by GitHub
parent d9f273e440
commit 7213822e84
2 changed files with 59 additions and 28 deletions
@@ -348,7 +348,7 @@ describe('InputPrompt', () => {
visualToLogicalMap: [[0, 0]],
visualToTransformedMap: [0],
transformationsByLine: [],
getOffset: vi.fn().mockReturnValue(0),
getOffset: vi.fn().mockImplementation(() => mockBuffer.cursor[1]),
pastedContent: {},
} as unknown as TextBuffer;
@@ -5114,17 +5114,15 @@ describe('InputPrompt', () => {
);
});
await waitFor(() => {
expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 'end');
expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 13);
});
// Emit turnComplete (Gemini Live starts over after this)
// turnComplete advances the baseline; next turn appends after it
await act(async () => {
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
'turnComplete',
);
});
// Emit second part (Gemini Live sends new turn text starting from empty)
await act(async () => {
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
'transcription',
@@ -5132,10 +5130,9 @@ describe('InputPrompt', () => {
);
});
await waitFor(() => {
// Should have appended 'world' to the baseline 'initial hello'
expect(mockBuffer.setText).toHaveBeenCalledWith(
'initial hello world',
'end',
19,
);
});
@@ -5172,13 +5169,48 @@ describe('InputPrompt', () => {
await waitFor(() => {
expect(mockBuffer.setText).toHaveBeenCalledWith(
'First turn. Second turn.',
'end',
24,
);
});
unmount();
});
it('should insert transcription at cursor position when buffer has text before and after (toggle)', async () => {
  // Arrange: buffer holds text on both sides of the cursor -> 'hello' | ' world'.
  await act(async () => {
    mockBuffer.setText('hello world');
    mockBuffer.cursor = [0, 5];
  });

  const { stdin, unmount } = await renderWithProviders(
    <TestInputPrompt {...props} focus={true} buffer={mockBuffer} />,
    {
      uiState: { isVoiceModeEnabled: true } as UIState,
      settings: createMockSettings({
        experimental: { voice: { activationMode: 'toggle' } },
      }),
    },
  );

  // Act: send a space keypress (presumably what starts recording in toggle
  // mode -- confirm against the hook), then deliver one transcription chunk.
  await act(async () => {
    stdin.write(' ');
  });

  const transcriptionEvents =
    fakeTranscriptionProvider as unknown as EventEmitter;
  await act(async () => {
    transcriptionEvents.emit('transcription', 'there');
  });

  // Assert: the chunk lands between the two halves of the original text.
  // The expected cursor offset is len('hello') + one separating space +
  // len('there') = 5 + 1 + 5 = 11, and the trailing ' world' is untouched.
  const expectedText = 'hello there world';
  const expectedCursorOffset = 11;
  await waitFor(() => {
    expect(mockBuffer.setText).toHaveBeenCalledWith(
      expectedText,
      expectedCursorOffset,
    );
  });

  unmount();
});
describe('push-to-talk', () => {
beforeEach(() => {
vi.useFakeTimers();
+19 -20
View File
@@ -51,6 +51,7 @@ export function useVoiceMode({
const recorderRef = useRef<AudioRecorder | null>(null);
const transcriptionServiceRef = useRef<TranscriptionProvider | null>(null);
const turnBaselineRef = useRef<string | null>(null);
const turnBaselineCursorOffsetRef = useRef<number>(0);
const pttStateRef = useRef<'idle' | 'possible-hold' | 'recording'>('idle');
const pttTimerRef = useRef<NodeJS.Timeout | null>(null);
@@ -112,6 +113,7 @@ export function useVoiceMode({
recordingInProgressRef.current = true;
turnBaselineRef.current = bufferRef.current.text;
turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset();
setIsConnecting(true);
setIsRecording(true);
@@ -193,29 +195,23 @@ export function useVoiceMode({
}
if (text) {
const currentBufferText = bufferRef.current.text;
const previousTranscription = liveTranscriptionRef.current;
const baseline = turnBaselineRef.current ?? '';
const insertOffset = turnBaselineCursorOffsetRef.current;
const textBefore = baseline.slice(0, insertOffset);
const textAfter = baseline.slice(insertOffset);
let newTotalText = currentBufferText;
const prefix =
textBefore.length > 0 && !/\s$/.test(textBefore)
? textBefore + ' '
: textBefore;
if (
previousTranscription &&
currentBufferText.endsWith(previousTranscription)
) {
newTotalText = currentBufferText.slice(
0,
-previousTranscription.length,
);
} else if (
currentBufferText &&
!currentBufferText.endsWith(' ') &&
!currentBufferText.endsWith('\n')
) {
newTotalText += ' ';
}
const suffix =
text.length > 0 && textAfter.length > 0 && !/^\s/.test(textAfter)
? ' '
: '';
newTotalText += text;
bufferRef.current.setText(newTotalText, 'end');
const newTotalText = prefix + text + suffix + textAfter;
bufferRef.current.setText(newTotalText, prefix.length + text.length);
}
liveTranscriptionRef.current = text;
});
@@ -226,6 +222,9 @@ export function useVoiceMode({
stopRequestedRef.current
)
return;
// Advance the baseline so subsequent turns append after this turn's text
turnBaselineRef.current = bufferRef.current.text;
turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset();
liveTranscriptionRef.current = '';
});