mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-14 22:02:59 -07:00
fix(cli): insert voice transcription at cursor position instead of appending (#26287)
Co-authored-by: Zheyuan <zlin252@emory.edu>
This commit is contained in:
@@ -348,7 +348,7 @@ describe('InputPrompt', () => {
|
|||||||
visualToLogicalMap: [[0, 0]],
|
visualToLogicalMap: [[0, 0]],
|
||||||
visualToTransformedMap: [0],
|
visualToTransformedMap: [0],
|
||||||
transformationsByLine: [],
|
transformationsByLine: [],
|
||||||
getOffset: vi.fn().mockReturnValue(0),
|
getOffset: vi.fn().mockImplementation(() => mockBuffer.cursor[1]),
|
||||||
pastedContent: {},
|
pastedContent: {},
|
||||||
} as unknown as TextBuffer;
|
} as unknown as TextBuffer;
|
||||||
|
|
||||||
@@ -5114,17 +5114,15 @@ describe('InputPrompt', () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 'end');
|
expect(mockBuffer.setText).toHaveBeenCalledWith('initial hello', 13);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Emit turnComplete (Gemini Live starts over after this)
|
// turnComplete advances the baseline; next turn appends after it
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
|
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
|
||||||
'turnComplete',
|
'turnComplete',
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Emit second part (Gemini Live sends new turn text starting from empty)
|
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
|
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
|
||||||
'transcription',
|
'transcription',
|
||||||
@@ -5132,10 +5130,9 @@ describe('InputPrompt', () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
// Should have appended 'world' to the baseline 'initial hello'
|
|
||||||
expect(mockBuffer.setText).toHaveBeenCalledWith(
|
expect(mockBuffer.setText).toHaveBeenCalledWith(
|
||||||
'initial hello world',
|
'initial hello world',
|
||||||
'end',
|
19,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -5172,13 +5169,48 @@ describe('InputPrompt', () => {
|
|||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockBuffer.setText).toHaveBeenCalledWith(
|
expect(mockBuffer.setText).toHaveBeenCalledWith(
|
||||||
'First turn. Second turn.',
|
'First turn. Second turn.',
|
||||||
'end',
|
24,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
unmount();
|
unmount();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should insert transcription at cursor position when buffer has text before and after (toggle)', async () => {
|
||||||
|
await act(async () => {
|
||||||
|
mockBuffer.setText('hello world');
|
||||||
|
mockBuffer.cursor = [0, 5]; // cursor after 'hello'
|
||||||
|
});
|
||||||
|
const { stdin, unmount } = await renderWithProviders(
|
||||||
|
<TestInputPrompt {...props} focus={true} buffer={mockBuffer} />,
|
||||||
|
{
|
||||||
|
uiState: { isVoiceModeEnabled: true } as UIState,
|
||||||
|
settings: createMockSettings({
|
||||||
|
experimental: { voice: { activationMode: 'toggle' } },
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
await act(async () => {
|
||||||
|
stdin.write(' ');
|
||||||
|
});
|
||||||
|
await act(async () => {
|
||||||
|
(fakeTranscriptionProvider as unknown as EventEmitter).emit(
|
||||||
|
'transcription',
|
||||||
|
'there',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// 'hello'(5) + ' '(1) + 'there'(5) = cursor at 11; ' world' preserved after
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(mockBuffer.setText).toHaveBeenCalledWith(
|
||||||
|
'hello there world',
|
||||||
|
11,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
unmount();
|
||||||
|
});
|
||||||
|
|
||||||
describe('push-to-talk', () => {
|
describe('push-to-talk', () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
vi.useFakeTimers();
|
vi.useFakeTimers();
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ export function useVoiceMode({
|
|||||||
const recorderRef = useRef<AudioRecorder | null>(null);
|
const recorderRef = useRef<AudioRecorder | null>(null);
|
||||||
const transcriptionServiceRef = useRef<TranscriptionProvider | null>(null);
|
const transcriptionServiceRef = useRef<TranscriptionProvider | null>(null);
|
||||||
const turnBaselineRef = useRef<string | null>(null);
|
const turnBaselineRef = useRef<string | null>(null);
|
||||||
|
const turnBaselineCursorOffsetRef = useRef<number>(0);
|
||||||
|
|
||||||
const pttStateRef = useRef<'idle' | 'possible-hold' | 'recording'>('idle');
|
const pttStateRef = useRef<'idle' | 'possible-hold' | 'recording'>('idle');
|
||||||
const pttTimerRef = useRef<NodeJS.Timeout | null>(null);
|
const pttTimerRef = useRef<NodeJS.Timeout | null>(null);
|
||||||
@@ -112,6 +113,7 @@ export function useVoiceMode({
|
|||||||
|
|
||||||
recordingInProgressRef.current = true;
|
recordingInProgressRef.current = true;
|
||||||
turnBaselineRef.current = bufferRef.current.text;
|
turnBaselineRef.current = bufferRef.current.text;
|
||||||
|
turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset();
|
||||||
|
|
||||||
setIsConnecting(true);
|
setIsConnecting(true);
|
||||||
setIsRecording(true);
|
setIsRecording(true);
|
||||||
@@ -193,29 +195,23 @@ export function useVoiceMode({
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (text) {
|
if (text) {
|
||||||
const currentBufferText = bufferRef.current.text;
|
const baseline = turnBaselineRef.current ?? '';
|
||||||
const previousTranscription = liveTranscriptionRef.current;
|
const insertOffset = turnBaselineCursorOffsetRef.current;
|
||||||
|
const textBefore = baseline.slice(0, insertOffset);
|
||||||
|
const textAfter = baseline.slice(insertOffset);
|
||||||
|
|
||||||
let newTotalText = currentBufferText;
|
const prefix =
|
||||||
|
textBefore.length > 0 && !/\s$/.test(textBefore)
|
||||||
|
? textBefore + ' '
|
||||||
|
: textBefore;
|
||||||
|
|
||||||
if (
|
const suffix =
|
||||||
previousTranscription &&
|
text.length > 0 && textAfter.length > 0 && !/^\s/.test(textAfter)
|
||||||
currentBufferText.endsWith(previousTranscription)
|
? ' '
|
||||||
) {
|
: '';
|
||||||
newTotalText = currentBufferText.slice(
|
|
||||||
0,
|
|
||||||
-previousTranscription.length,
|
|
||||||
);
|
|
||||||
} else if (
|
|
||||||
currentBufferText &&
|
|
||||||
!currentBufferText.endsWith(' ') &&
|
|
||||||
!currentBufferText.endsWith('\n')
|
|
||||||
) {
|
|
||||||
newTotalText += ' ';
|
|
||||||
}
|
|
||||||
|
|
||||||
newTotalText += text;
|
const newTotalText = prefix + text + suffix + textAfter;
|
||||||
bufferRef.current.setText(newTotalText, 'end');
|
bufferRef.current.setText(newTotalText, prefix.length + text.length);
|
||||||
}
|
}
|
||||||
liveTranscriptionRef.current = text;
|
liveTranscriptionRef.current = text;
|
||||||
});
|
});
|
||||||
@@ -226,6 +222,9 @@ export function useVoiceMode({
|
|||||||
stopRequestedRef.current
|
stopRequestedRef.current
|
||||||
)
|
)
|
||||||
return;
|
return;
|
||||||
|
// Advance the baseline so subsequent turns append after this turn's text
|
||||||
|
turnBaselineRef.current = bufferRef.current.text;
|
||||||
|
turnBaselineCursorOffsetRef.current = bufferRef.current.getOffset();
|
||||||
liveTranscriptionRef.current = '';
|
liveTranscriptionRef.current = '';
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user