mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-16 00:51:25 -07:00
fix(cli): Fix word navigation for CJK characters (#14475)
This commit is contained in:
@@ -2241,4 +2241,103 @@ describe('Unicode helper functions', () => {
|
||||
expect(cpLen('hello مرحبا world')).toBe(17);
|
||||
});
|
||||
});
|
||||
|
||||
describe('useTextBuffer CJK Navigation', () => {
  const viewport = { width: 80, height: 24 };

  it('should navigate by word in Chinese', () => {
    const { result } = renderHook(() =>
      useTextBuffer({
        initialText: '你好世界',
        initialCursorOffset: 4, // End of string ('你好世界' is 4 code points long)
        viewport,
        isValidPath: () => false,
      }),
    );

    // Expected word segmentation: "你好" | "世界"

    // Move word left -> start of "世界" (index 2)
    act(() => {
      result.current.move('wordLeft');
    });
    expect(result.current.cursor[1]).toBe(2);

    // Move word left again -> start of "你好" (index 0)
    act(() => {
      result.current.move('wordLeft');
    });
    expect(result.current.cursor[1]).toBe(0);

    // Move word left again (should stay at 0)
    act(() => {
      result.current.move('wordLeft');
    });
    expect(result.current.cursor[1]).toBe(0);

    // Move word right -> end of "你好" (index 2)
    act(() => {
      result.current.move('wordRight');
    });
    expect(result.current.cursor[1]).toBe(2);

    // Move word right again -> end of "世界" (index 4)
    act(() => {
      result.current.move('wordRight');
    });
    expect(result.current.cursor[1]).toBe(4);

    // Move word right again (should stay at end)
    act(() => {
      result.current.move('wordRight');
    });
    expect(result.current.cursor[1]).toBe(4);
  });

  it('should navigate mixed English and Chinese', () => {
    const { result } = renderHook(() =>
      useTextBuffer({
        initialText: 'Hello你好World',
        // "Hello" (5) + "你好" (2) + "World" (5) = 12 code points.
        initialCursorOffset: 12, // End of string
        viewport,
        isValidPath: () => false,
      }),
    );

    // Explicitly move to the end of the line so the assertions below start
    // from a known column.
    act(() => {
      result.current.move('end');
    });
    expect(result.current.cursor[1]).toBe(12);

    // wordLeft -> start of "World" (index 7)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(7);

    // wordLeft -> start of "你好" (index 5)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(5);

    // wordLeft -> start of "Hello" (index 0)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(0);

    // wordLeft at start of line (should stay at 0)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(0);
  });
});
|
||||
});
|
||||
|
||||
@@ -32,14 +32,6 @@ export type Direction =
|
||||
| 'home'
|
||||
| 'end';
|
||||
|
||||
/**
 * Loose word-character test used for word-wise operations.
 *
 * @param ch - The character to classify; may be `undefined` (e.g. an
 *   out-of-range array read).
 * @returns `false` for `undefined`, and `false` when `ch` contains whitespace
 *   or one of `, . ; ! ?`; `true` otherwise.
 */
function isWordChar(ch: string | undefined): boolean {
  return ch !== undefined && !/[\s,.;!?]/.test(ch);
}
|
||||
|
||||
// Helper functions for line-based word navigation
|
||||
/**
 * Strict word-character test: matches when `char` contains a Unicode letter,
 * a Unicode number, or an underscore.
 *
 * @param char - The character to classify.
 * @returns `true` when the pattern matches, `false` otherwise.
 */
export const isWordCharStrict = (char: string): boolean => {
  const strictWordPattern = /[\w\p{L}\p{N}]/u;
  return strictWordPattern.test(char);
};
|
||||
@@ -249,6 +241,51 @@ export const findWordEndInLine = (line: string, col: number): number | null => {
|
||||
return null;
|
||||
};
|
||||
|
||||
// Shared word-granularity segmenter (default locale) used for word boundary
// detection.
const segmenter = new Intl.Segmenter(undefined, { granularity: 'word' });

/**
 * Finds the start of the closest word-like segment that begins strictly
 * before the cursor.
 *
 * @param line - The line of text to scan.
 * @param cursorCol - Cursor column expressed as a code point index.
 * @returns The code point index of that segment's start, or 0 when no
 *   word-like segment starts before the cursor.
 */
function findPrevWordBoundary(line: string, cursorCol: number): number {
  // Segment indices are string (UTF-16) offsets, so translate the code point
  // column into a string offset first.
  const cursorUnitOffset = toCodePoints(line)
    .slice(0, cursorCol)
    .join('').length;

  let wordStart = 0;
  for (const { index, isWordLike } of segmenter.segment(line)) {
    // Segments starting at or after the cursor are not candidates.
    if (index >= cursorUnitOffset) {
      break;
    }
    if (isWordLike) {
      wordStart = index;
    }
  }

  // Translate the string offset back into a code point column.
  return toCodePoints(line.slice(0, wordStart)).length;
}
|
||||
|
||||
/**
 * Finds the end of the first word-like segment that ends strictly after the
 * cursor.
 *
 * @param line - The line of text to scan.
 * @param cursorCol - Cursor column expressed as a code point index.
 * @returns The code point index of that segment's end, or the code point
 *   length of the line when no such segment exists.
 */
function findNextWordBoundary(line: string, cursorCol: number): number {
  // Segment indices are string (UTF-16) offsets, so translate the code point
  // column into a string offset first.
  const cursorUnitOffset = toCodePoints(line)
    .slice(0, cursorCol)
    .join('').length;

  let wordEnd = line.length;
  for (const { index, segment, isWordLike } of segmenter.segment(line)) {
    const segmentEnd = index + segment.length;
    if (isWordLike && segmentEnd > cursorUnitOffset) {
      wordEnd = segmentEnd;
      break;
    }
  }

  // Translate the string offset back into a code point column.
  return toCodePoints(line.slice(0, wordEnd)).length;
}
|
||||
|
||||
// Find next word across lines
|
||||
export const findNextWordAcrossLines = (
|
||||
lines: string[],
|
||||
@@ -1201,22 +1238,7 @@ function textBufferReducerLogic(
|
||||
newCursorCol = cpLen(lines[newCursorRow] ?? '');
|
||||
} else {
|
||||
const lineContent = lines[cursorRow];
|
||||
const arr = toCodePoints(lineContent);
|
||||
let start = cursorCol;
|
||||
let onlySpaces = true;
|
||||
for (let i = 0; i < start; i++) {
|
||||
if (isWordChar(arr[i])) {
|
||||
onlySpaces = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (onlySpaces && start > 0) {
|
||||
start--;
|
||||
} else {
|
||||
while (start > 0 && !isWordChar(arr[start - 1])) start--;
|
||||
while (start > 0 && isWordChar(arr[start - 1])) start--;
|
||||
}
|
||||
newCursorCol = start;
|
||||
newCursorCol = findPrevWordBoundary(lineContent, cursorCol);
|
||||
}
|
||||
return {
|
||||
...state,
|
||||
@@ -1226,26 +1248,23 @@ function textBufferReducerLogic(
|
||||
};
|
||||
}
|
||||
case 'wordRight': {
|
||||
const lineContent = lines[cursorRow] ?? '';
|
||||
if (
|
||||
cursorRow === lines.length - 1 &&
|
||||
cursorCol === cpLen(lines[cursorRow] ?? '')
|
||||
cursorCol === cpLen(lineContent)
|
||||
) {
|
||||
return state;
|
||||
}
|
||||
|
||||
let newCursorRow = cursorRow;
|
||||
let newCursorCol = cursorCol;
|
||||
const lineContent = lines[cursorRow] ?? '';
|
||||
const arr = toCodePoints(lineContent);
|
||||
const lineLen = cpLen(lineContent);
|
||||
|
||||
if (cursorCol >= arr.length) {
|
||||
if (cursorCol >= lineLen) {
|
||||
newCursorRow++;
|
||||
newCursorCol = 0;
|
||||
} else {
|
||||
let end = cursorCol;
|
||||
while (end < arr.length && !isWordChar(arr[end])) end++;
|
||||
while (end < arr.length && isWordChar(arr[end])) end++;
|
||||
newCursorCol = end;
|
||||
newCursorCol = findNextWordBoundary(lineContent, cursorCol);
|
||||
}
|
||||
return {
|
||||
...state,
|
||||
|
||||
Reference in New Issue
Block a user