mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
fix(cli): Fix word navigation for CJK characters (#14475)
This commit is contained in:
@@ -2241,4 +2241,103 @@ describe('Unicode helper functions', () => {
|
|||||||
expect(cpLen('hello مرحبا world')).toBe(17);
|
expect(cpLen('hello مرحبا world')).toBe(17);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('useTextBuffer CJK Navigation', () => {
  const viewport = { width: 80, height: 24 };

  it('should navigate by word in Chinese', () => {
    const { result } = renderHook(() =>
      useTextBuffer({
        initialText: '你好世界',
        initialCursorOffset: 4, // End of string
        viewport,
        isValidPath: () => false,
      }),
    );

    // '你好世界' is 4 code points long, so the cursor starts at the end of
    // the line.

    // Move word left
    act(() => {
      result.current.move('wordLeft');
    });

    // Should be at start of "世界" (index 2)
    // "你好世界" -> "你好" | "世界"
    expect(result.current.cursor[1]).toBe(2);

    // Move word left again
    act(() => {
      result.current.move('wordLeft');
    });

    // Should be at start of "你好" (index 0)
    expect(result.current.cursor[1]).toBe(0);

    // Move word left again (should stay at 0)
    act(() => {
      result.current.move('wordLeft');
    });
    expect(result.current.cursor[1]).toBe(0);

    // Move word right
    act(() => {
      result.current.move('wordRight');
    });

    // Should be at end of "你好" (index 2)
    expect(result.current.cursor[1]).toBe(2);

    // Move word right again
    act(() => {
      result.current.move('wordRight');
    });

    // Should be at end of "世界" (index 4)
    expect(result.current.cursor[1]).toBe(4);

    // Move word right again (should stay at end)
    act(() => {
      result.current.move('wordRight');
    });
    expect(result.current.cursor[1]).toBe(4);
  });

  it('should navigate mixed English and Chinese', () => {
    const { result } = renderHook(() =>
      useTextBuffer({
        initialText: 'Hello你好World',
        initialCursorOffset: 10, // Not the end: the text is 12 code points long
        viewport,
        isValidPath: () => false,
      }),
    );

    // Hello (5) + 你好 (2) + World (5) = 12 code points.
    // The initial offset (10) falls short of that, so move to the end
    // explicitly before exercising word navigation.
    act(() => {
      result.current.move('end');
    });
    expect(result.current.cursor[1]).toBe(12);

    // wordLeft -> start of "World" (index 7)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(7);

    // wordLeft -> start of "你好" (index 5)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(5);

    // wordLeft -> start of "Hello" (index 0)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(0);

    // wordLeft -> start of line (should stay at 0)
    act(() => result.current.move('wordLeft'));
    expect(result.current.cursor[1]).toBe(0);
  });
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -32,14 +32,6 @@ export type Direction =
|
|||||||
| 'home'
|
| 'home'
|
||||||
| 'end';
|
| 'end';
|
||||||
|
|
||||||
// Simple helper for word‑wise ops.
|
|
||||||
function isWordChar(ch: string | undefined): boolean {
|
|
||||||
if (ch === undefined) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return !/[\s,.;!?]/.test(ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper functions for line-based word navigation
|
// Helper functions for line-based word navigation
|
||||||
/**
 * Tests whether `char` contains a word character: any Unicode letter, any
 * Unicode number, or an underscore.
 *
 * @param char - The text to test; callers are expected to pass a single
 *   character.
 * @returns `true` when a letter, number, or underscore is found.
 */
export const isWordCharStrict = (char: string): boolean =>
  /[\w\p{L}\p{N}]/u.test(char);
|
||||||
@@ -249,6 +241,51 @@ export const findWordEndInLine = (line: string, col: number): number | null => {
|
|||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Initialize segmenter for word boundary detection.
// Passing `undefined` as the locale selects the runtime's default locale.
const segmenter = new Intl.Segmenter(undefined, { granularity: 'word' });

/**
 * Finds the column a "word left" move should land on within a single line.
 *
 * @param line - Text of the line the cursor is on.
 * @param cursorCol - Cursor column, expressed as an index into
 *   `toCodePoints(line)`.
 * @returns The column (same indexing as `cursorCol`) of the start of the last
 *   word-like segment that begins strictly before the cursor, or 0 when no
 *   such segment exists.
 */
function findPrevWordBoundary(line: string, cursorCol: number): number {
  // The segmenter reports positions as string indices, so convert the
  // code-point column to a string index before comparing.
  const cursorIdx = toCodePoints(line).slice(0, cursorCol).join('').length;

  let targetIdx = 0;

  for (const seg of segmenter.segment(line)) {
    // Segments are visited in order; once one starts at or after the cursor,
    // nothing further can be a candidate.
    if (seg.index >= cursorIdx) break;

    // Keep the most recent word start seen so far; non-word segments
    // (whitespace, punctuation) are skipped over.
    if (seg.isWordLike) {
      targetIdx = seg.index;
    }
  }

  // Convert the string index back to a code-point column.
  return toCodePoints(line.slice(0, targetIdx)).length;
}
|
||||||
|
|
||||||
|
/**
 * Finds the column a "word right" move should land on within a single line.
 *
 * @param line - Text of the line the cursor is on.
 * @param cursorCol - Cursor column, expressed as an index into
 *   `toCodePoints(line)`.
 * @returns The column (same indexing as `cursorCol`) just past the end of the
 *   first word-like segment that ends after the cursor, or the end of the
 *   line when no such segment exists.
 */
function findNextWordBoundary(line: string, cursorCol: number): number {
  // The segmenter reports positions as string indices, so convert the
  // code-point column to a string index before comparing.
  const cursorIdx = toCodePoints(line).slice(0, cursorCol).join('').length;

  // Default to the end of the line when only non-word text follows the cursor.
  let targetIdx = line.length;

  for (const seg of segmenter.segment(line)) {
    const segEnd = seg.index + seg.segment.length;

    // The first word-like segment extending past the cursor is either the
    // word the cursor is inside or the next word after it.
    if (segEnd > cursorIdx && seg.isWordLike) {
      targetIdx = segEnd;
      break;
    }
  }

  // Convert the string index back to a code-point column.
  return toCodePoints(line.slice(0, targetIdx)).length;
}
|
||||||
|
|
||||||
// Find next word across lines
|
// Find next word across lines
|
||||||
export const findNextWordAcrossLines = (
|
export const findNextWordAcrossLines = (
|
||||||
lines: string[],
|
lines: string[],
|
||||||
@@ -1201,22 +1238,7 @@ function textBufferReducerLogic(
|
|||||||
newCursorCol = cpLen(lines[newCursorRow] ?? '');
|
newCursorCol = cpLen(lines[newCursorRow] ?? '');
|
||||||
} else {
|
} else {
|
||||||
const lineContent = lines[cursorRow];
|
const lineContent = lines[cursorRow];
|
||||||
const arr = toCodePoints(lineContent);
|
newCursorCol = findPrevWordBoundary(lineContent, cursorCol);
|
||||||
let start = cursorCol;
|
|
||||||
let onlySpaces = true;
|
|
||||||
for (let i = 0; i < start; i++) {
|
|
||||||
if (isWordChar(arr[i])) {
|
|
||||||
onlySpaces = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (onlySpaces && start > 0) {
|
|
||||||
start--;
|
|
||||||
} else {
|
|
||||||
while (start > 0 && !isWordChar(arr[start - 1])) start--;
|
|
||||||
while (start > 0 && isWordChar(arr[start - 1])) start--;
|
|
||||||
}
|
|
||||||
newCursorCol = start;
|
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
...state,
|
...state,
|
||||||
@@ -1226,26 +1248,23 @@ function textBufferReducerLogic(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
case 'wordRight': {
|
case 'wordRight': {
|
||||||
|
const lineContent = lines[cursorRow] ?? '';
|
||||||
if (
|
if (
|
||||||
cursorRow === lines.length - 1 &&
|
cursorRow === lines.length - 1 &&
|
||||||
cursorCol === cpLen(lines[cursorRow] ?? '')
|
cursorCol === cpLen(lineContent)
|
||||||
) {
|
) {
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
let newCursorRow = cursorRow;
|
let newCursorRow = cursorRow;
|
||||||
let newCursorCol = cursorCol;
|
let newCursorCol = cursorCol;
|
||||||
const lineContent = lines[cursorRow] ?? '';
|
const lineLen = cpLen(lineContent);
|
||||||
const arr = toCodePoints(lineContent);
|
|
||||||
|
|
||||||
if (cursorCol >= arr.length) {
|
if (cursorCol >= lineLen) {
|
||||||
newCursorRow++;
|
newCursorRow++;
|
||||||
newCursorCol = 0;
|
newCursorCol = 0;
|
||||||
} else {
|
} else {
|
||||||
let end = cursorCol;
|
newCursorCol = findNextWordBoundary(lineContent, cursorCol);
|
||||||
while (end < arr.length && !isWordChar(arr[end])) end++;
|
|
||||||
while (end < arr.length && isWordChar(arr[end])) end++;
|
|
||||||
newCursorCol = end;
|
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
...state,
|
...state,
|
||||||
|
|||||||
Reference in New Issue
Block a user