fix(cli): Fix word navigation for CJK characters (#14475)

This commit is contained in:
Sandy Tao
2025-12-04 08:16:11 +08:00
committed by GitHub
parent 3da4fd5f7d
commit 518e73ac9f
2 changed files with 150 additions and 32 deletions

View File

@@ -2241,4 +2241,103 @@ describe('Unicode helper functions', () => {
expect(cpLen('hello مرحبا world')).toBe(17);
});
});
describe('useTextBuffer CJK Navigation', () => {
  const viewport = { width: 80, height: 24 };

  it('should navigate by word in Chinese', () => {
    // '你好世界' is 4 code points long, so offset 4 places the cursor at the
    // end of the string.
    const { result } = renderHook(() =>
      useTextBuffer({
        initialText: '你好世界',
        initialCursorOffset: 4,
        viewport,
        isValidPath: () => false,
      }),
    );

    // Expected word split: "你好" | "世界". Each entry is the move to perform
    // and the cursor column expected immediately afterwards.
    const steps = [
      ['wordLeft', 2], // start of "世界"
      ['wordLeft', 0], // start of "你好"
      ['wordLeft', 0], // already at the start of the line: no movement
      ['wordRight', 2], // end of "你好"
      ['wordRight', 4], // end of "世界"
      ['wordRight', 4], // already at the end of the line: no movement
    ] as const;

    for (const [direction, expectedCol] of steps) {
      act(() => {
        result.current.move(direction);
      });
      expect(result.current.cursor[1]).toBe(expectedCol);
    }
  });

  it('should navigate mixed English and Chinese', () => {
    // "Hello" (5) + "你好" (2) + "World" (5) = 12 code points. The initial
    // offset of 10 is short of the end, so the first step below moves the
    // cursor to the end explicitly before walking left.
    const { result } = renderHook(() =>
      useTextBuffer({
        initialText: 'Hello你好World',
        initialCursorOffset: 10,
        viewport,
        isValidPath: () => false,
      }),
    );

    const steps = [
      ['end', 12], // end of the line
      ['wordLeft', 7], // start of "World"
      ['wordLeft', 5], // start of "你好"
      ['wordLeft', 0], // start of "Hello"
      ['wordLeft', 0], // already at the start of the line: no movement
    ] as const;

    for (const [direction, expectedCol] of steps) {
      act(() => {
        result.current.move(direction);
      });
      expect(result.current.cursor[1]).toBe(expectedCol);
    }
  });
});
});

View File

@@ -32,14 +32,6 @@ export type Direction =
| 'home'
| 'end';
/**
 * Loose word-character test for word-wise operations.
 *
 * @param ch - The text to test, or `undefined` (e.g. an out-of-range array
 *   lookup).
 * @returns `false` for `undefined`; otherwise `true` unless `ch` contains
 *   whitespace or one of the punctuation marks `, . ; ! ?`.
 */
function isWordChar(ch: string | undefined): boolean {
  return ch !== undefined && !/[\s,.;!?]/.test(ch);
}
// Helper functions for line-based word navigation
/**
 * Strict word-character test: ASCII word characters (`\w`, which includes the
 * underscore), any Unicode letter (`\p{L}`) or any Unicode number (`\p{N}`).
 *
 * The pattern is unanchored, so for a multi-character argument the result is
 * `true` when at least one such character is present.
 *
 * @param char - The text to test; intended to be a single character.
 * @returns Whether `char` contains a strict word character.
 */
export const isWordCharStrict = (char: string): boolean => {
  const wordCharPattern = /[\w\p{L}\p{N}]/u;
  return wordCharPattern.test(char);
};
@@ -249,6 +241,51 @@ export const findWordEndInLine = (line: string, col: number): number | null => {
return null;
};
// Word-granularity segmenter, created once at module load and shared by
// findPrevWordBoundary and findNextWordBoundary. No locale is specified
// (`undefined`), so the runtime picks its default locale.
const segmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
/**
 * Finds where a "word left" move from `cursorCol` should land within `line`.
 *
 * @param line - The text of the current line.
 * @param cursorCol - Cursor position as a code-point index into `line`.
 * @returns The code-point index of the start of the last word-like segment
 *   that begins strictly before the cursor, or 0 when there is none.
 */
function findPrevWordBoundary(line: string, cursorCol: number): number {
  // Segment offsets are string indices while columns are code-point indices,
  // so translate the cursor into a string index before comparing.
  const cursorOffset = toCodePoints(line).slice(0, cursorCol).join('').length;

  let wordStart = 0;
  for (const { index, isWordLike } of segmenter.segment(line)) {
    // Segments arrive in order; once one starts at or after the cursor, no
    // later segment can start before it.
    if (index >= cursorOffset) {
      break;
    }
    if (isWordLike) {
      wordStart = index;
    }
  }

  // Translate the string index back into a code-point column.
  return toCodePoints(line.slice(0, wordStart)).length;
}
/**
 * Finds where a "word right" move from `cursorCol` should land within `line`.
 *
 * @param line - The text of the current line.
 * @param cursorCol - Cursor position as a code-point index into `line`.
 * @returns The code-point index of the end of the first word-like segment
 *   that ends strictly after the cursor, or the line's code-point length when
 *   no such segment exists.
 */
function findNextWordBoundary(line: string, cursorCol: number): number {
  // Segment offsets are string indices while columns are code-point indices,
  // so translate the cursor into a string index before comparing.
  const cursorOffset = toCodePoints(line).slice(0, cursorCol).join('').length;

  for (const { index, segment, isWordLike } of segmenter.segment(line)) {
    const segmentEnd = index + segment.length;
    if (isWordLike && segmentEnd > cursorOffset) {
      // Translate the string index back into a code-point column.
      return toCodePoints(line.slice(0, segmentEnd)).length;
    }
  }

  // No word ends after the cursor: land at the end of the line.
  return toCodePoints(line.slice(0, line.length)).length;
}
// Find next word across lines
export const findNextWordAcrossLines = (
lines: string[],
@@ -1201,22 +1238,7 @@ function textBufferReducerLogic(
newCursorCol = cpLen(lines[newCursorRow] ?? '');
} else {
const lineContent = lines[cursorRow];
const arr = toCodePoints(lineContent);
let start = cursorCol;
let onlySpaces = true;
for (let i = 0; i < start; i++) {
if (isWordChar(arr[i])) {
onlySpaces = false;
break;
}
}
if (onlySpaces && start > 0) {
start--;
} else {
while (start > 0 && !isWordChar(arr[start - 1])) start--;
while (start > 0 && isWordChar(arr[start - 1])) start--;
}
newCursorCol = start;
newCursorCol = findPrevWordBoundary(lineContent, cursorCol);
}
return {
...state,
@@ -1226,26 +1248,23 @@ function textBufferReducerLogic(
};
}
case 'wordRight': {
const lineContent = lines[cursorRow] ?? '';
if (
cursorRow === lines.length - 1 &&
cursorCol === cpLen(lines[cursorRow] ?? '')
cursorCol === cpLen(lineContent)
) {
return state;
}
let newCursorRow = cursorRow;
let newCursorCol = cursorCol;
const lineContent = lines[cursorRow] ?? '';
const arr = toCodePoints(lineContent);
const lineLen = cpLen(lineContent);
if (cursorCol >= arr.length) {
if (cursorCol >= lineLen) {
newCursorRow++;
newCursorCol = 0;
} else {
let end = cursorCol;
while (end < arr.length && !isWordChar(arr[end])) end++;
while (end < arr.length && isWordChar(arr[end])) end++;
newCursorCol = end;
newCursorCol = findNextWordBoundary(lineContent, cursorCol);
}
return {
...state,