fix(cli): Fix word navigation for CJK characters (#14475)

This commit is contained in:
Sandy Tao
2025-12-04 08:16:11 +08:00
committed by GitHub
parent 3da4fd5f7d
commit 518e73ac9f
2 changed files with 150 additions and 32 deletions

View File

@@ -32,14 +32,6 @@ export type Direction =
| 'home'
| 'end';
// Simple helper for wordwise ops.
function isWordChar(ch: string | undefined): boolean {
if (ch === undefined) {
return false;
}
return !/[\s,.;!?]/.test(ch);
}
// Helper functions for line-based word navigation
export const isWordCharStrict = (char: string): boolean =>
/[\w\p{L}\p{N}]/u.test(char); // Matches a single character that is any Unicode letter, any Unicode number, or an underscore
@@ -249,6 +241,51 @@ export const findWordEndInLine = (line: string, col: number): number | null => {
return null;
};
// Initialize segmenter for word boundary detection
const segmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
function findPrevWordBoundary(line: string, cursorCol: number): number {
const codePoints = toCodePoints(line);
// Convert cursorCol (CP index) to string index
const prefix = codePoints.slice(0, cursorCol).join('');
const cursorIdx = prefix.length;
let targetIdx = 0;
for (const seg of segmenter.segment(line)) {
// We want the last word start strictly before the cursor.
// If we've reached or passed the cursor, we stop.
if (seg.index >= cursorIdx) break;
if (seg.isWordLike) {
targetIdx = seg.index;
}
}
return toCodePoints(line.slice(0, targetIdx)).length;
}
function findNextWordBoundary(line: string, cursorCol: number): number {
const codePoints = toCodePoints(line);
const prefix = codePoints.slice(0, cursorCol).join('');
const cursorIdx = prefix.length;
let targetIdx = line.length;
for (const seg of segmenter.segment(line)) {
const segEnd = seg.index + seg.segment.length;
if (segEnd > cursorIdx) {
if (seg.isWordLike) {
targetIdx = segEnd;
break;
}
}
}
return toCodePoints(line.slice(0, targetIdx)).length;
}
// Find next word across lines
export const findNextWordAcrossLines = (
lines: string[],
@@ -1201,22 +1238,7 @@ function textBufferReducerLogic(
newCursorCol = cpLen(lines[newCursorRow] ?? '');
} else {
const lineContent = lines[cursorRow];
const arr = toCodePoints(lineContent);
let start = cursorCol;
let onlySpaces = true;
for (let i = 0; i < start; i++) {
if (isWordChar(arr[i])) {
onlySpaces = false;
break;
}
}
if (onlySpaces && start > 0) {
start--;
} else {
while (start > 0 && !isWordChar(arr[start - 1])) start--;
while (start > 0 && isWordChar(arr[start - 1])) start--;
}
newCursorCol = start;
newCursorCol = findPrevWordBoundary(lineContent, cursorCol);
}
return {
...state,
@@ -1226,26 +1248,23 @@ function textBufferReducerLogic(
};
}
case 'wordRight': {
const lineContent = lines[cursorRow] ?? '';
if (
cursorRow === lines.length - 1 &&
cursorCol === cpLen(lines[cursorRow] ?? '')
cursorCol === cpLen(lineContent)
) {
return state;
}
let newCursorRow = cursorRow;
let newCursorCol = cursorCol;
const lineContent = lines[cursorRow] ?? '';
const arr = toCodePoints(lineContent);
const lineLen = cpLen(lineContent);
if (cursorCol >= arr.length) {
if (cursorCol >= lineLen) {
newCursorRow++;
newCursorCol = 0;
} else {
let end = cursorCol;
while (end < arr.length && !isWordChar(arr[end])) end++;
while (end < arr.length && isWordChar(arr[end])) end++;
newCursorCol = end;
newCursorCol = findNextWordBoundary(lineContent, cursorCol);
}
return {
...state,