fix(cli): Fix word navigation for CJK characters (#14475)

2026-05-12 21:03:05 -07:00 · 2025-12-04 08:16:11 +08:00
parent 3da4fd5f7d
commit 518e73ac9f
2 changed files with 150 additions and 32 deletions
@@ -2241,4 +2241,103 @@ describe('Unicode helper functions', () => {
      expect(cpLen('hello مرحبا world')).toBe(17);
    });
  });
+
+  describe('useTextBuffer CJK Navigation', () => {
+    const viewport = { width: 80, height: 24 };
+
+    it('should navigate by word in Chinese', () => {
+      const { result } = renderHook(() =>
+        useTextBuffer({
+          initialText: '你好世界',
+          initialCursorOffset: 4, // End of string
+          viewport,
+          isValidPath: () => false,
+        }),
+      );
+
+      // Initial state: cursor at end (index 2 in code points if 4 is length? wait. length is 2 code points? No. '你好世界' length is 4.)
+      // '你好世界' length is 4. Code points length is 4.
+
+      // Move word left
+      act(() => {
+        result.current.move('wordLeft');
+      });
+
+      // Should be at start of "世界" (index 2)
+      // "你好世界" -> "你好" | "世界"
+      expect(result.current.cursor[1]).toBe(2);
+
+      // Move word left again
+      act(() => {
+        result.current.move('wordLeft');
+      });
+
+      // Should be at start of "你好" (index 0)
+      expect(result.current.cursor[1]).toBe(0);
+
+      // Move word left again (should stay at 0)
+      act(() => {
+        result.current.move('wordLeft');
+      });
+      expect(result.current.cursor[1]).toBe(0);
+
+      // Move word right
+      act(() => {
+        result.current.move('wordRight');
+      });
+
+      // Should be at end of "你好" (index 2)
+      expect(result.current.cursor[1]).toBe(2);
+
+      // Move word right again
+      act(() => {
+        result.current.move('wordRight');
+      });
+
+      // Should be at end of "世界" (index 4)
+      expect(result.current.cursor[1]).toBe(4);
+
+      // Move word right again (should stay at end)
+      act(() => {
+        result.current.move('wordRight');
+      });
+      expect(result.current.cursor[1]).toBe(4);
+    });
+
+    it('should navigate mixed English and Chinese', () => {
+      const { result } = renderHook(() =>
+        useTextBuffer({
+          initialText: 'Hello你好World',
+          initialCursorOffset: 10, // End
+          viewport,
+          isValidPath: () => false,
+        }),
+      );
+
+      // Hello (5) + 你好 (2) + World (5) = 12 chars.
+      // initialCursorOffset 10? 'Hello你好World'.length is 12.
+      // Let's set it to end.
+
+      act(() => {
+        result.current.move('end');
+      });
+      expect(result.current.cursor[1]).toBe(12);
+
+      // wordLeft -> start of "World" (index 7)
+      act(() => result.current.move('wordLeft'));
+      expect(result.current.cursor[1]).toBe(7);
+
+      // wordLeft -> start of "你好" (index 5)
+      act(() => result.current.move('wordLeft'));
+      expect(result.current.cursor[1]).toBe(5);
+
+      // wordLeft -> start of "Hello" (index 0)
+      act(() => result.current.move('wordLeft'));
+      expect(result.current.cursor[1]).toBe(0);
+
+      // wordLeft -> start of line (should stay at 0)
+      act(() => result.current.move('wordLeft'));
+      expect(result.current.cursor[1]).toBe(0);
+    });
+  });
 });
@@ -32,14 +32,6 @@ export type Direction =
  | 'home'
  | 'end';

-// Simple helper for word‑wise ops.
-function isWordChar(ch: string | undefined): boolean {
-  if (ch === undefined) {
-    return false;
-  }
-  return !/[\s,.;!?]/.test(ch);
-}
-
 // Helper functions for line-based word navigation
 export const isWordCharStrict = (char: string): boolean =>
  /[\w\p{L}\p{N}]/u.test(char); // Matches a single character that is any Unicode letter, any Unicode number, or an underscore
@@ -249,6 +241,51 @@ export const findWordEndInLine = (line: string, col: number): number | null => {
  return null;
 };

+// Initialize segmenter for word boundary detection
+const segmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
+
+function findPrevWordBoundary(line: string, cursorCol: number): number {
+  const codePoints = toCodePoints(line);
+  // Convert cursorCol (CP index) to string index
+  const prefix = codePoints.slice(0, cursorCol).join('');
+  const cursorIdx = prefix.length;
+
+  let targetIdx = 0;
+
+  for (const seg of segmenter.segment(line)) {
+    // We want the last word start strictly before the cursor.
+    // If we've reached or passed the cursor, we stop.
+    if (seg.index >= cursorIdx) break;
+
+    if (seg.isWordLike) {
+      targetIdx = seg.index;
+    }
+  }
+
+  return toCodePoints(line.slice(0, targetIdx)).length;
+}
+
+function findNextWordBoundary(line: string, cursorCol: number): number {
+  const codePoints = toCodePoints(line);
+  const prefix = codePoints.slice(0, cursorCol).join('');
+  const cursorIdx = prefix.length;
+
+  let targetIdx = line.length;
+
+  for (const seg of segmenter.segment(line)) {
+    const segEnd = seg.index + seg.segment.length;
+
+    if (segEnd > cursorIdx) {
+      if (seg.isWordLike) {
+        targetIdx = segEnd;
+        break;
+      }
+    }
+  }
+
+  return toCodePoints(line.slice(0, targetIdx)).length;
+}
+
 // Find next word across lines
 export const findNextWordAcrossLines = (
  lines: string[],
@@ -1201,22 +1238,7 @@ function textBufferReducerLogic(
            newCursorCol = cpLen(lines[newCursorRow] ?? '');
          } else {
            const lineContent = lines[cursorRow];
-            const arr = toCodePoints(lineContent);
-            let start = cursorCol;
-            let onlySpaces = true;
-            for (let i = 0; i < start; i++) {
-              if (isWordChar(arr[i])) {
-                onlySpaces = false;
-                break;
-              }
-            }
-            if (onlySpaces && start > 0) {
-              start--;
-            } else {
-              while (start > 0 && !isWordChar(arr[start - 1])) start--;
-              while (start > 0 && isWordChar(arr[start - 1])) start--;
-            }
-            newCursorCol = start;
+            newCursorCol = findPrevWordBoundary(lineContent, cursorCol);
          }
          return {
            ...state,
@@ -1226,26 +1248,23 @@ function textBufferReducerLogic(
          };
        }
        case 'wordRight': {
+          const lineContent = lines[cursorRow] ?? '';
          if (
            cursorRow === lines.length - 1 &&
-            cursorCol === cpLen(lines[cursorRow] ?? '')
+            cursorCol === cpLen(lineContent)
          ) {
            return state;
          }

          let newCursorRow = cursorRow;
          let newCursorCol = cursorCol;
-          const lineContent = lines[cursorRow] ?? '';
-          const arr = toCodePoints(lineContent);
+          const lineLen = cpLen(lineContent);

-          if (cursorCol >= arr.length) {
+          if (cursorCol >= lineLen) {
            newCursorRow++;
            newCursorCol = 0;
          } else {
-            let end = cursorCol;
-            while (end < arr.length && !isWordChar(arr[end])) end++;
-            while (end < arr.length && isWordChar(arr[end])) end++;
-            newCursorCol = end;
+            newCursorCol = findNextWordBoundary(lineContent, cursorCol);
          }
          return {
            ...state,