fix(cli): Fix word navigation for CJK characters (#14475)

2026-05-12 12:54:07 -07:00 · 2025-12-04 08:16:11 +08:00
parent 3da4fd5f7d
commit 518e73ac9f
2 changed files with 150 additions and 32 deletions
@@ -2241,4 +2241,103 @@ describe('Unicode helper functions', () => {
      expect(cpLen('hello مرحبا world')).toBe(17);
    });
  });
  // Word-wise cursor movement ('wordLeft' / 'wordRight') over text that contains
  // CJK characters, which have no whitespace between words.
  describe('useTextBuffer CJK Navigation', () => {
    const viewport = { width: 80, height: 24 };
    it('should navigate by word in Chinese', () => {
      const { result } = renderHook(() =>
        useTextBuffer({
          initialText: '你好世界',
          initialCursorOffset: 4, // End of string
          viewport,
          isValidPath: () => false,
        }),
      );
      // '你好世界' is 4 code points long, so offset 4 places the cursor at the end.
      // The expectations below rely on the text being split as "你好" | "世界".
      // Move word left
      act(() => {
        result.current.move('wordLeft');
      });
      // Should be at start of "世界" (index 2)
      expect(result.current.cursor[1]).toBe(2);
      // Move word left again
      act(() => {
        result.current.move('wordLeft');
      });
      // Should be at start of "你好" (index 0)
      expect(result.current.cursor[1]).toBe(0);
      // Move word left again (should stay at 0)
      act(() => {
        result.current.move('wordLeft');
      });
      expect(result.current.cursor[1]).toBe(0);
      // Move word right
      act(() => {
        result.current.move('wordRight');
      });
      // Should be at end of "你好" (index 2)
      expect(result.current.cursor[1]).toBe(2);
      // Move word right again
      act(() => {
        result.current.move('wordRight');
      });
      // Should be at end of "世界" (index 4)
      expect(result.current.cursor[1]).toBe(4);
      // Move word right again (should stay at end)
      act(() => {
        result.current.move('wordRight');
      });
      expect(result.current.cursor[1]).toBe(4);
    });
    it('should navigate mixed English and Chinese', () => {
      const { result } = renderHook(() =>
        useTextBuffer({
          initialText: 'Hello你好World',
          // "Hello" (5) + "你好" (2) + "World" (5) = 12, i.e. the end of the line.
          initialCursorOffset: 12,
          viewport,
          isValidPath: () => false,
        }),
      );
      // Explicitly move to the end as well, so the starting column is pinned by
      // an assertion rather than by the initial offset alone.
      act(() => {
        result.current.move('end');
      });
      expect(result.current.cursor[1]).toBe(12);
      // wordLeft -> start of "World" (index 7)
      act(() => result.current.move('wordLeft'));
      expect(result.current.cursor[1]).toBe(7);
      // wordLeft -> start of "你好" (index 5)
      act(() => result.current.move('wordLeft'));
      expect(result.current.cursor[1]).toBe(5);
      // wordLeft -> start of "Hello" (index 0)
      act(() => result.current.move('wordLeft'));
      expect(result.current.cursor[1]).toBe(0);
      // wordLeft -> start of line (should stay at 0)
      act(() => result.current.move('wordLeft'));
      expect(result.current.cursor[1]).toBe(0);
    });
  });
 });
@@ -32,14 +32,6 @@ export type Direction =
  | 'home'
  | 'end';
 // Loose word-character test used by the word-wise cursor operations.
 // `undefined` (e.g. an out-of-range array read) is never a word character;
 // any other input counts as one unless it contains whitespace or one of
 // the separators , . ; ! ?
 function isWordChar(ch: string | undefined): boolean {
  return ch !== undefined && !/[\s,.;!?]/.test(ch);
 }
 // Helper functions for line-based word navigation
 export const isWordCharStrict = (char: string): boolean => {
  // Accepts `\w` (ASCII letters, digits, underscore), any Unicode letter
  // (\p{L}) and any Unicode number (\p{N}). The pattern is unanchored, so a
  // longer string passes as soon as one of its characters matches.
  const strictWordPattern = /[\w\p{L}\p{N}]/u;
  return strictWordPattern.test(char);
 };
@@ -249,6 +241,51 @@ export const findWordEndInLine = (line: string, col: number): number | null => {
  return null;
 };
 // Shared word-granularity segmenter used to locate word boundaries within a
 // line. No locale is passed (`undefined`), so the runtime's default is used.
 const segmenter = new Intl.Segmenter(undefined, {
  granularity: 'word',
 });
 /**
  * Finds the column a "word left" move should land on within a single line.
  *
  * @param line - The text of the line.
  * @param cursorCol - Current cursor column, counted in code points.
  * @returns The code-point column of the last word-like segment that starts
  *   strictly before the cursor, or 0 when there is none.
  */
 function findPrevWordBoundary(line: string, cursorCol: number): number {
  // Segment offsets are string indices, while the cursor is a code-point
  // column, so translate the cursor into a string index first.
  const cursorStrIdx = toCodePoints(line).slice(0, cursorCol).join('').length;

  let wordStartStrIdx = 0;
  for (const { index, isWordLike } of segmenter.segment(line)) {
    // Segments starting at or after the cursor cannot be "previous" words.
    if (index >= cursorStrIdx) {
      break;
    }
    // Keep overwriting so the final value is the closest word start.
    if (isWordLike) {
      wordStartStrIdx = index;
    }
  }

  // Translate the string index back into a code-point column.
  return toCodePoints(line.slice(0, wordStartStrIdx)).length;
 }
 /**
  * Finds the column a "word right" move should land on within a single line.
  *
  * @param line - The text of the line.
  * @param cursorCol - Current cursor column, counted in code points.
  * @returns The code-point column just past the first word-like segment that
  *   ends after the cursor, or the end of the line when there is none.
  */
 function findNextWordBoundary(line: string, cursorCol: number): number {
  // Segment offsets are string indices, while the cursor is a code-point
  // column, so translate the cursor into a string index first.
  const cursorStrIdx = toCodePoints(line).slice(0, cursorCol).join('').length;

  // Falls back to the end of the line if no word-like segment qualifies.
  let wordEndStrIdx = line.length;
  for (const { index, segment, isWordLike } of segmenter.segment(line)) {
    const segmentEnd = index + segment.length;
    if (isWordLike && segmentEnd > cursorStrIdx) {
      wordEndStrIdx = segmentEnd;
      break;
    }
  }

  // Translate the string index back into a code-point column.
  return toCodePoints(line.slice(0, wordEndStrIdx)).length;
 }
 // Find next word across lines
 export const findNextWordAcrossLines = (
  lines: string[],
@@ -1201,22 +1238,7 @@ function textBufferReducerLogic(
            newCursorCol = cpLen(lines[newCursorRow] ?? '');
          } else {
            const lineContent = lines[cursorRow];
-            const arr = toCodePoints(lineContent);
+            newCursorCol = findPrevWordBoundary(lineContent, cursorCol);
            let start = cursorCol;
            let onlySpaces = true;
            for (let i = 0; i < start; i++) {
              if (isWordChar(arr[i])) {
                onlySpaces = false;
                break;
              }
            }
            if (onlySpaces && start > 0) {
              start--;
            } else {
              while (start > 0 && !isWordChar(arr[start - 1])) start--;
              while (start > 0 && isWordChar(arr[start - 1])) start--;
            }
            newCursorCol = start;
          }
          return {
            ...state,
@@ -1226,26 +1248,23 @@ function textBufferReducerLogic(
          };
        }
        case 'wordRight': {
          const lineContent = lines[cursorRow] ?? '';
          if (
            cursorRow === lines.length - 1 &&
-            cursorCol === cpLen(lines[cursorRow] ?? '')
+            cursorCol === cpLen(lineContent)
          ) {
            return state;
          }
          let newCursorRow = cursorRow;
          let newCursorCol = cursorCol;
-          const lineContent = lines[cursorRow] ?? '';
+          const lineLen = cpLen(lineContent);
          const arr = toCodePoints(lineContent);
-          if (cursorCol >= arr.length) {
+          if (cursorCol >= lineLen) {
            newCursorRow++;
            newCursorCol = 0;
          } else {
-            let end = cursorCol;
+            newCursorCol = findNextWordBoundary(lineContent, cursorCol);
            while (end < arr.length && !isWordChar(arr[end])) end++;
            while (end < arr.length && isWordChar(arr[end])) end++;
            newCursorCol = end;
          }
          return {
            ...state,