Fix line end bugs in Vim mode (#5328)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
2026-05-13 13:22:35 -07:00 · 2025-08-11 11:58:32 -07:00
parent 6390b81646
commit 72832fb889
4 changed files with 856 additions and 326 deletions
@@ -15,7 +15,11 @@ import {
  textBufferReducer,
  TextBufferState,
  TextBufferAction,
+  findWordEndInLine,
+  findNextWordStartInLine,
+  isWordCharStrict,
 } from './text-buffer.js';
+import { cpLen } from '../../utils/textUtils.js';

 const initialState: TextBufferState = {
  lines: [''],
@@ -1591,3 +1595,94 @@ describe('textBufferReducer vim operations', () => {
    });
  });
 });
+
+describe('Unicode helper functions', () => {
+  describe('findWordEndInLine with Unicode', () => {
+    it('should handle combining characters', () => {
+      // café with combining accent
+      const cafeWithCombining = 'cafe\u0301';
+      const result = findWordEndInLine(cafeWithCombining + ' test', 0);
+      expect(result).toBe(3); // End of 'café' at base character 'e', not combining accent
+    });
+
+    it('should handle precomposed characters with diacritics', () => {
+      // café with precomposed é (U+00E9)
+      const cafePrecomposed = 'café';
+      const result = findWordEndInLine(cafePrecomposed + ' test', 0);
+      expect(result).toBe(3); // End of 'café' at precomposed character 'é'
+    });
+
+    it('should return null when no word end found', () => {
+      const result = findWordEndInLine('   ', 0);
+      expect(result).toBeNull(); // No word end found in whitespace-only string string
+    });
+  });
+
+  describe('findNextWordStartInLine with Unicode', () => {
+    it('should handle right-to-left text', () => {
+      const result = findNextWordStartInLine('hello مرحبا world', 0);
+      expect(result).toBe(6); // Start of Arabic word
+    });
+
+    it('should handle Chinese characters', () => {
+      const result = findNextWordStartInLine('hello 你好 world', 0);
+      expect(result).toBe(6); // Start of Chinese word
+    });
+
+    it('should return null at end of line', () => {
+      const result = findNextWordStartInLine('hello', 10);
+      expect(result).toBeNull();
+    });
+
+    it('should handle combining characters', () => {
+      // café with combining accent + next word
+      const textWithCombining = 'cafe\u0301 test';
+      const result = findNextWordStartInLine(textWithCombining, 0);
+      expect(result).toBe(6); // Start of 'test' after 'café ' (combining char makes string longer)
+    });
+
+    it('should handle precomposed characters with diacritics', () => {
+      // café with precomposed é + next word
+      const textPrecomposed = 'café test';
+      const result = findNextWordStartInLine(textPrecomposed, 0);
+      expect(result).toBe(5); // Start of 'test' after 'café '
+    });
+  });
+
+  describe('isWordCharStrict with Unicode', () => {
+    it('should return true for ASCII word characters', () => {
+      expect(isWordCharStrict('a')).toBe(true);
+      expect(isWordCharStrict('Z')).toBe(true);
+      expect(isWordCharStrict('0')).toBe(true);
+      expect(isWordCharStrict('_')).toBe(true);
+    });
+
+    it('should return false for punctuation', () => {
+      expect(isWordCharStrict('.')).toBe(false);
+      expect(isWordCharStrict(',')).toBe(false);
+      expect(isWordCharStrict('!')).toBe(false);
+    });
+
+    it('should return true for non-Latin scripts', () => {
+      expect(isWordCharStrict('你')).toBe(true); // Chinese character
+      expect(isWordCharStrict('م')).toBe(true); // Arabic character
+    });
+
+    it('should return false for whitespace', () => {
+      expect(isWordCharStrict(' ')).toBe(false);
+      expect(isWordCharStrict('\t')).toBe(false);
+    });
+  });
+
+  describe('cpLen with Unicode', () => {
+    it('should handle combining characters', () => {
+      expect(cpLen('é')).toBe(1); // Precomposed
+      expect(cpLen('e\u0301')).toBe(2); // e + combining acute
+    });
+
+    it('should handle Chinese and Arabic text', () => {
+      expect(cpLen('hello 你好 world')).toBe(14); // 5 + 1 + 2 + 1 + 5 = 14
+      expect(cpLen('hello مرحبا world')).toBe(17);
+    });
+  });
+});