diff --git a/packages/cli/src/ui/utils/textUtils.test.ts b/packages/cli/src/ui/utils/textUtils.test.ts index 62462dddf6..0f9b2fcd39 100644 --- a/packages/cli/src/ui/utils/textUtils.test.ts +++ b/packages/cli/src/ui/utils/textUtils.test.ts @@ -58,9 +58,289 @@ describe('textUtils', () => { }); describe('stripUnsafeCharacters', () => { - it('should not strip tab characters', () => { - const input = 'hello world'; - expect(stripUnsafeCharacters(input)).toBe('hello world'); + describe('preserved characters', () => { + it('should preserve TAB (0x09)', () => { + const input = 'hello\tworld'; + expect(stripUnsafeCharacters(input)).toBe('hello\tworld'); + }); + + it('should preserve LF/newline (0x0A)', () => { + const input = 'hello\nworld'; + expect(stripUnsafeCharacters(input)).toBe('hello\nworld'); + }); + + it('should preserve CR (0x0D)', () => { + const input = 'hello\rworld'; + expect(stripUnsafeCharacters(input)).toBe('hello\rworld'); + }); + + it('should preserve CRLF (0x0D 0x0A)', () => { + const input = 'hello\r\nworld'; + expect(stripUnsafeCharacters(input)).toBe('hello\r\nworld'); + }); + + it('should preserve DEL (0x7F)', () => { + const input = 'hello\x7Fworld'; + expect(stripUnsafeCharacters(input)).toBe('hello\x7Fworld'); + }); + + it('should preserve all printable ASCII (0x20-0x7E)', () => { + const printableAscii = + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'; + expect(stripUnsafeCharacters(printableAscii)).toBe(printableAscii); + }); + + it('should preserve Unicode characters above 0x9F', () => { + const input = 'Hello κόσμε 世界 🌍'; + expect(stripUnsafeCharacters(input)).toBe('Hello κόσμε 世界 🌍'); + }); + + it('should preserve emojis', () => { + const input = '🎉 Celebration! 🚀 Launch! 💯'; + expect(stripUnsafeCharacters(input)).toBe( + '🎉 Celebration! 🚀 Launch! 💯', + ); + }); + + it('should preserve complex emoji sequences (ZWJ)', () => { + const input = 'Family: 👨‍👩‍👧‍👦 Flag: 🏳️‍🌈'; + expect(stripUnsafeCharacters(input)).toBe('Family: 👨‍👩‍👧‍👦 Flag: 🏳️‍🌈'); + }); + }); + + describe('stripped C0 control characters (0x00-0x1F except TAB/LF/CR)', () => { + it('should strip NULL (0x00)', () => { + const input = 'hello\x00world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip SOH (0x01)', () => { + const input = 'hello\x01world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip STX (0x02)', () => { + const input = 'hello\x02world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip ETX (0x03)', () => { + const input = 'hello\x03world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip EOT (0x04)', () => { + const input = 'hello\x04world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip ENQ (0x05)', () => { + const input = 'hello\x05world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip ACK (0x06)', () => { + const input = 'hello\x06world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip BELL (0x07)', () => { + const input = 'hello\x07world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip BACKSPACE (0x08)', () => { + const input = 'hello\x08world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip VT/Vertical Tab (0x0B)', () => { + const input = 'hello\x0Bworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip FF/Form Feed (0x0C)', () => { + const input = 'hello\x0Cworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip SO (0x0E)', () => { + const input = 'hello\x0Eworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip SI (0x0F)', () => { + const input = 'hello\x0Fworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip DLE (0x10)', () => { + const input = 'hello\x10world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip DC1 (0x11)', () => { + const input = 'hello\x11world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip DC2 (0x12)', () => { + const input = 'hello\x12world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip DC3 (0x13)', () => { + const input = 'hello\x13world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip DC4 (0x14)', () => { + const input = 'hello\x14world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip NAK (0x15)', () => { + const input = 'hello\x15world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip SYN (0x16)', () => { + const input = 'hello\x16world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip ETB (0x17)', () => { + const input = 'hello\x17world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip CAN (0x18)', () => { + const input = 'hello\x18world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip EM (0x19)', () => { + const input = 'hello\x19world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip SUB (0x1A)', () => { + const input = 'hello\x1Aworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip FS (0x1C)', () => { + const input = 'hello\x1Cworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip GS (0x1D)', () => { + const input = 'hello\x1Dworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip RS (0x1E)', () => { + const input = 'hello\x1Eworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip US (0x1F)', () => { + const input = 'hello\x1Fworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + }); + + describe('stripped C1 control characters (0x80-0x9F)', () => { + it('should strip all C1 control characters', () => { + // Test a few representative C1 control chars + expect(stripUnsafeCharacters('hello\x80world')).toBe('helloworld'); + expect(stripUnsafeCharacters('hello\x85world')).toBe('helloworld'); // NEL + expect(stripUnsafeCharacters('hello\x8Aworld')).toBe('helloworld'); + expect(stripUnsafeCharacters('hello\x90world')).toBe('helloworld'); + expect(stripUnsafeCharacters('hello\x9Fworld')).toBe('helloworld'); + }); + + it('should preserve characters at 0xA0 and above (non-C1)', () => { + // 0xA0 is non-breaking space, should be preserved + expect(stripUnsafeCharacters('hello\xA0world')).toBe('hello\xA0world'); + }); + }); + + describe('ANSI escape sequence stripping', () => { + it('should strip ANSI color codes', () => { + const input = '\x1b[31mRed\x1b[0m text'; + expect(stripUnsafeCharacters(input)).toBe('Red text'); + }); + + it('should strip ANSI cursor movement codes', () => { + const input = 'hello\x1b[9D\x1b[Kworld'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should strip complex ANSI sequences', () => { + const input = '\x1b[1;32;40mBold Green on Black\x1b[0m'; + expect(stripUnsafeCharacters(input)).toBe('Bold Green on Black'); + }); + }); + + describe('multiple control characters', () => { + it('should strip multiple different control characters', () => { + const input = 'a\x00b\x01c\x02d\x07e\x08f'; + expect(stripUnsafeCharacters(input)).toBe('abcdef'); + }); + + it('should handle consecutive control characters', () => { + const input = 'hello\x00\x01\x02\x03\x04world'; + expect(stripUnsafeCharacters(input)).toBe('helloworld'); + }); + + it('should handle mixed preserved and stripped chars', () => { + const input = 'line1\n\x00line2\t\x07line3\r\n'; + expect(stripUnsafeCharacters(input)).toBe('line1\nline2\tline3\r\n'); + }); + }); + + describe('edge cases', () => { + it('should handle empty string', () => { + expect(stripUnsafeCharacters('')).toBe(''); + }); + + it('should handle string with only control characters', () => { + expect(stripUnsafeCharacters('\x00\x01\x02\x03')).toBe(''); + }); + + it('should handle string with only preserved whitespace', () => { + expect(stripUnsafeCharacters('\t\n\r')).toBe('\t\n\r'); + }); + + it('should handle very long strings efficiently', () => { + const longString = 'a'.repeat(10000) + '\x00' + 'b'.repeat(10000); + const result = stripUnsafeCharacters(longString); + expect(result).toBe('a'.repeat(10000) + 'b'.repeat(10000)); + expect(result.length).toBe(20000); + }); + + it('should handle surrogate pairs correctly', () => { + // 𝌆 is outside BMP (U+1D306) + const input = '𝌆hello𝌆'; + expect(stripUnsafeCharacters(input)).toBe('𝌆hello𝌆'); + }); + + it('should handle mixed BMP and non-BMP characters', () => { + const input = 'Hello 世界 🌍 привет'; + expect(stripUnsafeCharacters(input)).toBe('Hello 世界 🌍 привет'); + }); + }); + + describe('performance: regex vs array-based', () => { + it('should handle real-world terminal output with control chars', () => { + // Simulate terminal output with various control sequences + const terminalOutput = + '\x1b[32mSuccess:\x1b[0m File saved\x07\n\x1b[?25hDone'; + expect(stripUnsafeCharacters(terminalOutput)).toBe( + 'Success: File saved\nDone', + ); + }); }); }); describe('escapeAnsiCtrlCodes', () => { diff --git a/packages/cli/src/ui/utils/textUtils.ts b/packages/cli/src/ui/utils/textUtils.ts index 4d3cd1ded5..b99a38c20f 100644 --- a/packages/cli/src/ui/utils/textUtils.ts +++ b/packages/cli/src/ui/utils/textUtils.ts @@ -104,7 +104,7 @@ export function cpSlice(str: string, start: number, end?: number): string { * Characters stripped: * - ANSI escape sequences (via strip-ansi) * - VT control sequences (via Node.js util.stripVTControlCharacters) - * - C0 control chars (0x00-0x1F) except CR/LF which are handled elsewhere + * - C0 control chars (0x00-0x1F) except TAB(0x09), LF(0x0A), CR(0x0D) * - C1 control chars (0x80-0x9F) that can cause display issues * * Characters preserved: @@ -117,28 +117,11 @@ export function stripUnsafeCharacters(str: string): string { const strippedAnsi = stripAnsi(str); const strippedVT = stripVTControlCharacters(strippedAnsi); - return toCodePoints(strippedVT) - .filter((char) => { - const code = char.codePointAt(0); - if (code === undefined) return false; - - // Preserve CR/LF/TAB for line handling - if (code === 0x0a || code === 0x0d || code === 0x09) return true; - - // Remove C0 control chars (except CR/LF) that can break display - // Examples: BELL(0x07) makes noise, BS(0x08) moves cursor, VT(0x0B), FF(0x0C) - if (code >= 0x00 && code <= 0x1f) return false; - - // Remove C1 control chars (0x80-0x9f) - legacy 8-bit control codes - if (code >= 0x80 && code <= 0x9f) return false; - - // Preserve DEL (0x7f) - it's handled functionally by applyOperations as backspace - // and doesn't cause rendering issues when displayed - - // Preserve all other characters including Unicode/emojis - return true; - }) - .join(''); + // Use a regex to strip remaining unsafe control characters + // C0: 0x00-0x1F except 0x09 (TAB), 0x0A (LF), 0x0D (CR) + // C1: 0x80-0x9F + // eslint-disable-next-line no-control-regex + return strippedVT.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F]/g, ''); } /**