From 565eafc1ecd0047fc377b92fc41df781d6173897 Mon Sep 17 00:00:00 2001 From: euxaristia <25621994+euxaristia@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:44:28 -0400 Subject: [PATCH] fix(cli): resolve text sanitization data loss due to C1 control characters (#22624) --- packages/cli/src/ui/utils/textUtils.test.ts | 23 +++++++++++++++++ packages/cli/src/ui/utils/textUtils.ts | 28 ++++++++++++++------- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/packages/cli/src/ui/utils/textUtils.test.ts b/packages/cli/src/ui/utils/textUtils.test.ts index 7ec515ffb1..ad3e243026 100644 --- a/packages/cli/src/ui/utils/textUtils.test.ts +++ b/packages/cli/src/ui/utils/textUtils.test.ts @@ -266,6 +266,29 @@ describe('textUtils', () => { // 0xA0 is non-breaking space, should be preserved expect(stripUnsafeCharacters('hello\xA0world')).toBe('hello\xA0world'); }); + + it('should not lose text after DCS (0x90) — regression for data loss', () => { + // 0x90 (DCS) starts a Device Control String that stripVTControlCharacters + // treats as an unterminated sequence, swallowing all subsequent text. + // Stripping C1 chars before VT processing prevents this data loss. + expect(stripUnsafeCharacters('important\x90data after DCS')).toBe( + 'importantdata after DCS', + ); + }); + + it('should fully strip 8-bit CSI (0x9B) sequences', () => { + // 0x9B (CSI) is equivalent to ESC[. stripAnsi should handle the + // whole sequence including parameters. 
+ expect(stripUnsafeCharacters('keep\x9B42mthis text')).toBe( + 'keepthis text', + ); + }); + + it('should not lose text when multiple C1 chars precede valid content', () => { + expect(stripUnsafeCharacters('start\x90\x9B\x85middle\x80end')).toBe( + 'startmiddleend', + ); + }); }); describe('ANSI escape sequence stripping', () => { diff --git a/packages/cli/src/ui/utils/textUtils.ts b/packages/cli/src/ui/utils/textUtils.ts index a039a43991..20d0c03874 100644 --- a/packages/cli/src/ui/utils/textUtils.ts +++ b/packages/cli/src/ui/utils/textUtils.ts @@ -98,8 +98,16 @@ export function cpSlice(str: string, start: number, end?: number): string { /** * Strip characters that can break terminal rendering. * - * Uses Node.js built-in stripVTControlCharacters to handle VT sequences, - * then filters remaining control characters that can disrupt display. + * This is a strict sanitization function intended for general display + * contexts. It strips all C1 control characters (0x80-0x9F) and VT + * control sequences. For list display contexts where a more lenient + * approach is needed (preserving C1 characters and only stripping ANSI + * codes and newlines/tabs), use a separate function instead. + * + * Processing order: + * 1. stripAnsi removes ANSI escape sequences (including 8-bit CSI 0x9B) + * 2. Regex strips C0, C1, BiDi, and zero-width control characters + * 3. 
stripVTControlCharacters runs last as a defensive pass * * Characters stripped: * - ANSI escape sequences (via strip-ansi) @@ -119,18 +127,20 @@ export function cpSlice(str: string, start: number, end?: number): string { */ export function stripUnsafeCharacters(str: string): string { const strippedAnsi = stripAnsi(str); - const strippedVT = stripVTControlCharacters(strippedAnsi); - // Use a regex to strip remaining unsafe control characters - // C0: 0x00-0x1F except 0x09 (TAB), 0x0A (LF), 0x0D (CR) - // C1: 0x80-0x9F - // BiDi: U+200E (LRM), U+200F (RLM), U+202A-U+202E, U+2066-U+2069 - // Zero-width: U+200B (ZWSP), U+FEFF (BOM) - return strippedVT.replace( + // Strip C0, C1, and other unsafe characters via regex first. + // This is more efficient than multiple replaces and crucially removes C1 + // characters (e.g., 0x90 DCS) before they can be misinterpreted by + // stripVTControlCharacters, which could otherwise cause data loss. + const strippedWithRegex = strippedAnsi.replace( // eslint-disable-next-line no-control-regex /[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F\u200E\u200F\u202A-\u202E\u2066-\u2069\u200B\uFEFF]/g, '', ); + + // Finally, run stripVTControlCharacters as a defensive pass; the regex + // above has already removed ESC (0x1B) and 8-bit CSI (0x9B). + return stripVTControlCharacters(strippedWithRegex); } /**