mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-13 23:01:09 -07:00
perf(ui): optimize stripUnsafeCharacters with regex (#18413)
This commit is contained in:
@@ -58,9 +58,289 @@ describe('textUtils', () => {
|
||||
});
|
||||
|
||||
describe('stripUnsafeCharacters', () => {
|
||||
it('should not strip tab characters', () => {
|
||||
const input = 'hello world';
|
||||
expect(stripUnsafeCharacters(input)).toBe('hello world');
|
||||
describe('preserved characters', () => {
  // Each entry is [test name, input]. For every case the function under test
  // is expected to hand the input back unchanged.
  const preservedCases = [
    ['should preserve TAB (0x09)', 'hello\tworld'],
    ['should preserve LF/newline (0x0A)', 'hello\nworld'],
    ['should preserve CR (0x0D)', 'hello\rworld'],
    ['should preserve CRLF (0x0D 0x0A)', 'hello\r\nworld'],
    ['should preserve DEL (0x7F)', 'hello\x7Fworld'],
    [
      'should preserve all printable ASCII (0x20-0x7E)',
      ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
    ],
    ['should preserve Unicode characters above 0x9F', 'Hello κόσμε 世界 🌍'],
    ['should preserve emojis', '🎉 Celebration! 🚀 Launch! 💯'],
    [
      'should preserve complex emoji sequences (ZWJ)',
      // The joiner (U+200D) and variation selector (U+FE0F) are invisible, so
      // they are spelled as escapes: a literal copy can silently lose them in
      // editors or copy/paste, leaving a test that no longer covers ZWJ.
      // Family = man + ZWJ + woman + ZWJ + girl + ZWJ + boy.
      // Flag   = white flag + VS16 + ZWJ + rainbow.
      'Family: \u{1F468}\u200D\u{1F469}\u200D\u{1F467}\u200D\u{1F466} ' +
        'Flag: \u{1F3F3}\uFE0F\u200D\u{1F308}',
    ],
  ];

  for (const [testName, input] of preservedCases) {
    it(testName, () => {
      expect(stripUnsafeCharacters(input)).toBe(input);
    });
  }
});
|
||||
|
||||
describe('stripped C0 control characters (0x00-0x1F except TAB/LF/CR)', () => {
  // [label, code unit] for every C0 control exercised by this suite.
  // TAB (0x09), LF (0x0A) and CR (0x0D) are not listed because the suite
  // title excludes them; ESC (0x1B) is not listed here either.
  const c0Controls = [
    ['NULL', 0x00],
    ['SOH', 0x01],
    ['STX', 0x02],
    ['ETX', 0x03],
    ['EOT', 0x04],
    ['ENQ', 0x05],
    ['ACK', 0x06],
    ['BELL', 0x07],
    ['BACKSPACE', 0x08],
    ['VT/Vertical Tab', 0x0b],
    ['FF/Form Feed', 0x0c],
    ['SO', 0x0e],
    ['SI', 0x0f],
    ['DLE', 0x10],
    ['DC1', 0x11],
    ['DC2', 0x12],
    ['DC3', 0x13],
    ['DC4', 0x14],
    ['NAK', 0x15],
    ['SYN', 0x16],
    ['ETB', 0x17],
    ['CAN', 0x18],
    ['EM', 0x19],
    ['SUB', 0x1a],
    ['FS', 0x1c],
    ['GS', 0x1d],
    ['RS', 0x1e],
    ['US', 0x1f],
  ];

  for (const [label, codeUnit] of c0Controls) {
    // Two-digit upper-case hex keeps the generated test names in the
    // "should strip NAME (0xNN)" form.
    const hex = codeUnit.toString(16).toUpperCase().padStart(2, '0');

    it(`should strip ${label} (0x${hex})`, () => {
      const input = `hello${String.fromCharCode(codeUnit)}world`;
      expect(stripUnsafeCharacters(input)).toBe('helloworld');
    });
  }
});
|
||||
|
||||
describe('stripped C1 control characters (0x80-0x9F)', () => {
  it('should strip all C1 control characters', () => {
    // Walk the whole C1 block (0x80 through 0x9F inclusive) so the test
    // really covers "all" C1 controls instead of a handful of samples.
    for (let codeUnit = 0x80; codeUnit <= 0x9f; codeUnit += 1) {
      const input = `hello${String.fromCharCode(codeUnit)}world`;
      expect(stripUnsafeCharacters(input)).toBe('helloworld');
    }
  });

  it('should preserve characters at 0xA0 and above (non-C1)', () => {
    // 0xA0 (non-breaking space) is the first code point after the C1 block,
    // so it must come back untouched.
    const nbspText = 'hello\xA0world';
    expect(stripUnsafeCharacters(nbspText)).toBe(nbspText);
  });
});
|
||||
|
||||
describe('ANSI escape sequence stripping', () => {
  it('should strip ANSI color codes', () => {
    // "\x1b[31m" and "\x1b[0m" wrap the word "Red"; only the text should remain.
    const stripped = stripUnsafeCharacters('\x1b[31mRed\x1b[0m text');
    expect(stripped).toBe('Red text');
  });

  it('should strip ANSI cursor movement codes', () => {
    // "\x1b[9D" and "\x1b[K" sit between the two words.
    const stripped = stripUnsafeCharacters('hello\x1b[9D\x1b[Kworld');
    expect(stripped).toBe('helloworld');
  });

  it('should strip complex ANSI sequences', () => {
    // A single sequence carrying several ';'-separated parameters.
    const stripped = stripUnsafeCharacters(
      '\x1b[1;32;40mBold Green on Black\x1b[0m',
    );
    expect(stripped).toBe('Bold Green on Black');
  });
});
|
||||
|
||||
describe('multiple control characters', () => {
  it('should strip multiple different control characters', () => {
    // One control character is placed between each pair of letters.
    const stripped = stripUnsafeCharacters('a\x00b\x01c\x02d\x07e\x08f');
    expect(stripped).toBe('abcdef');
  });

  it('should handle consecutive control characters', () => {
    // A run of five adjacent control characters between the two words.
    const stripped = stripUnsafeCharacters('hello\x00\x01\x02\x03\x04world');
    expect(stripped).toBe('helloworld');
  });

  it('should handle mixed preserved and stripped chars', () => {
    // \n, \t and \r\n are expected to stay; \x00 and \x07 are expected to go.
    const stripped = stripUnsafeCharacters('line1\n\x00line2\t\x07line3\r\n');
    expect(stripped).toBe('line1\nline2\tline3\r\n');
  });
});
|
||||
|
||||
describe('edge cases', () => {
  it('should handle empty string', () => {
    const stripped = stripUnsafeCharacters('');
    expect(stripped).toBe('');
  });

  it('should handle string with only control characters', () => {
    const stripped = stripUnsafeCharacters('\x00\x01\x02\x03');
    expect(stripped).toBe('');
  });

  it('should handle string with only preserved whitespace', () => {
    const whitespaceOnly = '\t\n\r';
    expect(stripUnsafeCharacters(whitespaceOnly)).toBe(whitespaceOnly);
  });

  it('should handle very long strings efficiently', () => {
    // 10000 'a's, a single NUL, then 10000 'b's: only the NUL should vanish.
    const head = 'a'.repeat(10000);
    const tail = 'b'.repeat(10000);

    const result = stripUnsafeCharacters(`${head}\x00${tail}`);

    expect(result).toBe(head + tail);
    expect(result.length).toBe(20000);
  });

  it('should handle surrogate pairs correctly', () => {
    // 𝌆 is U+1D306, which lies outside the BMP.
    const astralText = '𝌆hello𝌆';
    expect(stripUnsafeCharacters(astralText)).toBe('𝌆hello𝌆');
  });

  it('should handle mixed BMP and non-BMP characters', () => {
    const mixedText = 'Hello 世界 🌍 привет';
    expect(stripUnsafeCharacters(mixedText)).toBe('Hello 世界 🌍 привет');
  });
});
|
||||
|
||||
describe('performance: regex vs array-based', () => {
  it('should handle real-world terminal output with control chars', () => {
    // Terminal-style sample: color codes around "Success:", a BEL (\x07)
    // before the newline, and a "\x1b[?25h" sequence before "Done".
    const stripped = stripUnsafeCharacters(
      '\x1b[32mSuccess:\x1b[0m File saved\x07\n\x1b[?25hDone',
    );

    expect(stripped).toBe('Success: File saved\nDone');
  });
});
|
||||
});
|
||||
describe('escapeAnsiCtrlCodes', () => {
|
||||
|
||||
Reference in New Issue
Block a user