mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-07 20:00:37 -07:00
security: strip deceptive Unicode characters from terminal output (#19026)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -8,6 +8,7 @@ import React from 'react';
|
||||
import { Text } from 'ink';
|
||||
import { theme } from '../semantic-colors.js';
|
||||
import { debugLogger } from '@google/gemini-cli-core';
|
||||
import { stripUnsafeCharacters } from './textUtils.js';
|
||||
|
||||
// Constants for Markdown parsing
|
||||
const BOLD_MARKER_LENGTH = 2; // For "**"
|
||||
@@ -23,9 +24,10 @@ interface RenderInlineProps {
|
||||
}
|
||||
|
||||
const RenderInlineInternal: React.FC<RenderInlineProps> = ({
|
||||
text,
|
||||
text: rawText,
|
||||
defaultColor,
|
||||
}) => {
|
||||
const text = stripUnsafeCharacters(rawText);
|
||||
const baseColor = defaultColor ?? theme.text.primary;
|
||||
// Early return for plain text without markdown or URLs
|
||||
if (!/[*_~`<[https?:]/.test(text)) {
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
} from 'ink';
|
||||
import { theme } from '../semantic-colors.js';
|
||||
import { RenderInline } from './InlineMarkdownRenderer.js';
|
||||
import { stripUnsafeCharacters } from './textUtils.js';
|
||||
|
||||
interface TableRendererProps {
|
||||
headers: string[];
|
||||
@@ -60,12 +61,18 @@ export const TableRenderer: React.FC<TableRendererProps> = ({
|
||||
);
|
||||
|
||||
const styledHeaders = useMemo(
|
||||
() => cleanedHeaders.map((header) => toStyledCharacters(header)),
|
||||
() =>
|
||||
cleanedHeaders.map((header) =>
|
||||
toStyledCharacters(stripUnsafeCharacters(header)),
|
||||
),
|
||||
[cleanedHeaders],
|
||||
);
|
||||
|
||||
const styledRows = useMemo(
|
||||
() => rows.map((row) => row.map((cell) => toStyledCharacters(cell))),
|
||||
() =>
|
||||
rows.map((row) =>
|
||||
row.map((cell) => toStyledCharacters(stripUnsafeCharacters(cell))),
|
||||
),
|
||||
[rows],
|
||||
);
|
||||
|
||||
|
||||
@@ -332,6 +332,35 @@ describe('textUtils', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('BiDi and deceptive Unicode characters', () => {
|
||||
it('should strip BiDi override characters', () => {
|
||||
const input = 'safe\u202Etxt.sh';
|
||||
// When stripped, it should be 'safetxt.sh'
|
||||
expect(stripUnsafeCharacters(input)).toBe('safetxt.sh');
|
||||
});
|
||||
|
||||
it('should strip all BiDi control characters (LRM, RLM, U+202A-U+202E, U+2066-U+2069)', () => {
|
||||
const bidiChars =
|
||||
'\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069';
|
||||
expect(stripUnsafeCharacters('a' + bidiChars + 'b')).toBe('ab');
|
||||
});
|
||||
|
||||
it('should strip zero-width characters (U+200B, U+FEFF)', () => {
|
||||
const zeroWidthChars = '\u200B\uFEFF';
|
||||
expect(stripUnsafeCharacters('a' + zeroWidthChars + 'b')).toBe('ab');
|
||||
});
|
||||
|
||||
it('should preserve ZWJ (U+200D) for complex emojis', () => {
|
||||
const input = 'Family: 👨👩👧👦';
|
||||
expect(stripUnsafeCharacters(input)).toBe('Family: 👨👩👧👦');
|
||||
});
|
||||
|
||||
it('should preserve ZWNJ (U+200C)', () => {
|
||||
const input = 'hello\u200Cworld';
|
||||
expect(stripUnsafeCharacters(input)).toBe('hello\u200Cworld');
|
||||
});
|
||||
});
|
||||
|
||||
describe('performance: regex vs array-based', () => {
|
||||
it('should handle real-world terminal output with control chars', () => {
|
||||
// Simulate terminal output with various control sequences
|
||||
|
||||
@@ -106,9 +106,13 @@ export function cpSlice(str: string, start: number, end?: number): string {
|
||||
* - VT control sequences (via Node.js util.stripVTControlCharacters)
|
||||
* - C0 control chars (0x00-0x1F) except TAB(0x09), LF(0x0A), CR(0x0D)
|
||||
* - C1 control chars (0x80-0x9F) that can cause display issues
|
||||
* - BiDi control chars (U+200E, U+200F, U+202A-U+202E, U+2066-U+2069)
|
||||
* - Zero-width chars (U+200B, U+FEFF)
|
||||
*
|
||||
* Characters preserved:
|
||||
* - All printable Unicode including emojis
|
||||
* - ZWJ (U+200D) - needed for complex emoji sequences
|
||||
* - ZWNJ (U+200C) - preserve zero-width non-joiner
|
||||
* - DEL (0x7F) - handled functionally by applyOperations, not a display issue
|
||||
* - CR/LF (0x0D/0x0A) - needed for line breaks
|
||||
* - TAB (0x09) - preserve tabs
|
||||
@@ -120,8 +124,13 @@ export function stripUnsafeCharacters(str: string): string {
|
||||
// Use a regex to strip remaining unsafe control characters
|
||||
// C0: 0x00-0x1F except 0x09 (TAB), 0x0A (LF), 0x0D (CR)
|
||||
// C1: 0x80-0x9F
|
||||
// eslint-disable-next-line no-control-regex
|
||||
return strippedVT.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F]/g, '');
|
||||
// BiDi: U+200E (LRM), U+200F (RLM), U+202A-U+202E, U+2066-U+2069
|
||||
// Zero-width: U+200B (ZWSP), U+FEFF (BOM)
|
||||
return strippedVT.replace(
|
||||
// eslint-disable-next-line no-control-regex
|
||||
/[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F\u200E\u200F\u202A-\u202E\u2066-\u2069\u200B\uFEFF]/g,
|
||||
'',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user