Files
gemini-cli/packages/cli/src/ui/utils/textUtils.ts

256 lines
7.3 KiB
TypeScript
Raw Normal View History

2025-06-13 00:59:45 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import stripAnsi from 'strip-ansi';
import ansiRegex from 'ansi-regex';
import { stripVTControlCharacters } from 'node:util';
import stringWidth from 'string-width';
import { LRUCache } from 'mnemonist';
import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
2025-06-13 00:59:45 -07:00
/**
 * Calculates the maximum width of a multi-line ASCII art string.
 *
 * Lines are separated by `\n`. Width is measured with `String.prototype.length`
 * (UTF-16 code units), which equals the column count for plain ASCII art.
 *
 * @param asciiArt The ASCII art string.
 * @returns The length of the longest line, or 0 for an empty string.
 */
export const getAsciiArtWidth = (asciiArt: string): number => {
  if (!asciiArt) {
    return 0;
  }
  // Track the maximum in a single pass instead of spreading every line length
  // into Math.max(...): spreading passes one argument per line and throws a
  // RangeError once the line count exceeds the engine's argument limit.
  let widest = 0;
  for (const line of asciiArt.split('\n')) {
    if (line.length > widest) {
      widest = line.length;
    }
  }
  return widest;
};
/*
 * -------------------------------------------------------------------------
 * Unicode-aware helpers (work at the code-point level rather than UTF-16
 * code units so that surrogate-pair emoji count as one "column".)
 * ---------------------------------------------------------------------- */
// Cache for code points
// Upper bound (in UTF-16 code units, i.e. `str.length`) on the strings that
// toCodePoints will look up in or store into the cache; longer strings are
// always recomputed.
const MAX_STRING_LENGTH_TO_CACHE = 1000;
// Maps an input string to its array of code points. Constructed with
// LRU_BUFFER_PERF_CACHE_LIMIT (imported from ../constants.js).
const codePointsCache = new LRUCache<string, string[]>(
LRU_BUFFER_PERF_CACHE_LIMIT,
);
/**
 * Splits a string into an array with one entry per Unicode code point, so a
 * surrogate pair becomes a single element.
 *
 * Pure-ASCII input is split directly without touching the cache. Non-ASCII
 * strings no longer than MAX_STRING_LENGTH_TO_CACHE are memoized in
 * codePointsCache; on a cache hit the stored array instance itself is
 * returned, so callers should treat the result as read-only.
 *
 * @param str The string to split.
 * @returns The code points of `str`, in order.
 */
export function toCodePoints(str: string): string[] {
  // Fast path: advance while every code unit is ASCII (0-127).
  let scanned = 0;
  while (scanned < str.length && str.charCodeAt(scanned) <= 127) {
    scanned++;
  }
  if (scanned === str.length) {
    return str.split('');
  }

  // Only short strings take part in caching.
  const cacheable = str.length <= MAX_STRING_LENGTH_TO_CACHE;
  if (cacheable) {
    const hit = codePointsCache.get(str);
    if (hit !== undefined) {
      return hit;
    }
  }

  // String iteration yields whole code points, not UTF-16 code units.
  const codePoints = [...str];
  if (cacheable) {
    codePointsCache.set(str, codePoints);
  }
  return codePoints;
}
/**
 * Returns the length of a string counted in code points rather than UTF-16
 * code units.
 *
 * @param str The string to measure.
 * @returns The number of code points in `str`.
 */
export function cpLen(str: string): number {
  const codePoints = toCodePoints(str);
  return codePoints.length;
}
/**
 * Extracts a substring using code-point indices instead of UTF-16 indices.
 * `start` and `end` follow `Array.prototype.slice` semantics.
 *
 * @param str The string to slice.
 * @param start Index of the first code point to include.
 * @param end Index of the code point at which to stop (exclusive); omitted
 *   means "through the end".
 * @returns The selected code points joined back into a string.
 */
export function cpSlice(str: string, start: number, end?: number): string {
  return toCodePoints(str).slice(start, end).join('');
}
/**
 * Strip characters that can break terminal rendering.
 *
 * Uses strip-ansi and Node.js built-in stripVTControlCharacters to remove
 * escape sequences, then filters remaining control characters that can
 * disrupt display.
 *
 * Characters stripped:
 * - ANSI escape sequences (via strip-ansi)
 * - VT control sequences (via Node.js util.stripVTControlCharacters)
 * - C0 control chars (0x00-0x1F) except TAB/LF/CR
 * - C1 control chars (0x80-0x9F) that can cause display issues
 *
 * Characters preserved:
 * - All printable Unicode including emojis
 * - DEL (0x7F) - handled functionally by applyOperations, not a display issue
 * - CR/LF (0x0D/0x0A) - needed for line breaks
 * - TAB (0x09) - preserve tabs
 *
 * @param str The raw text to sanitize.
 * @returns `str` with escape sequences and unsafe control characters removed.
 */
export function stripUnsafeCharacters(str: string): string {
  const strippedAnsi = stripAnsi(str);
  const strippedVT = stripVTControlCharacters(strippedAnsi);
  return toCodePoints(strippedVT)
    .filter((char) => {
      const code = char.codePointAt(0);
      if (code === undefined) return false;
      // Preserve CR/LF/TAB for line handling
      if (code === 0x0a || code === 0x0d || code === 0x09) return true;
      // Remove the remaining C0 control chars that can break display
      // Examples: BELL(0x07) makes noise, BS(0x08) moves cursor, VT(0x0B), FF(0x0C)
      if (code >= 0x00 && code <= 0x1f) return false;
      // Remove C1 control chars (0x80-0x9f) - legacy 8-bit control codes
      if (code >= 0x80 && code <= 0x9f) return false;
      // Preserve DEL (0x7f) - it's handled functionally by applyOperations as backspace
      // and doesn't cause rendering issues when displayed
      // Preserve all other characters including Unicode/emojis
      return true;
    })
    .join('');
}
/**
 * Sanitize a string for display in list-like UI components (e.g. Help, Suggestions).
 * Removes ANSI codes, collapses each run of whitespace into a single space, and
 * optionally truncates with a trailing "...".
 *
 * @param str The text to sanitize; a falsy value yields ''.
 * @param maxLength Optional maximum length of the result, in UTF-16 code
 *   units. When omitted or 0, no truncation is performed.
 * @returns The sanitized text, never longer than `maxLength` when one is given.
 */
export function sanitizeForListDisplay(
  str: string,
  maxLength?: number,
): string {
  if (!str) {
    return '';
  }
  const sanitized = stripAnsi(str).replace(/\s+/g, ' ');
  if (!maxLength || sanitized.length <= maxLength) {
    return sanitized;
  }

  const ellipsis = '...';
  if (maxLength <= ellipsis.length) {
    // No room for any text next to the ellipsis: emit only as many dots as
    // fit, so the result never exceeds the requested length.
    return ellipsis.slice(0, Math.max(0, maxLength));
  }

  let cut = maxLength - ellipsis.length;
  // Do not cut between the two halves of a surrogate pair; that would leave
  // a lone high surrogate (rendered as a replacement glyph) before the dots.
  const before = sanitized.charCodeAt(cut - 1);
  const after = sanitized.charCodeAt(cut);
  const splitsPair =
    before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  if (splitsPair) {
    cut -= 1;
  }
  return sanitized.substring(0, cut) + ellipsis;
}
// Memoizes display widths keyed by the measured string; read and written by
// getCachedStringWidth and emptied by clearStringWidthCache. Constructed with
// LRU_BUFFER_PERF_CACHE_LIMIT (imported from ../constants.js).
const stringWidthCache = new LRUCache<string, number>(
LRU_BUFFER_PERF_CACHE_LIMIT,
);
/**
 * Memoized wrapper around `stringWidth`.
 *
 * A single printable ASCII character is answered immediately without
 * consulting the cache. Every other string is looked up in stringWidthCache
 * and measured only on a miss.
 *
 * @param str The string to measure.
 * @returns The width reported by `stringWidth`, or the code-point count of the
 *   ANSI-stripped string when `stringWidth` throws.
 */
export const getCachedStringWidth = (str: string): number => {
  // Hot path: one printable ASCII char (0x20-0x7e) always has width 1.
  if (str.length === 1) {
    const unit = str.charCodeAt(0);
    const isPrintableAscii = unit >= 0x20 && unit <= 0x7e;
    if (isPrintableAscii) {
      return 1;
    }
  }

  const known = stringWidthCache.get(str);
  if (known !== undefined) {
    return known;
  }

  const measured = ((): number => {
    try {
      return stringWidth(str);
    } catch {
      // Fallback for characters that cause string-width to crash (e.g. U+0602)
      // See: https://github.com/google-gemini/gemini-cli/issues/16418
      return toCodePoints(stripAnsi(str)).length;
    }
  })();

  stringWidthCache.set(str, measured);
  return measured;
};
/**
 * Clear the string width cache.
 *
 * Removes every entry from stringWidthCache, so subsequent
 * getCachedStringWidth calls measure their input again.
 */
export const clearStringWidthCache = (): void => {
stringWidthCache.clear();
};
// Shared matcher for ANSI escape sequences, built once at module load.
const regex = ansiRegex();
/**
 * Walks a JSON-like value (objects, arrays, primitives) and escapes every
 * ANSI escape sequence found in any string it contains.
 *
 * Each matched sequence is replaced by its JSON string encoding without the
 * surrounding quotes, so the ESC byte becomes the visible text `\u001b`.
 *
 * Containers are copied lazily: an array or object is shallow-copied only
 * once one of its entries actually changes. When nothing needs escaping,
 * the original reference is returned unchanged.
 *
 * @param obj The JSON-like value (object, array, string, etc.) to traverse.
 * @returns A value with all nested strings escaped, or the original `obj`
 *   reference if no changes were necessary.
 */
export function escapeAnsiCtrlCodes<T>(obj: T): T {
  if (typeof obj === 'string') {
    const hasAnsi = obj.search(regex) !== -1;
    if (!hasAnsi) {
      return obj; // Nothing to escape: hand back the original string
    }
    regex.lastIndex = 0; // needed for global regex
    const escapedText = obj.replace(regex, (sequence) => {
      const quoted = JSON.stringify(sequence);
      return quoted.slice(1, -1);
    });
    return escapedText as T;
  }

  // Remaining primitives (and null) contain no strings to escape.
  if (obj === null || typeof obj !== 'object') {
    return obj;
  }

  if (Array.isArray(obj)) {
    let patchedArr: unknown[] | null = null;
    for (let index = 0; index < obj.length; index++) {
      const original = obj[index];
      const escaped = escapeAnsiCtrlCodes(original);
      if (escaped === original) {
        continue;
      }
      // First change in this array: make the copy now.
      if (patchedArr === null) {
        patchedArr = [...obj];
      }
      patchedArr[index] = escaped;
    }
    return (patchedArr === null ? obj : patchedArr) as T;
  }

  let patchedObj: T | null = null;
  for (const key of Object.keys(obj)) {
    const original = (obj as Record<string, unknown>)[key];
    const escaped = escapeAnsiCtrlCodes(original);
    if (escaped === original) {
      continue;
    }
    // First change in this object: make the copy now.
    if (patchedObj === null) {
      patchedObj = { ...obj };
    }
    (patchedObj as Record<string, unknown>)[key] = escaped;
  }
  return patchedObj === null ? obj : patchedObj;
}