2025-06-13 00:59:45 -07:00
|
|
|
|
/**
|
|
|
|
|
|
* @license
|
|
|
|
|
|
* Copyright 2025 Google LLC
|
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2025-08-21 16:43:56 -07:00
|
|
|
|
import stripAnsi from 'strip-ansi';
|
2025-09-25 14:07:17 -04:00
|
|
|
|
import ansiRegex from 'ansi-regex';
|
2025-08-25 22:11:27 +02:00
|
|
|
|
import { stripVTControlCharacters } from 'node:util';
|
2025-09-10 21:20:40 -07:00
|
|
|
|
import stringWidth from 'string-width';
|
2026-01-16 13:17:31 -08:00
|
|
|
|
import { LRUCache } from 'mnemonist';
|
2026-01-16 09:33:13 -08:00
|
|
|
|
import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
|
2025-08-21 16:43:56 -07:00
|
|
|
|
|
2025-06-13 00:59:45 -07:00
|
|
|
|
/**
 * Calculates the maximum width of a multi-line ASCII art string.
 *
 * Width is the `String.prototype.length` (UTF-16 code units) of each
 * `\n`-separated line; no ANSI stripping or Unicode width logic is applied.
 *
 * @param asciiArt The ASCII art string.
 * @returns The length of the longest line, or 0 for an empty string.
 */
export const getAsciiArtWidth = (asciiArt: string): number => {
  if (!asciiArt) {
    return 0;
  }

  // Track the maximum in a loop instead of `Math.max(...lengths)`: spreading
  // one argument per line throws a RangeError once the input has more lines
  // than the engine accepts as call arguments.
  let widest = 0;
  for (const line of asciiArt.split('\n')) {
    if (line.length > widest) {
      widest = line.length;
    }
  }
  return widest;
};
|
2025-06-15 22:09:30 -04:00
|
|
|
|
|
2025-06-19 20:17:23 +00:00
|
|
|
|
/*
|
|
|
|
|
|
* -------------------------------------------------------------------------
|
|
|
|
|
|
* Unicode‑aware helpers (work at the code‑point level rather than UTF‑16
|
|
|
|
|
|
* code units so that surrogate‑pair emoji count as one "column".)
|
|
|
|
|
|
* ---------------------------------------------------------------------- */
|
|
|
|
|
|
|
2026-01-16 09:33:13 -08:00
|
|
|
|
// Strings longer than this many UTF-16 code units are never stored in the
// code-point cache.
const MAX_STRING_LENGTH_TO_CACHE = 1000;

// Memoizes string -> code-point array results; constructed with
// LRU_BUFFER_PERF_CACHE_LIMIT as its capacity argument.
const codePointsCache = new LRUCache<string, string[]>(LRU_BUFFER_PERF_CACHE_LIMIT);
|
2025-09-10 21:20:40 -07:00
|
|
|
|
|
2025-06-19 20:17:23 +00:00
|
|
|
|
/**
 * Splits a string into an array with one entry per Unicode code point, so a
 * surrogate-pair character (e.g. an emoji) occupies a single element.
 *
 * Pure-ASCII input is split directly and never cached. Non-ASCII input of at
 * most MAX_STRING_LENGTH_TO_CACHE UTF-16 code units is memoized in
 * `codePointsCache`; on a cache hit the stored array instance itself is
 * returned, so callers should treat the result as read-only.
 *
 * @param str The string to split.
 * @returns The code points of `str`, in order.
 */
export function toCodePoints(str: string): string[] {
  // Advance past the leading run of ASCII (0-127) code units; reaching the
  // end means the whole string is ASCII and a plain split is sufficient.
  let index = 0;
  while (index < str.length && str.charCodeAt(index) <= 127) {
    index++;
  }
  if (index === str.length) {
    return str.split('');
  }

  const cacheable = str.length <= MAX_STRING_LENGTH_TO_CACHE;

  if (cacheable) {
    const hit = codePointsCache.get(str);
    if (hit !== undefined) {
      return hit;
    }
  }

  // Array.from iterates by code point, keeping surrogate pairs together.
  const codePoints = Array.from(str);

  if (cacheable) {
    codePointsCache.set(str, codePoints);
  }

  return codePoints;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the number of Unicode code points in `str` (not UTF-16 code units).
 */
export function cpLen(str: string): number {
  const { length } = toCodePoints(str);
  return length;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Code-point-aware counterpart of `String.prototype.slice`.
 *
 * `start` and `end` are indices into the code-point array of `str` and follow
 * `Array.prototype.slice` semantics (negative values count from the end).
 *
 * @param str The string to slice.
 * @param start Index of the first code point to include.
 * @param end Index of the first code point to exclude; defaults to the end.
 * @returns The selected code points joined back into a string.
 */
export function cpSlice(str: string, start: number, end?: number): string {
  const codePoints = toCodePoints(str);
  const selected = codePoints.slice(start, end);
  return selected.join('');
}
|
2025-08-21 16:43:56 -07:00
|
|
|
|
|
|
|
|
|
|
/**
 * Removes characters that can break terminal rendering.
 *
 * The input is first passed through strip-ansi and then through Node's
 * `util.stripVTControlCharacters`; the remaining text is filtered one code
 * point at a time.
 *
 * Removed:
 * - ANSI escape sequences (via strip-ansi)
 * - VT control sequences (via util.stripVTControlCharacters)
 * - C0 control chars (0x00-0x1F) other than TAB, LF and CR
 *   (e.g. BELL 0x07, BS 0x08, VT 0x0B, FF 0x0C)
 * - C1 control chars (0x80-0x9F), the legacy 8-bit control codes
 *
 * Kept:
 * - TAB (0x09), LF (0x0A) and CR (0x0D)
 * - DEL (0x7F) - handled functionally by applyOperations, not a display issue
 * - Every other character, including printable Unicode and emoji
 *
 * @param str The text to clean.
 * @returns `str` without the characters listed above.
 */
export function stripUnsafeCharacters(str: string): string {
  const withoutSequences = stripVTControlCharacters(stripAnsi(str));

  let safe = '';
  for (const ch of toCodePoints(withoutSequences)) {
    const cp = ch.codePointAt(0);
    if (cp === undefined) {
      continue;
    }

    // TAB / LF / CR are inside the C0 range but must survive for layout.
    const isTabOrLineBreak = cp === 0x09 || cp === 0x0a || cp === 0x0d;
    // A code point is never negative, so an upper bound identifies C0.
    const isC0 = cp <= 0x1f;
    const isC1 = cp >= 0x80 && cp <= 0x9f;

    if (isTabOrLineBreak || !(isC0 || isC1)) {
      safe += ch;
    }
  }
  return safe;
}
|
2025-09-10 21:20:40 -07:00
|
|
|
|
|
2026-01-22 11:41:51 -05:00
|
|
|
|
/**
 * Sanitize a string for display in list-like UI components (e.g. Help, Suggestions).
 *
 * Removes ANSI codes (via strip-ansi), collapses every run of whitespace into
 * a single space, and optionally truncates the result with a trailing `...`.
 *
 * @param str The raw text; a falsy value yields `''`.
 * @param maxLength Optional maximum length of the result in UTF-16 code
 *   units, ellipsis included. A falsy value (`undefined`, `0`) disables
 *   truncation.
 * @returns The sanitized string. When truncation applies, the result never
 *   exceeds `maxLength` and never ends in half of a surrogate pair.
 */
export function sanitizeForListDisplay(
  str: string,
  maxLength?: number,
): string {
  if (!str) {
    return '';
  }

  const ellipsis = '...';
  let sanitized = stripAnsi(str).replace(/\s+/g, ' ');

  if (maxLength && sanitized.length > maxLength) {
    if (maxLength <= ellipsis.length) {
      // No room for any content: emit as much of the ellipsis as fits so the
      // result still honours maxLength (substring clamps negatives to 0).
      return ellipsis.substring(0, maxLength);
    }

    let cut = maxLength - ellipsis.length;

    // If the cut falls between the two halves of a surrogate pair, back off
    // by one unit so no lone high surrogate is left before the ellipsis.
    const before = sanitized.charCodeAt(cut - 1);
    const after = sanitized.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) {
      cut -= 1;
    }

    sanitized = sanitized.substring(0, cut) + ellipsis;
  }

  return sanitized;
}
|
|
|
|
|
|
|
2026-01-16 13:17:31 -08:00
|
|
|
|
// Memoized display widths keyed by the measured string; constructed with
// LRU_BUFFER_PERF_CACHE_LIMIT as its capacity argument.
const stringWidthCache = new LRUCache<string, number>(LRU_BUFFER_PERF_CACHE_LIMIT);
|
2025-09-10 21:20:40 -07:00
|
|
|
|
|
|
|
|
|
|
/**
 * Memoizing wrapper around `stringWidth`.
 *
 * A single printable ASCII character (0x20-0x7E) returns 1 immediately and
 * never touches the cache. Every other input is looked up in
 * `stringWidthCache` and measured only on a miss.
 *
 * @param str The string to measure.
 * @returns The width reported by `stringWidth`, or, if `stringWidth` throws,
 *   the number of code points left after stripping ANSI codes.
 */
export const getCachedStringWidth = (str: string): number => {
  // Very frequent path: one printable ASCII character is always one column.
  if (str.length === 1) {
    const unit = str.charCodeAt(0);
    const isPrintableAscii = unit >= 0x20 && unit <= 0x7e;
    if (isPrintableAscii) {
      return 1;
    }
  }

  const memoized = stringWidthCache.get(str);
  if (memoized !== undefined) {
    return memoized;
  }

  let measured: number;
  try {
    measured = stringWidth(str);
  } catch {
    // Some characters make string-width crash (e.g. U+0602); fall back to a
    // code-point count of the ANSI-stripped text.
    // See: https://github.com/google-gemini/gemini-cli/issues/16418
    measured = toCodePoints(stripAnsi(str)).length;
  }

  stringWidthCache.set(str, measured);
  return measured;
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Empties the cache used by `getCachedStringWidth`, so later calls measure
 * their input again.
 */
export const clearStringWidthCache = (): void => {
  stringWidthCache.clear();
};
|
2025-09-25 14:07:17 -04:00
|
|
|
|
|
|
|
|
|
|
// Module-level matcher produced once by ansi-regex and used by
// escapeAnsiCtrlCodes to find ANSI control sequences in strings.
const regex = ansiRegex();
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Recursively walks a JSON-like value (objects, arrays, primitives) and
 * escapes every ANSI control sequence found in its string values.
 *
 * Each sequence matched by the module-level `regex` is replaced by its
 * JSON-escaped text (the `JSON.stringify` output without the surrounding
 * quotes), so the sequence is shown literally instead of being interpreted.
 * Only values are rewritten; object keys are left as they are.
 *
 * Allocation is lazy: an array or object is shallow-copied only once one of
 * its entries actually changes. If nothing needed escaping, the original
 * `obj` reference is returned and the input is never mutated.
 *
 * @param obj The JSON-like value (object, array, string, etc.) to traverse.
 * @returns A value with all nested string fields escaped, or the original
 *   `obj` reference if no changes were necessary.
 */
export function escapeAnsiCtrlCodes<T>(obj: T): T {
  if (typeof obj === 'string') {
    const containsAnsi = obj.search(regex) !== -1;
    if (!containsAnsi) {
      return obj; // Untouched strings are handed back as-is.
    }

    // `regex` is shared and global; start the replacement from index 0.
    regex.lastIndex = 0;
    const escapedText = obj.replace(regex, (sequence) => {
      const quoted = JSON.stringify(sequence);
      return quoted.slice(1, -1);
    });
    return escapedText as T;
  }

  // Numbers, booleans, undefined, functions, null, ... pass through unchanged.
  if (typeof obj !== 'object' || obj === null) {
    return obj;
  }

  if (Array.isArray(obj)) {
    let copiedItems: unknown[] | undefined;

    for (const [index, item] of obj.entries()) {
      const escapedItem = escapeAnsiCtrlCodes(item);
      if (escapedItem === item) {
        continue;
      }
      // First changed element: clone the array before writing to it.
      if (copiedItems === undefined) {
        copiedItems = [...obj];
      }
      copiedItems[index] = escapedItem;
    }

    return (copiedItems === undefined ? obj : copiedItems) as T;
  }

  const source = obj as Record<string, unknown>;
  let copiedFields: Record<string, unknown> | undefined;

  for (const key of Object.keys(obj)) {
    const current = source[key];
    const escapedCurrent = escapeAnsiCtrlCodes(current);
    if (escapedCurrent === current) {
      continue;
    }
    // First changed property: shallow-clone the object before writing to it.
    if (copiedFields === undefined) {
      copiedFields = { ...source };
    }
    copiedFields[key] = escapedCurrent;
  }

  return copiedFields === undefined ? obj : (copiedFields as T);
}
|