packages/cli/src/ui/utils/textUtils.ts

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import stripAnsi from 'strip-ansi';
import ansiRegex from 'ansi-regex';
import { stripVTControlCharacters } from 'node:util';
import stringWidth from 'string-width';
import { LRUCache } from 'mnemonist';
import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';

/**
 * Calculates the maximum width of a multi-line ASCII art string.
 *
 * The width is measured with `String.prototype.length`, i.e. in UTF-16 code
 * units, not in terminal display columns. Lines are separated by `\n` only.
 *
 * The lines are scanned with a running maximum rather than
 * `Math.max(...lengths)`: spreading one argument per line throws a
 * `RangeError` once the line count exceeds the engine's argument limit.
 *
 * @param asciiArt The ASCII art string.
 * @returns The length of the longest line in the ASCII art, or 0 for an
 * empty string.
 */
export const getAsciiArtWidth = (asciiArt: string): number => {
  if (!asciiArt) {
    return 0;
  }

  let widest = 0;
  for (const line of asciiArt.split('\n')) {
    if (line.length > widest) {
      widest = line.length;
    }
  }
  return widest;
};

/*
 * -------------------------------------------------------------------------
 *  Unicode‑aware helpers (work at the code‑point level rather than UTF‑16
 *  code units so that surrogate‑pair emoji count as one "column".)
 * ---------------------------------------------------------------------- */

// Cache for code points (used by toCodePoints below).
// Only strings whose length is at most this many UTF-16 code units are
// stored in / looked up from the cache; longer strings are re-split each call.
const MAX_STRING_LENGTH_TO_CACHE = 1000;
// LRU cache keyed by the input string, holding its code-point array.
// Sized by the shared LRU_BUFFER_PERF_CACHE_LIMIT constant.
const codePointsCache = new LRUCache<string, string[]>(
  LRU_BUFFER_PERF_CACHE_LIMIT,
);

/**
 * Splits a string into an array of Unicode code points, so that a
 * surrogate pair becomes a single array element.
 *
 * Pure-ASCII input takes a fast path and is never cached. Non-ASCII input
 * of at most MAX_STRING_LENGTH_TO_CACHE code units is memoized in
 * `codePointsCache`; the array returned for such strings is the cached
 * instance itself, so callers should treat it as read-only.
 *
 * @param str The string to split.
 * @returns One array element per code point of `str`.
 */
export function toCodePoints(str: string): string[] {
  // Fast path: when every UTF-16 unit is <= 127, units and code points
  // coincide and a plain split is enough.
  let sawNonAscii = false;
  for (let idx = 0; idx < str.length && !sawNonAscii; idx += 1) {
    sawNonAscii = str.charCodeAt(idx) > 127;
  }
  if (!sawNonAscii) {
    return str.split('');
  }

  // Only short strings take part in caching.
  const cacheable = str.length <= MAX_STRING_LENGTH_TO_CACHE;

  if (cacheable) {
    const hit = codePointsCache.get(str);
    if (hit !== undefined) {
      return hit;
    }
  }

  // Array.from iterates the string by code point.
  const points = Array.from(str);

  if (cacheable) {
    codePointsCache.set(str, points);
  }

  return points;
}

/**
 * Length of a string measured in Unicode code points rather than UTF-16
 * code units.
 *
 * @param str The string to measure.
 * @returns The number of code points in `str`.
 */
export function cpLen(str: string): number {
  const points = toCodePoints(str);
  return points.length;
}

/**
 * Slices a string by code-point indices instead of UTF-16 indices.
 *
 * `start` and `end` are passed straight to `Array.prototype.slice` on the
 * code-point array, so they follow its rules (negative values count from the
 * end, `end` is exclusive and optional).
 *
 * @param str The string to slice.
 * @param start Code-point index at which to begin.
 * @param end Code-point index at which to stop (exclusive); defaults to the end.
 * @returns The selected code points joined back into a string.
 */
export function cpSlice(str: string, start: number, end?: number): string {
  const points = toCodePoints(str);
  return points.slice(start, end).join('');
}

/**
 * Strip characters that can break terminal rendering.
 *
 * The input is first passed through strip-ansi and Node's
 * util.stripVTControlCharacters, and the remainder is then filtered one
 * code point at a time.
 *
 * Characters stripped:
 * - ANSI escape sequences (via strip-ansi)
 * - VT control sequences (via Node.js util.stripVTControlCharacters)
 * - C0 control chars (0x00-0x1F) other than TAB, LF and CR
 * - C1 control chars (0x80-0x9F)
 *
 * Characters preserved:
 * - TAB (0x09), LF (0x0A) and CR (0x0D)
 * - DEL (0x7F) - deliberately not filtered here (an earlier note says it is
 *   handled functionally by applyOperations)
 * - Everything else, including printable Unicode and emojis
 *
 * @param str The raw text to clean.
 * @returns `str` with the unsafe sequences and control characters removed.
 */
export function stripUnsafeCharacters(str: string): string {
  const withoutEscapes = stripVTControlCharacters(stripAnsi(str));

  const isDisplaySafe = (ch: string): boolean => {
    const cp = ch.codePointAt(0);
    if (cp === undefined) {
      return false;
    }

    switch (cp) {
      // TAB, LF and CR are kept for indentation and line handling.
      case 0x09:
      case 0x0a:
      case 0x0d:
        return true;
      default:
        break;
    }

    // C0 controls, e.g. BELL (0x07), BS (0x08), VT (0x0B), FF (0x0C).
    const isC0Control = cp <= 0x1f;
    // C1 controls: legacy 8-bit control codes.
    const isC1Control = cp >= 0x80 && cp <= 0x9f;

    // DEL (0x7f) and all remaining code points pass through.
    return !(isC0Control || isC1Control);
  };

  let safe = '';
  for (const ch of toCodePoints(withoutEscapes)) {
    if (isDisplaySafe(ch)) {
      safe += ch;
    }
  }
  return safe;
}

/**
 * Sanitize a string for display in list-like UI components (e.g. Help, Suggestions).
 *
 * Removes ANSI codes, collapses every run of whitespace into a single space,
 * and optionally truncates the result, appending `...` when it was cut.
 *
 * Truncation rules:
 * - Lengths are measured in UTF-16 code units (`String.prototype.length`).
 * - `maxLength` of `undefined`, `0` or `NaN` disables truncation.
 * - The returned string never exceeds `maxLength` code units; when the limit
 *   is too small to hold the full ellipsis, a shortened run of dots (possibly
 *   empty) is returned instead.
 * - The cut never lands in the middle of a surrogate pair, so no lone
 *   surrogate is left in front of the ellipsis.
 *
 * @param str The raw string to sanitize.
 * @param maxLength Optional maximum length of the returned string.
 * @returns The sanitized (and possibly truncated) string; `''` for empty input.
 */
export function sanitizeForListDisplay(
  str: string,
  maxLength?: number,
): string {
  if (!str) {
    return '';
  }

  const ellipsis = '...';
  const sanitized = stripAnsi(str).replace(/\s+/g, ' ');

  if (!maxLength || sanitized.length <= maxLength) {
    return sanitized;
  }

  // Not enough room for any text plus the ellipsis: return as many dots as
  // fit (substring clamps a negative bound to 0).
  if (maxLength <= ellipsis.length) {
    return ellipsis.substring(0, maxLength);
  }

  let keep = Math.floor(maxLength) - ellipsis.length;

  // If the last kept unit is a high surrogate, its low half would be cut
  // off; drop it so the output stays well-formed.
  if (keep > 0) {
    const lastKept = sanitized.charCodeAt(keep - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      keep -= 1;
    }
  }

  return sanitized.substring(0, keep) + ellipsis;
}

// LRU cache from a string to its measured width, sized by the shared
// LRU_BUFFER_PERF_CACHE_LIMIT constant.
const stringWidthCache = new LRUCache<string, number>(
  LRU_BUFFER_PERF_CACHE_LIMIT,
);

/**
 * Memoized wrapper around `stringWidth`.
 *
 * A single printable ASCII character (0x20-0x7E) is answered immediately
 * with width 1 and never touches the cache. Any other string is looked up in
 * `stringWidthCache` and measured on a miss. If `stringWidth` throws, the
 * width falls back to the number of code points left after stripping ANSI
 * codes; that fallback value is cached as well.
 *
 * @param str The string to measure.
 * @returns The width of `str` as reported by `stringWidth` (or the fallback).
 */
export const getCachedStringWidth = (str: string): number => {
  // Hot path: one printable ASCII character always has width 1.
  if (str.length === 1) {
    const unit = str.charCodeAt(0);
    const isPrintableAscii = unit >= 0x20 && unit <= 0x7e;
    if (isPrintableAscii) {
      return 1;
    }
  }

  const hit = stringWidthCache.get(str);
  if (hit !== undefined) {
    return hit;
  }

  const measured = ((): number => {
    try {
      return stringWidth(str);
    } catch {
      // Fallback for characters that cause string-width to crash (e.g. U+0602)
      // See: https://github.com/google-gemini/gemini-cli/issues/16418
      return toCodePoints(stripAnsi(str)).length;
    }
  })();

  stringWidthCache.set(str, measured);
  return measured;
};

/**
 * Clear the string width cache.
 *
 * Calls `clear()` on the module-level `stringWidthCache`, the cache that
 * getCachedStringWidth reads from and writes to.
 */
export const clearStringWidthCache = (): void => {
  stringWidthCache.clear();
};

// Shared matcher for ANSI escape sequences, built once at module load.
const regex = ansiRegex();

/**
 * Recursively walks a JSON-like value (objects, arrays, primitives) and
 * escapes the ANSI sequences found in any string value.
 *
 * Each matched sequence is replaced by its JSON-escaped text (the output of
 * `JSON.stringify` without the surrounding quotes), so the escape character
 * becomes visible text instead of being interpreted by the terminal. Only
 * values are processed; object keys are left as they are.
 *
 * Copies are made lazily: an array or object is shallow-copied only once one
 * of its entries actually changes. When nothing needed escaping the original
 * `obj` reference is returned, which lets callers detect "no change" with
 * `===`.
 *
 * @param obj The JSON-like value (object, array, string, etc.) to traverse.
 * @returns A new value with all nested string fields escaped, or the
 * original `obj` reference if no changes were necessary.
 */
export function escapeAnsiCtrlCodes<T>(obj: T): T {
  // Strings: escape every matched sequence, or hand back the same string.
  if (typeof obj === 'string') {
    const hasAnsi = obj.search(regex) !== -1;
    if (!hasAnsi) {
      return obj;
    }

    // Defensive reset of the shared global regex before replacing.
    regex.lastIndex = 0;
    const escapedText = obj.replace(regex, (sequence) => {
      const quoted = JSON.stringify(sequence);
      return quoted.slice(1, -1);
    });
    return escapedText as T;
  }

  // Non-string primitives and null pass through untouched.
  if (typeof obj !== 'object' || obj === null) {
    return obj;
  }

  // Arrays: copy on the first element that changes.
  if (Array.isArray(obj)) {
    let copiedArr: unknown[] | null = null;

    for (let idx = 0; idx < obj.length; idx += 1) {
      const original = obj[idx];
      const escaped = escapeAnsiCtrlCodes(original);
      if (escaped === original) {
        continue;
      }
      if (copiedArr === null) {
        copiedArr = [...obj];
      }
      copiedArr[idx] = escaped;
    }

    return (copiedArr === null ? obj : copiedArr) as T;
  }

  // Plain objects: same lazy copy, over own enumerable string keys.
  let copiedObj: T | null = null;

  for (const key of Object.keys(obj)) {
    const original = (obj as Record<string, unknown>)[key];
    const escaped = escapeAnsiCtrlCodes(original);
    if (escaped === original) {
      continue;
    }
    if (copiedObj === null) {
      copiedObj = { ...obj };
    }
    (copiedObj as Record<string, unknown>)[key] = escaped;
  }

  return copiedObj === null ? obj : copiedObj;
}
-												Replace logo with custom ASCII (#958)


											
										
										
											2025-06-13 00:59:45 -07:00
+								/**
 								 * @license
 								 * Copyright 2025 Google LLC
 								 * SPDX-License-Identifier: Apache-2.0
 								 */
-												feat(settings) support editing string settings. (#6732)


											
										
										
											2025-08-21 16:43:56 -07:00
+								import stripAnsi from 'strip-ansi';
-												feat(escape ansi): escape ansi ctrl codes from model output before displaying to user (#8636)


											
										
										
											2025-09-25 14:07:17 -04:00
+								import ansiRegex from 'ansi-regex';
-												chore: consistently import node modules with prefix (#3013)

Co-authored-by: N. Taylor Mullen <ntaylormullen@google.com>
											
										
										
											2025-08-25 22:11:27 +02:00
+								import { stripVTControlCharacters } from 'node:util';
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								import stringWidth from 'string-width';
-												Remove LRUCache class migrating to mnemoist (#16872)


											
										
										
											2026-01-16 13:17:31 -08:00
+								import { LRUCache } from 'mnemonist';
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
-												feat(settings) support editing string settings. (#6732)


											
										
										
											2025-08-21 16:43:56 -07:00
-												Replace logo with custom ASCII (#958)


											
										
										
											2025-06-13 00:59:45 -07:00
+								/**
 								 * Calculates the maximum width of a multi-line ASCII art string.
 								 * @param asciiArt The ASCII art string.
 								 * @returns The length of the longest line in the ASCII art.
 								 */
 								export const getAsciiArtWidth = (asciiArt: string): number => {
 								  if (!asciiArt) {
 								    return 0;
 								  }
 								  const lines = asciiArt.split('\n');
 								  return Math.max(...lines.map((line) => line.length));
 								};
-												feat: Adds shell command context to gemini history (#1076)


											
										
										
											2025-06-15 22:09:30 -04:00
-												Fix flicker issues by ensuring all actively changing content fits in the viewport (#1217)


											
										
										
											2025-06-19 20:17:23 +00:00
+								/*
 								 * -------------------------------------------------------------------------
 								 *  Unicode‑aware helpers (work at the code‑point level rather than UTF‑16
 								 *  code units so that surrogate‑pair emoji count as one "column".)
 								 * ---------------------------------------------------------------------- */
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								// Cache for code points
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								const MAX_STRING_LENGTH_TO_CACHE = 1000;
-												Remove LRUCache class migrating to mnemoist (#16872)


											
										
										
											2026-01-16 13:17:31 -08:00
+								const codePointsCache = new LRUCache<string, string[]>(
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								  LRU_BUFFER_PERF_CACHE_LIMIT,
 								);
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
-												Fix flicker issues by ensuring all actively changing content fits in the viewport (#1217)


											
										
										
											2025-06-19 20:17:23 +00:00
+								export function toCodePoints(str: string): string[] {
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								  // ASCII fast path - check if all chars are ASCII (0-127)
 								  let isAscii = true;
 								  for (let i = 0; i < str.length; i++) {
 								    if (str.charCodeAt(i) > 127) {
 								      isAscii = false;
 								      break;
 								    }
 								  }
 								  if (isAscii) {
 								    return str.split('');
 								  }
 								  // Cache short strings
 								  if (str.length <= MAX_STRING_LENGTH_TO_CACHE) {
 								    const cached = codePointsCache.get(str);
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								    if (cached !== undefined) {
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								      return cached;
 								    }
 								  }
 								  const result = Array.from(str);
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								  // Cache result
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								  if (str.length <= MAX_STRING_LENGTH_TO_CACHE) {
 								    codePointsCache.set(str, result);
 								  }
 								  return result;
-												Fix flicker issues by ensuring all actively changing content fits in the viewport (#1217)


											
										
										
											2025-06-19 20:17:23 +00:00
+								}
 								export function cpLen(str: string): number {
 								  return toCodePoints(str).length;
 								}
 								export function cpSlice(str: string, start: number, end?: number): string {
 								  // Slice by code‑point indices and re‑join.
 								  const arr = toCodePoints(str).slice(start, end);
 								  return arr.join('');
 								}
-												feat(settings) support editing string settings. (#6732)


											
										
										
											2025-08-21 16:43:56 -07:00
 								/**
 								 * Strip characters that can break terminal rendering.
 								 *
 								 * Uses Node.js built-in stripVTControlCharacters to handle VT sequences,
 								 * then filters remaining control characters that can disrupt display.
 								 *
 								 * Characters stripped:
 								 * - ANSI escape sequences (via strip-ansi)
 								 * - VT control sequences (via Node.js util.stripVTControlCharacters)
 								 * - C0 control chars (0x00-0x1F) except CR/LF which are handled elsewhere
 								 * - C1 control chars (0x80-0x9F) that can cause display issues
 								 *
 								 * Characters preserved:
 								 * - All printable Unicode including emojis
 								 * - DEL (0x7F) - handled functionally by applyOperations, not a display issue
 								 * - CR/LF (0x0D/0x0A) - needed for line breaks
-												Preserve tabs on paste (#12735)


											
										
										
											2025-11-10 07:37:43 -08:00
+								 * - TAB (0x09) - preserve tabs
-												feat(settings) support editing string settings. (#6732)


											
										
										
											2025-08-21 16:43:56 -07:00
+								 */
 								export function stripUnsafeCharacters(str: string): string {
 								  const strippedAnsi = stripAnsi(str);
 								  const strippedVT = stripVTControlCharacters(strippedAnsi);
 								  return toCodePoints(strippedVT)
 								    .filter((char) => {
 								      const code = char.codePointAt(0);
 								      if (code === undefined) return false;
-												Preserve tabs on paste (#12735)


											
										
										
											2025-11-10 07:37:43 -08:00
+								      // Preserve CR/LF/TAB for line handling
 								      if (code === 0x0a || code === 0x0d || code === 0x09) return true;
-												feat(settings) support editing string settings. (#6732)


											
										
										
											2025-08-21 16:43:56 -07:00
 								      // Remove C0 control chars (except CR/LF) that can break display
 								      // Examples: BELL(0x07) makes noise, BS(0x08) moves cursor, VT(0x0B), FF(0x0C)
 								      if (code >= 0x00 && code <= 0x1f) return false;
 								      // Remove C1 control chars (0x80-0x9f) - legacy 8-bit control codes
 								      if (code >= 0x80 && code <= 0x9f) return false;
 								      // Preserve DEL (0x7f) - it's handled functionally by applyOperations as backspace
 								      // and doesn't cause rendering issues when displayed
 								      // Preserve all other characters including Unicode/emojis
 								      return true;
 								    })
 								    .join('');
 								}
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
-												Sanitize command names and descriptions (#17228)


											
										
										
											2026-01-22 11:41:51 -05:00
+								/**
 								 * Sanitize a string for display in list-like UI components (e.g. Help, Suggestions).
 								 * Removes ANSI codes, collapses whitespace characters into a single space, and optionally truncates.
 								 */
 								export function sanitizeForListDisplay(
 								  str: string,
 								  maxLength?: number,
 								): string {
 								  if (!str) {
 								    return '';
 								  }
 								  let sanitized = stripAnsi(str).replace(/\s+/g, ' ');
 								  if (maxLength && sanitized.length > maxLength) {
 								    sanitized = sanitized.substring(0, maxLength - 3) + '...';
 								  }
 								  return sanitized;
 								}
-												Remove LRUCache class migrating to mnemoist (#16872)


											
										
										
											2026-01-16 13:17:31 -08:00
+								const stringWidthCache = new LRUCache<string, number>(
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								  LRU_BUFFER_PERF_CACHE_LIMIT,
 								);
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
 								/**
 								 * Cached version of stringWidth function for better performance
 								 */
 								export const getCachedStringWidth = (str: string): number => {
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								  // ASCII printable chars (32-126) have width 1.
 								  // This is a very frequent path, so we use a fast numeric check.
 								  if (str.length === 1) {
 								    const code = str.charCodeAt(0);
 								    if (code >= 0x20 && code <= 0x7e) {
 								      return 1;
 								    }
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								  }
-												perf(ui): optimize text buffer and highlighting for large inputs (#16782)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2026-01-16 09:33:13 -08:00
+								  const cached = stringWidthCache.get(str);
 								  if (cached !== undefined) {
 								    return cached;
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								  }
-												Fix crash on unicode character (#16420)


											
										
										
											2026-01-12 11:52:19 -05:00
+								  let width: number;
 								  try {
 								    width = stringWidth(str);
 								  } catch {
 								    // Fallback for characters that cause string-width to crash (e.g. U+0602)
 								    // See: https://github.com/google-gemini/gemini-cli/issues/16418
 								    width = toCodePoints(stripAnsi(str)).length;
 								  }
-												feat: add cached string width function for performance optimization (#7850)

Co-authored-by: lifeloating <imshuazi@126.com>
											
										
										
											2025-09-10 21:20:40 -07:00
+								  stringWidthCache.set(str, width);
 								  return width;
 								};
 								/**
 								 * Clear the string width cache
 								 */
 								export const clearStringWidthCache = (): void => {
 								  stringWidthCache.clear();
 								};
-												feat(escape ansi): escape ansi ctrl codes from model output before displaying to user (#8636)


											
										
										
											2025-09-25 14:07:17 -04:00
 								const regex = ansiRegex();
 								/* Recursively traverses a JSON-like structure (objects, arrays, primitives)
 								 * and escapes all ANSI control characters found in any string values.
 								 *
 								 * This function is designed to be robust, handling deeply nested objects and
 								 * arrays. It applies a regex-based replacement to all string values to
 								 * safely escape control characters.
 								 *
 								 * To optimize performance, this function uses a "copy-on-write" strategy.
 								 * It avoids allocating new objects or arrays if no nested string values
 								 * required escaping, returning the original object reference in such cases.
 								 *
 								 * @param obj The JSON-like value (object, array, string, etc.) to traverse.
 								 * @returns A new value with all nested string fields escaped, or the
 								 * original `obj` reference if no changes were necessary.
 								 */
 								export function escapeAnsiCtrlCodes<T>(obj: T): T {
 								  if (typeof obj === 'string') {
 								    if (obj.search(regex) === -1) {
 								      return obj; // No changes return original string
 								    }
 								    regex.lastIndex = 0; // needed for global regex
 								    return obj.replace(regex, (match) =>
 								      JSON.stringify(match).slice(1, -1),
 								    ) as T;
 								  }
 								  if (obj === null || typeof obj !== 'object') {
 								    return obj;
 								  }
 								  if (Array.isArray(obj)) {
 								    let newArr: unknown[] | null = null;
 								    for (let i = 0; i < obj.length; i++) {
 								      const value = obj[i];
 								      const escapedValue = escapeAnsiCtrlCodes(value);
 								      if (escapedValue !== value) {
 								        if (newArr === null) {
 								          newArr = [...obj];
 								        }
 								        newArr[i] = escapedValue;
 								      }
 								    }
 								    return (newArr !== null ? newArr : obj) as T;
 								  }
 								  let newObj: T | null = null;
 								  const keys = Object.keys(obj);
 								  for (const key of keys) {
 								    const value = (obj as Record<string, unknown>)[key];
 								    const escapedValue = escapeAnsiCtrlCodes(value);
 								    if (escapedValue !== value) {
 								      if (newObj === null) {
 								        newObj = { ...obj };
 								      }
 								      (newObj as Record<string, unknown>)[key] = escapedValue;
 								    }
 								  }
 								  return newObj !== null ? newObj : obj;
 								}