gemini-cli/packages/core/src/voice/responseFormatter.ts

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * Options for formatForSpeech().
 *
 * Every field is optional; an omitted field falls back to the default noted
 * on it.
 */
export interface FormatForSpeechOptions {
  /**
   * Maximum output length in characters before truncating.
   *
   * Longer output is cut to at most this many characters and then suffixed
   * with "… (N chars total)", so the returned string can be longer than this
   * value.
   * @default 500
   */
  maxLength?: number;
  /**
   * Number of trailing path segments to keep when abbreviating absolute paths.
   *
   * Applies to absolute Unix and Windows paths. Paths with no more than this
   * many segments are left unchanged; longer ones are shortened to "…/"
   * followed by the trailing segments.
   * @default 3
   */
  pathDepth?: number;
  /**
   * Maximum number of characters in a JSON value before summarising it.
   *
   * The comparison is made against the trimmed content of a fenced code
   * block: content longer than this that parses as a JSON object or array is
   * replaced by a short "(JSON object with N keys)" / "(JSON array with N
   * items)" summary; anything else is kept as plain text.
   * @default 80
   */
  jsonThreshold?: number;
}

// ANSI escape sequences:
//   - CSI: ESC [ <parameter bytes 0x30-0x3F> <intermediate bytes 0x20-0x2F>
//     <final byte 0x40-0x7E>. Covers colours (…m) as well as cursor movement,
//     erase and private-mode sequences such as ESC[2J, ESC[1A or ESC[?25l.
//   - OSC: ESC ] … terminated by BEL or by ST (ESC \).
//   - Character-set designation: ESC ( B, ESC ) 0, …
const ANSI_RE =
  // eslint-disable-next-line no-control-regex
  /\x1b(?:\[[0-?]*[ -/]*[@-~]|\][^\x07\x1b]*(?:\x07|\x1b\\)|[()][AB012])/g;

// Fenced code blocks  ```lang\n...\n```
const CODE_FENCE_RE = /```[^\n]*\n([\s\S]*?)```/g;

// Inline code  `...`
const INLINE_CODE_RE = /`([^`]+)`/g;

// Bold/italic markers  **text**, *text*, __text__, _text_
// Group 1 captures the text of an asterisk span, group 2 of an underscore span.
// - Newlines are excluded so the pattern cannot span multiple lines.
// - The delimiters must hug the text (non-space right after the opener and
//   right before the closer), so "2 * 3 * 4" and a leading "* " list bullet
//   are not mistaken for emphasis.
// - Underscore delimiters must not touch an ASCII letter or digit on their
//   outer side, so snake_case identifiers and paths such as
//   /home/my_user/some_dir keep their underscores.
const BOLD_ITALIC_RE =
  /\*{1,2}(?=\S)([^*\n]+)(?<=\S)\*{1,2}|(?<![A-Za-z0-9])_{1,2}(?=\S)([^_\n]+)(?<=\S)_{1,2}(?![A-Za-z0-9])/g;

// Blockquote prefix  "> "
const BLOCKQUOTE_RE = /^>\s?/gm;

// ATX headings  # heading
const HEADING_RE = /^#{1,6}\s+/gm;

// Markdown links  [text](url)
const LINK_RE = /\[([^\]]+)\]\([^)]+\)/g;

// Markdown list markers  "- " or "* " or "N. " at line start
const LIST_MARKER_RE = /^[ \t]*(?:[-*]|\d+\.)\s+/gm;

// Two or more consecutive stack-trace frames (Node.js style "    at …" lines).
// Matching blocks of ≥2 lets us replace each group in-place, preserving any
// text that follows the trace rather than appending it to the end.
const STACK_BLOCK_RE = /(?:^[ \t]+at [^\n]+(?:\n|$)){2,}/gm;

// Absolute Unix paths optionally ending with :line or :line:col
// Hyphen placed at start of char class to avoid useless-escape lint error
const UNIX_PATH_RE =
  /(?:^|(?<=\s|[(`"']))(\/[-\w.@]+(?:\/[-\w.@]+)*)(:\d+(?::\d+)?)?/g;

// Absolute Windows paths  C:\...  or  C:/...  (any drive letter)
const WIN_PATH_RE =
  /(?:^|(?<=\s|[(`"']))([A-Za-z]:[/\\][-\w. ]+(?:[/\\][-\w. ]+)*)(:\d+(?::\d+)?)?/g;

/**
 * Abbreviates an absolute path to at most `depth` trailing segments,
 * prefixed with "…". Optionally converts a `:line` or `:line:col` suffix to
 * " line N" (the column, if any, is dropped).
 *
 * @param full - The path text; it is split on both `/` and `\`.
 * @param suffix - Optional `:line` / `:line:col` suffix that followed the path.
 * @param depth - Number of trailing segments to keep. Fractions are truncated
 *   and values below 1 are treated as 1, so the final segment always
 *   survives.
 * @returns `full` unchanged when it has no more than `depth` segments,
 *   otherwise "…/" followed by the kept segments joined with `/`; in both
 *   cases " line N" is appended when `suffix` is given.
 */
function abbreviatePath(
  full: string,
  suffix: string | undefined,
  depth: number,
): string {
  const segments = full.split(/[/\\]/).filter(Boolean);

  // `slice(-0)` is `slice(0)` and would keep every segment while still adding
  // the "…/" prefix, and a negative depth would drop *leading* segments
  // instead of keeping trailing ones. Clamp to at least one segment and slice
  // from an explicit start index instead.
  const keep = Math.max(1, Math.trunc(depth));
  const abbreviated =
    segments.length > keep
      ? `\u2026/${segments.slice(segments.length - keep).join('/')}`
      : full;

  if (!suffix) return abbreviated;
  // Convert ":142" → " line 142", ":142:7" → " line 142"
  const lineNum = suffix.split(':').filter(Boolean)[0];
  return `${abbreviated} line ${lineNum}`;
}

/**
 * Produces a short spoken-friendly description of a JSON document.
 *
 * @param jsonStr - Text that may or may not be valid JSON.
 * @returns "(JSON array with N items)" for an array, "(JSON object with N
 *   keys)" for a non-null object, and `jsonStr` itself when the text does not
 *   parse or parses to a primitive or `null`.
 */
function summariseJson(jsonStr: string): string {
  let value: unknown;
  try {
    value = JSON.parse(jsonStr);
  } catch {
    // Not JSON at all: hand the text back untouched.
    return jsonStr;
  }

  if (Array.isArray(value)) {
    const count = value.length;
    const plural = count === 1 ? '' : 's';
    return `(JSON array with ${count} item${plural})`;
  }

  // Primitives and null carry no structure worth summarising.
  if (typeof value !== 'object' || value === null) {
    return jsonStr;
  }

  const count = Object.keys(value).length;
  const plural = count === 1 ? '' : 's';
  return `(JSON object with ${count} key${plural})`;
}

/**
 * Transforms a markdown/ANSI-formatted string into speech-ready plain text.
 *
 * Transformations applied (in order):
 * 1. Strip ANSI escape codes
 * 2. Collapse fenced code blocks to their content (or a JSON summary)
 * 3. Collapse stack traces to first frame + count
 * 4. Strip markdown syntax (inline code, blockquotes, headings, list markers,
 *    links, then bold/italic)
 * 5. Abbreviate deep absolute paths
 * 6. Normalise whitespace
 * 7. Truncate to maxLength
 *
 * @param text - The raw text to convert. An empty string yields ''.
 * @param options - Optional limits; see {@link FormatForSpeechOptions}.
 * @returns The plain-text result. When truncation happens, the text is cut to
 *   at most `maxLength` characters and "… (N chars total)" is appended, so
 *   the returned string can be longer than `maxLength`.
 */
export function formatForSpeech(
  text: string,
  options?: FormatForSpeechOptions,
): string {
  const maxLength = options?.maxLength ?? 500;
  const pathDepth = options?.pathDepth ?? 3;
  const jsonThreshold = options?.jsonThreshold ?? 80;

  if (!text) return '';

  let out = text;

  // 1. Strip ANSI escape codes
  out = out.replace(ANSI_RE, '');

  // 2. Fenced code blocks — try to summarise JSON content, else keep text
  out = out.replace(CODE_FENCE_RE, (_match, body: string) => {
    const trimmed = body.trim();
    if (trimmed.length > jsonThreshold) {
      const summary = summariseJson(trimmed);
      // summariseJson returns its input unchanged when it has no summary.
      if (summary !== trimmed) return summary;
    }
    return trimmed;
  });

  // 3. Collapse stack traces: replace each contiguous block of ≥2 frames
  //    in-place so that any text after the trace is preserved in order.
  out = out.replace(STACK_BLOCK_RE, (block) => {
    const lines = block
      .trim()
      .split('\n')
      .map((l) => l.trim());
    const rest = lines.length - 1;
    return `${lines[0]} (and ${rest} more frame${rest === 1 ? '' : 's'})\n`;
  });

  // 4. Strip markdown syntax.
  //    Line-anchored prefixes (blockquote, heading, list marker) are removed
  //    before emphasis so that a "* " bullet cannot pair up with a later
  //    emphasis asterisk on the same line ("* item *bold*" used to become
  //    "item bold*"). Links are resolved before emphasis so that "_" or "*"
  //    inside a URL cannot pair with one in the link text.
  out = out
    .replace(INLINE_CODE_RE, '$1')
    .replace(BLOCKQUOTE_RE, '')
    .replace(HEADING_RE, '')
    .replace(LIST_MARKER_RE, '')
    .replace(LINK_RE, '$1')
    .replace(BOLD_ITALIC_RE, (_m, g1?: string, g2?: string) => g1 ?? g2 ?? '');

  // 5. Abbreviate absolute paths
  //    Windows paths first to avoid the leading letter being caught by Unix RE
  out = out.replace(WIN_PATH_RE, (_m, full: string, suffix?: string) =>
    abbreviatePath(full, suffix, pathDepth),
  );
  out = out.replace(UNIX_PATH_RE, (_m, full: string, suffix?: string) =>
    abbreviatePath(full, suffix, pathDepth),
  );

  // 6. Normalise whitespace: collapse multiple blank lines, trim
  out = out.replace(/\n{3,}/g, '\n\n').trim();

  // 7. Truncate
  //    A negative limit would make slice() count from the end of the string
  //    (dropping only the last few characters), so clamp it to zero.
  const limit = Math.max(0, maxLength);
  if (out.length > limit) {
    const total = out.length;
    let cut = limit;
    // Do not leave half of a surrogate pair at the end of the kept text: if
    // the last kept UTF-16 unit is a high surrogate, drop it as well.
    const lastUnit = out.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    out = out.slice(0, cut).trimEnd() + `\u2026 (${total} chars total)`;
  }

  return out;
}