fix(cli): render LaTeX-style output as Unicode in the TUI (#25802)

Co-authored-by: cynthialong0-0 <82900738+cynthialong0-0@users.noreply.github.com>
2026-05-13 13:22:35 -07:00 · 2026-05-04 23:35:06 +05:30
parent 0da1a2026a
commit 77f4be1f3d
4 changed files with 990 additions and 1 deletions
@@ -0,0 +1,304 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { convertLatexToUnicode } from './latexToUnicode.js';
+
+// Unit tests for convertLatexToUnicode. Each nested `describe` covers one
+// family of conversions; every case feeds a literal input string and checks
+// the exact output string.
+describe('convertLatexToUnicode', () => {
+  describe('fast path', () => {
+    it('returns empty string unchanged', () => {
+      expect(convertLatexToUnicode('')).toBe('');
+    });
+
+    it('returns text without backslash or dollar unchanged', () => {
+      const input = 'hello world 123';
+      expect(convertLatexToUnicode(input)).toBe(input);
+    });
+
+    it('short-circuits plain ASCII identically', () => {
+      const input = 'The quick brown fox jumps over the lazy dog.';
+      expect(convertLatexToUnicode(input)).toBe(input);
+    });
+  });
+
+  describe('issue #25656 examples', () => {
+    it('converts the set-of-processes example', () => {
+      const input = 'A set of processes $\\{P_0, P_1, \\dots, P_n\\}$ exists';
+      expect(convertLatexToUnicode(input)).toBe(
+        'A set of processes {P₀, P₁, …, Pₙ} exists',
+      );
+    });
+
+    it('converts the deadlock arrow example', () => {
+      const input = 'If the graph contains no cycles $\\to$ No Deadlock.';
+      expect(convertLatexToUnicode(input)).toBe(
+        'If the graph contains no cycles → No Deadlock.',
+      );
+    });
+  });
+
+  describe('math delimiters', () => {
+    it('strips $...$ when the content contains LaTeX markers', () => {
+      expect(convertLatexToUnicode('see $\\alpha$ here')).toBe('see α here');
+    });
+
+    it('strips $...$ around single variables', () => {
+      expect(convertLatexToUnicode('let $x$ be a value')).toBe(
+        'let x be a value',
+      );
+    });
+
+    it('strips $$...$$ display math', () => {
+      expect(convertLatexToUnicode('$$\\alpha + \\beta$$')).toBe('α + β');
+    });
+
+    it('leaves currency $5.99 alone', () => {
+      expect(convertLatexToUnicode('It costs $5.99 total')).toBe(
+        'It costs $5.99 total',
+      );
+    });
+
+    it('leaves two dollar amounts alone', () => {
+      // The inline-math regex matches `$5 to $` as a pair, but the inner
+      // text contains no LaTeX marker and is not a single variable, so the
+      // pair is left intact.
+      expect(convertLatexToUnicode('prices range $5 to $10')).toBe(
+        'prices range $5 to $10',
+      );
+    });
+
+    it('leaves shell-style $ interpolation alone', () => {
+      expect(convertLatexToUnicode('echo $USER $HOME')).toBe(
+        'echo $USER $HOME',
+      );
+    });
+
+    it('does not strip dollars across newlines', () => {
+      expect(convertLatexToUnicode('price $5\nfee $3')).toBe(
+        'price $5\nfee $3',
+      );
+    });
+  });
+
+  describe('greek letters', () => {
+    it('converts lowercase greek', () => {
+      expect(convertLatexToUnicode('\\alpha \\beta \\gamma')).toBe('α β γ');
+    });
+
+    it('converts uppercase greek', () => {
+      expect(convertLatexToUnicode('\\Omega \\Delta')).toBe('Ω Δ');
+    });
+
+    it('does not mangle a prefix match', () => {
+      // `\alphabet` is not a known command — must stay intact.
+      expect(convertLatexToUnicode('\\alphabet')).toBe('\\alphabet');
+    });
+  });
+
+  describe('named commands', () => {
+    it('converts arrows', () => {
+      expect(convertLatexToUnicode('\\to \\rightarrow \\Rightarrow')).toBe(
+        '→ → ⇒',
+      );
+    });
+
+    it('converts relations', () => {
+      expect(convertLatexToUnicode('\\leq \\geq \\neq \\approx')).toBe(
+        '≤ ≥ ≠ ≈',
+      );
+    });
+
+    it('converts set theory', () => {
+      expect(convertLatexToUnicode('\\in \\notin \\cup \\cap')).toBe('∈ ∉ ∪ ∩');
+    });
+
+    it('converts logic', () => {
+      expect(convertLatexToUnicode('\\forall x \\exists y')).toBe('∀ x ∃ y');
+    });
+
+    it('converts large operators', () => {
+      expect(convertLatexToUnicode('\\sum \\prod \\int')).toBe('∑ ∏ ∫');
+    });
+
+    it('converts ellipses', () => {
+      expect(convertLatexToUnicode('a, b, \\dots, z')).toBe('a, b, …, z');
+    });
+
+    it('converts infty', () => {
+      expect(convertLatexToUnicode('\\infty')).toBe('∞');
+    });
+
+    it('leaves unknown commands untouched', () => {
+      expect(convertLatexToUnicode('\\thisIsNotReal')).toBe('\\thisIsNotReal');
+    });
+  });
+
+  describe('escaped specials', () => {
+    it('unescapes braces and underscore', () => {
+      expect(convertLatexToUnicode('\\{ \\} \\_')).toBe('{ } _');
+    });
+
+    it('unescapes percent, ampersand, hash, dollar, pipe', () => {
+      expect(convertLatexToUnicode('\\% \\& \\# \\$ \\|')).toBe('% & # $ |');
+    });
+
+    it('unescapes backslash-space as a regular space', () => {
+      expect(convertLatexToUnicode('word\\ boundary')).toBe('word boundary');
+    });
+
+    it('converts \\\\ to a newline inside math mode', () => {
+      // `\\` is a LaTeX line break in math/tabular contexts. Only convert
+      // inside `$...$` — outside math this would mangle Windows UNC paths
+      // (`\\server\share`) and escaped backslashes in code-like prose.
+      expect(convertLatexToUnicode('$a\\\\b$')).toBe('a\nb');
+    });
+
+    it('leaves \\\\ alone outside math mode', () => {
+      expect(convertLatexToUnicode('line1\\\\line2')).toBe('line1\\\\line2');
+    });
+  });
+
+  describe('text formatting', () => {
+    it('wraps textbf in markdown bold', () => {
+      expect(convertLatexToUnicode('\\textbf{hello}')).toBe('**hello**');
+    });
+
+    it('wraps textit in markdown italic', () => {
+      expect(convertLatexToUnicode('\\textit{hello}')).toBe('*hello*');
+    });
+
+    it('strips \\text wrapper', () => {
+      expect(convertLatexToUnicode('\\text{plain}')).toBe('plain');
+    });
+
+    it('strips \\mathrm', () => {
+      expect(convertLatexToUnicode('\\mathrm{foo}')).toBe('foo');
+    });
+
+    it('handles \\emph as italic', () => {
+      expect(convertLatexToUnicode('\\emph{emphasized}')).toBe('*emphasized*');
+    });
+  });
+
+  describe('fractions and roots', () => {
+    it('converts \\frac', () => {
+      expect(convertLatexToUnicode('\\frac{a}{b}')).toBe('(a)/(b)');
+    });
+
+    it('converts \\sqrt', () => {
+      expect(convertLatexToUnicode('\\sqrt{x}')).toBe('√(x)');
+    });
+
+    it('converts \\sqrt with index', () => {
+      expect(convertLatexToUnicode('\\sqrt[3]{x}')).toBe('3√(x)');
+    });
+
+    it('converts \\frac combined with greek', () => {
+      expect(convertLatexToUnicode('\\frac{\\alpha}{\\beta}')).toBe('(α)/(β)');
+    });
+  });
+
+  describe('subscripts and superscripts', () => {
+    // Sub/superscripts are only applied inside math delimiters to avoid
+    // mangling identifiers like `file_name` and `foo_bar` in regular prose.
+    it('converts digit subscripts inside math', () => {
+      expect(convertLatexToUnicode('$x_0 + x_1 + x_2$')).toBe('x₀ + x₁ + x₂');
+    });
+
+    it('converts digit superscripts inside math', () => {
+      expect(convertLatexToUnicode('$E = mc^2$')).toBe('E = mc²');
+    });
+
+    it('converts letter subscripts where available', () => {
+      expect(convertLatexToUnicode('$P_n$ and $x_i$')).toBe('Pₙ and xᵢ');
+    });
+
+    it('converts braced digit subscripts', () => {
+      expect(convertLatexToUnicode('$x_{12}$')).toBe('x₁₂');
+    });
+
+    it('leaves subscripts with no unicode mapping alone', () => {
+      // `q` has no entry in the subscript table — the whole braced operand
+      // is left untouched to avoid inconsistent-looking output.
+      expect(convertLatexToUnicode('$x_{abq}$')).toBe('x_{abq}');
+    });
+
+    it('does not subscript identifiers in prose', () => {
+      // Outside math delimiters, `_` is left alone entirely so that
+      // snake_case identifiers and file paths render correctly. This is a
+      // deliberate trade-off against model output that emits subscripts
+      // unwrapped.
+      expect(convertLatexToUnicode('the file_name variable')).toBe(
+        'the file_name variable',
+      );
+      expect(convertLatexToUnicode('_private')).toBe('_private');
+    });
+
+    it('does not superscript when character is unmapped in sup', () => {
+      // `^Q` — `Q` has no entry in the superscript table. The pattern still
+      // matches `x^Q`, but the replacement callback returns the match
+      // unchanged when the lookup fails, even inside math.
+      expect(convertLatexToUnicode('$x^Q$')).toBe('x^Q');
+    });
+
+    it('leaves bare x_0 alone outside math', () => {
+      // Deliberate: we cannot tell `P_0` (subscript) from `my_0` (identifier)
+      // in arbitrary prose, so prefer to preserve identifiers.
+      expect(convertLatexToUnicode('x_0 is fine')).toBe('x_0 is fine');
+    });
+  });
+
+  describe('protection of non-LaTeX content', () => {
+    it('leaves Windows paths alone', () => {
+      expect(convertLatexToUnicode('C:\\Users\\foo\\bar')).toBe(
+        'C:\\Users\\foo\\bar',
+      );
+    });
+
+    it('leaves Windows UNC paths alone (no line-break rewrite in prose)', () => {
+      // `\\server\share\file` must NOT be rewritten to a newline. Line-break
+      // conversion is restricted to math mode. See PR #25802.
+      expect(convertLatexToUnicode('\\\\server\\share\\file')).toBe(
+        '\\\\server\\share\\file',
+      );
+    });
+
+    it('leaves regex backslash escapes alone', () => {
+      expect(convertLatexToUnicode('\\d+\\w*')).toBe('\\d+\\w*');
+    });
+
+    it('leaves $ in code-like prose alone', () => {
+      expect(convertLatexToUnicode('run $(command)$ to see output')).toBe(
+        'run $(command)$ to see output',
+      );
+    });
+  });
+
+  describe('combined scenarios', () => {
+    it('handles complex math in prose', () => {
+      const input =
+        'The complexity is $O(n \\log n)$ for sorting $n$ elements.';
+      expect(convertLatexToUnicode(input)).toBe(
+        'The complexity is O(n log n) for sorting n elements.',
+      );
+    });
+
+    it('handles multiple constructs in one line', () => {
+      const input = 'Let $\\alpha \\in \\mathbb{R}$ and $\\beta \\geq 0$.';
+      expect(convertLatexToUnicode(input)).toBe('Let α ∈ R and β ≥ 0.');
+    });
+
+    it('preserves surrounding text exactly', () => {
+      const input = 'Before $\\to$ after.';
+      expect(convertLatexToUnicode(input)).toBe('Before → after.');
+    });
+
+    it('idempotency — running twice yields the same result', () => {
+      const input = '$\\{P_0, \\dots, P_n\\}$';
+      const once = convertLatexToUnicode(input);
+      const twice = convertLatexToUnicode(once);
+      expect(twice).toBe(once);
+    });
+  });
+});
@@ -0,0 +1,599 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Converts common LaTeX-style syntax in model output into terminal-friendly
+ * Unicode (and lightweight markdown where appropriate).
+ *
+ * Terminals cannot natively render LaTeX, but model responses — especially for
+ * math, CS, and algorithms — frequently include constructs like `$\{P_0,
+ * \dots, P_n\}$` or `$\to$`. Left as-is, the raw backslash commands show up
+ * verbatim and make the output look broken.
+ *
+ * This function is a conservative, lossy post-processor that handles the
+ * common cases and leaves anything it does not recognise untouched, so that
+ * legitimate backslash content (e.g. Windows paths, regex examples) is not
+ * mangled.
+ *
+ * See issue #25656.
+ */
+
+// Greek letters, lower and upper case, plus the common "var" variants.
+// Keys are LaTeX command names without the leading backslash (`alpha` for
+// `\alpha`); values are the replacement glyphs. convertNamedCommands consults
+// this table first and falls back to LATEX_COMMANDS. The object is frozen so
+// the shared table cannot be modified at runtime.
+const GREEK_LETTERS: Readonly<Record<string, string>> = Object.freeze({
+  alpha: 'α',
+  beta: 'β',
+  gamma: 'γ',
+  delta: 'δ',
+  epsilon: 'ε',
+  zeta: 'ζ',
+  eta: 'η',
+  theta: 'θ',
+  iota: 'ι',
+  kappa: 'κ',
+  lambda: 'λ',
+  mu: 'μ',
+  nu: 'ν',
+  xi: 'ξ',
+  omicron: 'ο',
+  pi: 'π',
+  rho: 'ρ',
+  sigma: 'σ',
+  tau: 'τ',
+  upsilon: 'υ',
+  phi: 'φ',
+  chi: 'χ',
+  psi: 'ψ',
+  omega: 'ω',
+  Alpha: 'Α',
+  Beta: 'Β',
+  Gamma: 'Γ',
+  Delta: 'Δ',
+  Epsilon: 'Ε',
+  Zeta: 'Ζ',
+  Eta: 'Η',
+  Theta: 'Θ',
+  Iota: 'Ι',
+  Kappa: 'Κ',
+  Lambda: 'Λ',
+  Mu: 'Μ',
+  Nu: 'Ν',
+  Xi: 'Ξ',
+  Omicron: 'Ο',
+  Pi: 'Π',
+  Rho: 'Ρ',
+  Sigma: 'Σ',
+  Tau: 'Τ',
+  Upsilon: 'Υ',
+  Phi: 'Φ',
+  Chi: 'Χ',
+  Psi: 'Ψ',
+  Omega: 'Ω',
+  varepsilon: 'ε',
+  vartheta: 'ϑ',
+  varphi: 'φ',
+  varrho: 'ϱ',
+  varsigma: 'ς',
+  varpi: 'ϖ',
+});
+
+// Named LaTeX commands → Unicode. Covers arrows, relations, set theory,
+// logic, large operators, and a handful of common decorations. Anything not
+// listed here is deliberately left untouched.
+// Keys are command names without the leading backslash. convertNamedCommands
+// looks a name up here only after GREEK_LETTERS has no entry for it. The
+// replacement covers the `\name` token alone; any argument that follows it in
+// the text is left where it is.
+const LATEX_COMMANDS: Readonly<Record<string, string>> = Object.freeze({
+  // Arrows
+  to: '→',
+  rightarrow: '→',
+  Rightarrow: '⇒',
+  leftarrow: '←',
+  Leftarrow: '⇐',
+  leftrightarrow: '↔',
+  Leftrightarrow: '⇔',
+  mapsto: '↦',
+  longrightarrow: '⟶',
+  longleftarrow: '⟵',
+  longleftrightarrow: '⟷',
+  uparrow: '↑',
+  downarrow: '↓',
+  Uparrow: '⇑',
+  Downarrow: '⇓',
+  hookrightarrow: '↪',
+  hookleftarrow: '↩',
+
+  // Ellipses
+  dots: '…',
+  ldots: '…',
+  cdots: '⋯',
+  vdots: '⋮',
+  ddots: '⋱',
+
+  // Arithmetic / comparison
+  times: '×',
+  cdot: '·',
+  div: '÷',
+  pm: '±',
+  mp: '∓',
+  ast: '∗',
+  leq: '≤',
+  le: '≤',
+  geq: '≥',
+  ge: '≥',
+  neq: '≠',
+  ne: '≠',
+  ll: '≪',
+  gg: '≫',
+  approx: '≈',
+  equiv: '≡',
+  sim: '∼',
+  simeq: '≃',
+  cong: '≅',
+  propto: '∝',
+
+  // Set theory
+  in: '∈',
+  notin: '∉',
+  ni: '∋',
+  subset: '⊂',
+  supset: '⊃',
+  subseteq: '⊆',
+  supseteq: '⊇',
+  cup: '∪',
+  cap: '∩',
+  setminus: '∖',
+  emptyset: '∅',
+  varnothing: '∅',
+
+  // Logic
+  forall: '∀',
+  exists: '∃',
+  nexists: '∄',
+  neg: '¬',
+  lnot: '¬',
+  land: '∧',
+  wedge: '∧',
+  lor: '∨',
+  vee: '∨',
+  oplus: '⊕',
+  otimes: '⊗',
+  implies: '⟹',
+  iff: '⟺',
+
+  // Large operators
+  sum: '∑',
+  prod: '∏',
+  coprod: '∐',
+  int: '∫',
+  iint: '∬',
+  iiint: '∭',
+  oint: '∮',
+
+  // Calculus
+  partial: '∂',
+  nabla: '∇',
+  infty: '∞',
+
+  // Misc letters / constants
+  ell: 'ℓ',
+  hbar: 'ℏ',
+  Re: 'ℜ',
+  Im: 'ℑ',
+  aleph: 'ℵ',
+  beth: 'ℶ',
+
+  // Brackets / delimiters
+  lbrace: '{',
+  rbrace: '}',
+  lbrack: '[',
+  rbrack: ']',
+  langle: '⟨',
+  rangle: '⟩',
+  lceil: '⌈',
+  rceil: '⌉',
+  lfloor: '⌊',
+  rfloor: '⌋',
+
+  // Geometry / misc
+  perp: '⊥',
+  parallel: '∥',
+  angle: '∠',
+  triangle: '△',
+  square: '□',
+  circ: '∘',
+  bullet: '•',
+  star: '⋆',
+  prime: '′',
+  dag: '†',
+  ddag: '‡',
+  therefore: '∴',
+  because: '∵',
+  top: '⊤',
+  bot: '⊥',
+
+  // Operator names (`\log`, `\sin`, …) render in LaTeX as upright text. In a
+  // terminal the closest equivalent is the lowercase word itself.
+  // NOTE(review): `\pmod{n}` normally carries a braced argument; only the
+  // command name is replaced here, so the braces stay in the output.
+  log: 'log',
+  ln: 'ln',
+  lg: 'lg',
+  exp: 'exp',
+  sin: 'sin',
+  cos: 'cos',
+  tan: 'tan',
+  cot: 'cot',
+  sec: 'sec',
+  csc: 'csc',
+  arcsin: 'arcsin',
+  arccos: 'arccos',
+  arctan: 'arctan',
+  sinh: 'sinh',
+  cosh: 'cosh',
+  tanh: 'tanh',
+  max: 'max',
+  min: 'min',
+  sup: 'sup',
+  inf: 'inf',
+  lim: 'lim',
+  limsup: 'lim sup',
+  liminf: 'lim inf',
+  arg: 'arg',
+  det: 'det',
+  dim: 'dim',
+  ker: 'ker',
+  gcd: 'gcd',
+  deg: 'deg',
+  hom: 'hom',
+  mod: 'mod',
+  bmod: 'mod',
+  pmod: 'mod',
+
+  // Whitespace commands — render as visible space so layout is roughly right.
+  quad: '  ',
+  qquad: '    ',
+  // These are all "thin-space" style commands in LaTeX; render as a single
+  // space so the surrounding tokens don't jam together.
+  // NOTE: the alphabetic-command lookup only matches names made of letters,
+  // so these four punctuation keys are never read through it. The `\,` `\;`
+  // `\:` `\!` commands are rewritten by convertPunctuationCommands, which
+  // hard-codes the same replacements; keep the two in sync.
+  ',': ' ',
+  ';': ' ',
+  ':': ' ',
+  '!': '',
+});
+
+// Unicode subscript mappings (digits, operators, and the common letters that
+// have full-height subscript glyphs in Unicode).
+// Keys are single ASCII characters as they appear after `_` in the input. A
+// character with no key here cannot be subscripted; convertSubSuperScripts
+// then leaves the operand as written. Only lowercase letters are listed.
+const SUBSCRIPT_MAP: Readonly<Record<string, string>> = Object.freeze({
+  '0': '₀',
+  '1': '₁',
+  '2': '₂',
+  '3': '₃',
+  '4': '₄',
+  '5': '₅',
+  '6': '₆',
+  '7': '₇',
+  '8': '₈',
+  '9': '₉',
+  '+': '₊',
+  '-': '₋',
+  '=': '₌',
+  '(': '₍',
+  ')': '₎',
+  a: 'ₐ',
+  e: 'ₑ',
+  h: 'ₕ',
+  i: 'ᵢ',
+  j: 'ⱼ',
+  k: 'ₖ',
+  l: 'ₗ',
+  m: 'ₘ',
+  n: 'ₙ',
+  o: 'ₒ',
+  p: 'ₚ',
+  r: 'ᵣ',
+  s: 'ₛ',
+  t: 'ₜ',
+  u: 'ᵤ',
+  v: 'ᵥ',
+  x: 'ₓ',
+});
+
+// Unicode superscript mappings. A superset of subscripts — most letters have
+// superscript glyphs.
+// Keys are single ASCII characters as they appear after `^` in the input.
+// Every key of SUBSCRIPT_MAP is present, plus `b c d f g w y z`; `q` and all
+// uppercase letters have no entry, so operands containing them are left as
+// written by convertSubSuperScripts.
+const SUPERSCRIPT_MAP: Readonly<Record<string, string>> = Object.freeze({
+  '0': '⁰',
+  '1': '¹',
+  '2': '²',
+  '3': '³',
+  '4': '⁴',
+  '5': '⁵',
+  '6': '⁶',
+  '7': '⁷',
+  '8': '⁸',
+  '9': '⁹',
+  '+': '⁺',
+  '-': '⁻',
+  '=': '⁼',
+  '(': '⁽',
+  ')': '⁾',
+  a: 'ᵃ',
+  b: 'ᵇ',
+  c: 'ᶜ',
+  d: 'ᵈ',
+  e: 'ᵉ',
+  f: 'ᶠ',
+  g: 'ᵍ',
+  h: 'ʰ',
+  i: 'ⁱ',
+  j: 'ʲ',
+  k: 'ᵏ',
+  l: 'ˡ',
+  m: 'ᵐ',
+  n: 'ⁿ',
+  o: 'ᵒ',
+  p: 'ᵖ',
+  r: 'ʳ',
+  s: 'ˢ',
+  t: 'ᵗ',
+  u: 'ᵘ',
+  v: 'ᵛ',
+  w: 'ʷ',
+  x: 'ˣ',
+  y: 'ʸ',
+  z: 'ᶻ',
+});
+
+/**
+ * Strips `$...$` and `$$...$$` math delimiters when the inner content looks
+ * like math, applying the full set of math-mode conversions (including
+ * sub/superscripts) to the inner text. The goal is to handle model output
+ * without eating dollar signs that appear in ordinary prose (prices,
+ * shell examples, etc.).
+ *
+ * `$$...$$` (display math) is always converted and may span several lines.
+ * A single-line pair of `$...$` is treated as math when the inner text
+ * either:
+ *   - contains a LaTeX marker (a backslash, `_`, or `^`), or
+ *   - is a single letter, possibly with whitespace padding (e.g. `$x$`,
+ *     `$ n $`). Shell-style variables like `$USER` are LEFT intact because
+ *     multi-letter all-caps sequences look much more like shell vars than
+ *     math in practice.
+ *
+ * A currency expression like `$5.99` (single `$`) never matches the pair
+ * pattern. `From $5 to $10` matches `$5 to $` as a pair but the inner text is
+ * neither mathy nor a single variable, so it is left intact.
+ *
+ * Backslash escapes are honoured while scanning: a `$` that directly follows
+ * a backslash (`\$`) neither opens nor closes a math region. Without this,
+ * `costs \$5 and \$10` was read as the math pair `$5 and \$`, the backslash
+ * counted as a LaTeX marker, and both dollar signs were dropped.
+ *
+ * @param text Raw text that may contain dollar-delimited math.
+ * @returns The text with recognised math regions replaced by their converted
+ *   content; everything else is returned unchanged.
+ */
+function stripMathDelimiters(text: string): string {
+  // One left-to-right scan with three alternatives, tried in this order at
+  // each position:
+  //   1. a backslash plus the character after it — consumed and emitted
+  //      verbatim so that `\$` can never act as a delimiter and `\\` is kept
+  //      together as a pair;
+  //   2. display math `$$...$$` (capture group 1);
+  //   3. inline math `$...$` on a single line (capture group 2).
+  // Inside a math body the two sub-alternatives start with different
+  // characters (backslash vs. non-backslash), so the pattern cannot backtrack
+  // between them.
+  const mathOrEscape =
+    /\\[\s\S]|\$\$((?:\\[\s\S]|[^$\\])+)\$\$|\$((?:\\[^\n]|[^$\\\n])+?)\$/g;
+
+  return text.replace(
+    mathOrEscape,
+    (match: string, display?: string, inline?: string): string => {
+      if (display !== undefined) {
+        return applyMathModeConversions(display);
+      }
+      if (inline === undefined) {
+        // Alternative 1: an escaped character outside math. Leave it for the
+        // prose pass, which knows how to unescape `\$`, `\{`, etc.
+        return match;
+      }
+      const hasLatexMarkers = /[\\_^]/.test(inline);
+      const isSingleVariable = /^\s*[A-Za-z]\s*$/.test(inline);
+      return hasLatexMarkers || isSingleVariable
+        ? applyMathModeConversions(inline)
+        : match;
+    },
+  );
+}
+
+/**
+ * Rewrites LaTeX font/formatting wrappers into markdown or plain text so the
+ * downstream inline parser can style them:
+ *
+ *   - `\textbf{..}`, `\mathbf{..}`                → `**..**`
+ *   - `\textit{..}`, `\emph{..}`, `\mathit{..}`   → `*..*`
+ *   - `\text{..}`, `\mathrm{..}`, `\mathsf{..}`, `\mathtt{..}`,
+ *     `\mathbb{..}`, `\mathcal{..}`, `\mathfrak{..}`,
+ *     `\operatorname{..}`                         → the bare content
+ *
+ * The argument may not itself contain braces; a wrapper with nested braces is
+ * left as written. Keeping the argument pattern brace-free bounds the regex
+ * and avoids catastrophic backtracking on adversarial input.
+ */
+function convertTextFormatting(text: string): string {
+  // Each rule pairs a wrapper pattern with the markdown marker placed on both
+  // sides of the captured argument (empty marker = unwrap only). Rules run in
+  // the listed order.
+  const rules: ReadonlyArray<readonly [RegExp, string]> = [
+    [/\\(?:textbf|mathbf)\{([^{}]*)\}/g, '**'],
+    [/\\(?:textit|emph|mathit)\{([^{}]*)\}/g, '*'],
+    [
+      /\\(?:text|mathrm|mathsf|mathtt|mathbb|mathcal|mathfrak|operatorname)\{([^{}]*)\}/g,
+      '',
+    ],
+  ];
+
+  return rules.reduce(
+    (acc, [pattern, marker]) =>
+      acc.replace(
+        pattern,
+        (_whole, body: string) => `${marker}${body}${marker}`,
+      ),
+    text,
+  );
+}
+
+/**
+ * Flattens fractions and roots into a one-line form:
+ *
+ *   - `\frac{a}{b}`  → `(a)/(b)`
+ *   - `\sqrt[n]{x}`  → `n√(x)`
+ *   - `\sqrt{x}`     → `√(x)`
+ *
+ * Arguments containing braces are not matched, so only one level of braces
+ * is supported. The indexed root is handled before the plain one.
+ */
+function convertFractionsAndRoots(text: string): string {
+  const fraction = /\\frac\{([^{}]*)\}\{([^{}]*)\}/g;
+  const indexedRoot = /\\sqrt\[([^\]]*)\]\{([^{}]*)\}/g;
+  const plainRoot = /\\sqrt\{([^{}]*)\}/g;
+
+  return text
+    .replace(
+      fraction,
+      (_whole, top: string, bottom: string) => `(${top})/(${bottom})`,
+    )
+    .replace(
+      indexedRoot,
+      (_whole, degree: string, inner: string) => `${degree}√(${inner})`,
+    )
+    .replace(plainRoot, (_whole, inner: string) => `√(${inner})`);
+}
+
+/**
+ * Removes the backslash from escaped single-character specials: `\{`, `\}`,
+ * `\[`, `\]`, `\_`, `\%`, `\&`, `\#`, `\$`, `\|`, and backslash-space.
+ * Both conversion pipelines call this before the named-command lookup.
+ */
+function convertEscapedSpecials(text: string): string {
+  // Deliberately narrow: only characters that carry meaning in LaTeX and
+  // also show up unescaped in plain text. `\\` (line break) is not touched
+  // here; a separate pass owns it.
+  const unescaped = text.replace(/\\([{}[\]_%&#$|])/g, '$1');
+  // `\ ` (backslash + space) is LaTeX for a non-breaking space; emit an
+  // ordinary space so neighbouring words do not run together.
+  return unescaped.replace(/\\ /g, ' ');
+}
+
+/**
+ * Converts named commands (alphabetic control sequences) to Unicode. Anything
+ * not in the tables is left as-is so unrelated backslash content
+ * (e.g. Windows paths) is not disturbed.
+ *
+ * A command is a backslash followed by the longest run of ASCII letters, so
+ * `\alphabet` is looked up as `alphabet` (unknown) rather than `\alpha`.
+ * GREEK_LETTERS is consulted first, then LATEX_COMMANDS.
+ *
+ * Lookups are restricted to the tables' OWN keys. The tables are plain
+ * objects, so an unguarded `table[name]` also finds members inherited from
+ * `Object.prototype`: `\constructor` or `\toString` (think
+ * `C:\src\constructor`) would resolve to a built-in function, and
+ * `String.prototype.replace` would splice that function's source text into
+ * the output.
+ *
+ * @param text Text that may contain `\name` sequences.
+ * @returns The text with every known command replaced by its glyph.
+ */
+function convertNamedCommands(text: string): string {
+  // Own-property lookup; returns undefined for unknown or inherited names.
+  const lookup = (
+    table: Readonly<Record<string, string>>,
+    name: string,
+  ): string | undefined =>
+    Object.prototype.hasOwnProperty.call(table, name) ? table[name] : undefined;
+
+  return text.replace(
+    /\\([A-Za-z]+)(?![A-Za-z])/g,
+    (match, name: string) =>
+      lookup(GREEK_LETTERS, name) ?? lookup(LATEX_COMMANDS, name) ?? match,
+  );
+}
+
+/**
+ * Rewrites the LaTeX spacing shorthands `\,`, `\;`, `\:` and `\!`. They get
+ * their own pass because the named-command pattern only recognises letters.
+ */
+function convertPunctuationCommands(text: string): string {
+  // `\!` is a negative space and simply disappears; the other three are
+  // positive spaces of various widths and all collapse to one blank.
+  return text.replace(/\\([,;:!])/g, (_whole, mark: string) =>
+    mark === '!' ? '' : ' ',
+  );
+}
+
+/**
+ * Replaces every `\\` (two consecutive backslashes, the LaTeX line-break
+ * command used in math environments and tables) with a literal newline.
+ * Pairs are taken left to right, so three backslashes in a row yield a
+ * newline followed by one backslash. Only the math-mode pipeline calls this;
+ * the prose pipeline deliberately skips it.
+ */
+function convertLineBreaks(text: string): string {
+  const segments = text.split('\\\\');
+  return segments.join('\n');
+}
+
+/**
+ * Converts subscripts and superscripts to Unicode where every character in
+ * the operand maps. If any character has no mapping the whole operand is
+ * left alone, to avoid "half-converted" output that looks worse than no
+ * conversion.
+ *
+ * Two operand forms are recognised:
+ *   - braced: `x_{12}`, `x^{n+1}` — converted wherever they occur;
+ *   - single character: `x_0`, `x^2` — converted only when the `_` / `^`
+ *     directly follows a base character.
+ *
+ * A base is any character other than whitespace, a backslash, `_`, `^`, or an
+ * opening bracket. It is checked with a lookbehind and never consumed, so:
+ *   - symbols produced by earlier passes can carry scripts (`α_1` → `α₁`,
+ *     `∑_i` → `∑ᵢ`);
+ *   - a script can follow another script (`x_1^2` → `x₁²`), which an
+ *     ASCII-only base class rejected because `₁` is not an ASCII letter;
+ *   - chained scripts (`x_i_j`) each see their own base.
+ * A leading `_name` still has no base and is left untouched.
+ *
+ * Intended for math regions only; applied to prose it would mangle
+ * snake_case identifiers.
+ *
+ * @param text Math-region text.
+ * @returns The text with convertible sub/superscripts replaced by glyphs.
+ */
+function convertSubSuperScripts(text: string): string {
+  // Maps a whole operand through `table`, or yields undefined as soon as one
+  // character has no glyph. `Array.from` walks the string by code point; the
+  // tables are keyed by single ASCII characters, so any other character
+  // simply fails the lookup.
+  const translate = (
+    operand: string,
+    table: Readonly<Record<string, string>>,
+  ): string | undefined => {
+    const glyphs = Array.from(operand, (c) => table[c]);
+    return glyphs.every((g) => g !== undefined) ? glyphs.join('') : undefined;
+  };
+
+  // Braced form first: x_{...}, x^{...}.
+  let out = text.replace(
+    /_\{([^{}]+)\}/g,
+    (match, inner: string) => translate(inner, SUBSCRIPT_MAP) ?? match,
+  );
+  out = out.replace(
+    /\^\{([^{}]+)\}/g,
+    (match, inner: string) => translate(inner, SUPERSCRIPT_MAP) ?? match,
+  );
+
+  // Single-character form: x_0, x^2. The operand is converted only when it
+  // has a glyph, so `x_Q` / `x^Q` stay as written.
+  out = out.replace(
+    /(?<=[^\s\\_^({[])_([A-Za-z0-9+\-=()])/g,
+    (match, c: string) => SUBSCRIPT_MAP[c] ?? match,
+  );
+  out = out.replace(
+    /(?<=[^\s\\_^({[])\^([A-Za-z0-9+\-=()])/g,
+    (match, c: string) => SUPERSCRIPT_MAP[c] ?? match,
+  );
+
+  return out;
+}
+
+/**
+ * Applies the full set of conversions that make sense inside a LaTeX math
+ * region (i.e. text that was originally wrapped in `$...$`). This includes
+ * sub/superscripts, which are NOT safe to apply to arbitrary prose because
+ * they would mangle identifiers like `file_name`.
+ *
+ * Pass order matters:
+ *   - wrappers with brace arguments (`\textbf{}`, `\frac{}{}`) go first,
+ *     while their braces are still intact;
+ *   - `\\` line breaks are rewritten BEFORE escaped specials. The specials
+ *     pass turns backslash-space into a space, so on `a \\ b` it would eat
+ *     the second backslash of the pair and the line break would be lost
+ *     (likewise `\\[` and `\\{`);
+ *   - sub/superscripts go last so they can attach to the glyphs produced by
+ *     the command passes.
+ *
+ * @param text Inner text of a math region, without the dollar delimiters.
+ * @returns The converted text.
+ */
+function applyMathModeConversions(text: string): string {
+  let out = text;
+  out = convertTextFormatting(out);
+  out = convertFractionsAndRoots(out);
+  // Must precede convertEscapedSpecials — see the ordering note above.
+  out = convertLineBreaks(out);
+  out = convertEscapedSpecials(out);
+  out = convertNamedCommands(out);
+  out = convertPunctuationCommands(out);
+  out = convertSubSuperScripts(out);
+  return out;
+}
+
+/**
+ * Runs the conversions that are safe on arbitrary prose: those triggered by
+ * an explicit LaTeX token such as `\alpha`, `\textbf{...}` or `\to`. Bare
+ * `_` and `^` are never interpreted here, so identifiers and snake_case
+ * names come through unchanged.
+ */
+function applyProseConversions(text: string): string {
+  // Passes run in the listed order. convertLineBreaks is intentionally
+  // absent: outside math delimiters `\\` is far more likely to be a Windows
+  // UNC path (`\\server\share`) or an escaped backslash in code-like prose
+  // than a LaTeX line break. Genuine line breaks live inside `$...$` or
+  // `$$...$$` and are handled by applyMathModeConversions. See PR #25802
+  // review.
+  const passes: ReadonlyArray<(s: string) => string> = [
+    convertTextFormatting,
+    convertFractionsAndRoots,
+    convertEscapedSpecials,
+    convertNamedCommands,
+    convertPunctuationCommands,
+  ];
+  return passes.reduce((acc, pass) => pass(acc), text);
+}
+
+/**
+ * Public entry point: converts LaTeX-style markup in `input` to Unicode and
+ * lightweight markdown. Works in two phases:
+ *
+ *   1. Dollar-delimited math regions (`$...$`, `$$...$$`) are located in the
+ *      untouched input, their delimiters removed, and their content run
+ *      through the math-mode conversions (sub/superscripts included).
+ *
+ *   2. The prose-safe conversions then run over the whole result, picking up
+ *      LaTeX tokens (`\alpha`, `\to`, `\textbf{...}`) the model emitted
+ *      outside math delimiters.
+ *
+ * Input that is empty, or contains neither a backslash nor a dollar sign, is
+ * returned as-is without running either phase, which keeps the hot rendering
+ * path cheap for ordinary prose.
+ */
+export function convertLatexToUnicode(input: string): string {
+  // Nothing to convert unless at least one `\` or `$` is present.
+  if (!input || (!input.includes('\\') && !input.includes('$'))) {
+    return input;
+  }
+  return applyProseConversions(stripMathDelimiters(input));
+}
@@ -222,5 +222,52 @@ describe('parsingUtils', () => {
        ),
      );
    });
+
+    // Integration checks: LaTeX conversion as observed through
+    // parseMarkdownToANSI, compared against the expected colourised output.
+    describe('LaTeX conversion (issue #25656)', () => {
+      it('converts LaTeX in plain text (no markdown tokens)', () => {
+        const input = 'No cycles $\\to$ no deadlock';
+        const output = parseMarkdownToANSI(input);
+        expect(output).toBe(primary('No cycles → no deadlock'));
+      });
+
+      it('converts LaTeX in the set example from the issue', () => {
+        const input = 'Processes $\\{P_0, \\dots, P_n\\}$';
+        const output = parseMarkdownToANSI(input);
+        expect(output).toBe(primary('Processes {P₀, …, Pₙ}'));
+      });
+
+      it('preserves LaTeX inside inline code', () => {
+        // Content between backticks must be rendered verbatim — conversion
+        // must NOT be applied inside code spans, even when the code contains
+        // `$...$` that would otherwise be stripped.
+        const input = 'use `$\\to$` for an arrow';
+        const output = parseMarkdownToANSI(input);
+        expect(output).toBe(
+          `${primary('use ')}${accent('$\\to$')}${primary(' for an arrow')}`,
+        );
+      });
+
+      it('converts LaTeX in slices around markdown tokens', () => {
+        // Math on both sides of a bold span is converted; the bold span
+        // itself is still styled as bold.
+        const input = '$\\alpha$ is **bold** and $\\beta$ is plain';
+        const output = parseMarkdownToANSI(input);
+        expect(output).toBe(
+          `${primary('α is ')}${chalk.bold(primary('bold'))}${primary(
+            ' and β is plain',
+          )}`,
+        );
+      });
+
+      it('leaves Windows paths alone', () => {
+        const input = 'Path: C:\\Users\\foo';
+        const output = parseMarkdownToANSI(input);
+        expect(output).toBe(primary('Path: C:\\Users\\foo'));
+      });
+
+      it('leaves currency amounts alone', () => {
+        const input = 'It costs $5.99 total';
+        const output = parseMarkdownToANSI(input);
+        expect(output).toBe(primary('It costs $5.99 total'));
+      });
+    });
  });
 });
@@ -12,6 +12,7 @@ import {
 } from '../themes/color-utils.js';
 import { theme } from '../semantic-colors.js';
 import { debugLogger } from '@google/gemini-cli-core';
+import { convertLatexToUnicode } from './latexToUnicode.js';

 // Constants for Markdown parsing
 const BOLD_MARKER_LENGTH = 2; // For "**"
@@ -72,11 +73,49 @@ const ansiColorize = (str: string, color: string | undefined): string => {
 * Converts markdown text into a string with ANSI escape codes.
 * This mirrors the parsing logic in InlineMarkdownRenderer.tsx
 */
+// Private-Use-Area codepoint used as a placeholder sentinel when masking
+// inline code / URL spans from LaTeX conversion. Not touched by
+// stripUnsafeCharacters and not matched by the markdown tokenizer.
+// NOTE(review): the two claims above depend on code outside this hunk —
+// confirm if either of those components changes.
+const MASK_SENTINEL = '\uE000';
+// A placeholder is: sentinel, decimal index of the preserved span, sentinel.
+// The capture group is that index. The pattern carries the `g` flag and is
+// only passed to String.prototype.replace here.
+const MASK_PATTERN = /\uE000(\d+)\uE000/g;
+
+/**
+ * Applies LaTeX conversion to `text` while leaving inline code spans and bare
+ * URLs exactly as written. Each protected span is swapped for a numbered
+ * placeholder before conversion and swapped back afterwards. Without this
+ * step the LaTeX pass would rewrite ``$\to$`` inside a backtick code span —
+ * breaking the "code is verbatim" contract — and could rewrite URL query
+ * strings containing `$`.
+ */
+const convertLatexPreservingSpans = (text: string): string => {
+  const stash: string[] = [];
+
+  // Code spans (opening and closing backtick runs of equal length) are listed
+  // before bare URLs in the alternation, so a URL inside a code span is
+  // captured as part of the span.
+  const withPlaceholders = text.replace(
+    /(`+)([^`\n]+?)\1|https?:\/\/\S+/g,
+    (span) => {
+      stash.push(span);
+      const slot = stash.length - 1;
+      return `${MASK_SENTINEL}${slot}${MASK_SENTINEL}`;
+    },
+  );
+
+  return convertLatexToUnicode(withPlaceholders).replace(
+    MASK_PATTERN,
+    (placeholder, slot: string) => {
+      // If the index has no stashed span (for instance the sentinel already
+      // occurred in the input), keep the placeholder text rather than
+      // emitting "undefined".
+      const original = stash[Number(slot)];
+      return original ?? placeholder;
+    },
+  );
+};
+
 export const parseMarkdownToANSI = (
-  text: string,
+  rawText: string,
  defaultColor?: string,
 ): string => {
  const baseColor = defaultColor ?? theme.text.primary;
+  // Convert LaTeX-style math/commands to Unicode BEFORE tokenizing markdown,
+  // so constructs like `$\{P_0, \dots, P_n\}$` are handled as a whole even
+  // when they contain underscores (which the tokenizer would otherwise treat
+  // as italic markers). Inline code and URLs are masked during the
+  // conversion so their contents are preserved verbatim. Unknown `\foo`
+  // sequences are left alone, so Windows paths and regex escapes survive.
+  // See issue #25656.
+  const text = convertLatexPreservingSpans(rawText);
  // Early return for plain text without markdown or URLs
  if (!/[*_~`<[https?:]/.test(text)) {
    return ansiColorize(text, baseColor);