diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 47af5f76e1..e035dc4502 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -219,5 +219,8 @@ export * from './agents/types.js'; export * from './utils/stdio.js'; export * from './utils/terminal.js'; +// Export voice utilities +export * from './voice/responseFormatter.js'; + // Export types from @google/genai export type { Content, Part, FunctionCall } from '@google/genai'; diff --git a/packages/core/src/voice/responseFormatter.test.ts b/packages/core/src/voice/responseFormatter.test.ts new file mode 100644 index 0000000000..679ff1b89c --- /dev/null +++ b/packages/core/src/voice/responseFormatter.test.ts @@ -0,0 +1,288 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { formatForSpeech } from './responseFormatter.js'; + +describe('formatForSpeech', () => { + describe('edge cases', () => { + it('should return empty string for empty input', () => { + expect(formatForSpeech('')).toBe(''); + }); + + it('should return plain text unchanged', () => { + expect(formatForSpeech('Hello world')).toBe('Hello world'); + }); + }); + + describe('ANSI escape codes', () => { + it('should strip color codes', () => { + expect(formatForSpeech('\x1b[31mError\x1b[0m')).toBe('Error'); + }); + + it('should strip bold/dim codes', () => { + expect(formatForSpeech('\x1b[1mBold\x1b[22m text')).toBe('Bold text'); + }); + + it('should strip cursor movement codes', () => { + expect(formatForSpeech('line1\x1b[2Kline2')).toBe('line1line2'); + }); + }); + + describe('markdown stripping', () => { + it('should strip bold markers **text**', () => { + expect(formatForSpeech('**Error**: something went wrong')).toBe( + 'Error: something went wrong', + ); + }); + + it('should strip bold markers __text__', () => { + expect(formatForSpeech('__Error__: something')).toBe('Error: something'); + }); + + it('should strip italic markers *text*', () => { + expect(formatForSpeech('*note*: pay attention')).toBe( + 'note: pay attention', + ); + }); + + it('should strip inline code backticks', () => { + expect(formatForSpeech('Run `npm install` first')).toBe( + 'Run npm install first', + ); + }); + + it('should strip blockquote prefix', () => { + expect(formatForSpeech('> This is a quote')).toBe('This is a quote'); + }); + + it('should strip heading markers', () => { + expect(formatForSpeech('# Results\n## Details')).toBe('Results\nDetails'); + }); + + it('should replace markdown links with link text', () => { + expect(formatForSpeech('[Gemini API](https://ai.google.dev)')).toBe( + 'Gemini API', + ); + }); + + it('should strip unordered list markers', () => { + expect(formatForSpeech('- item one\n- item two')).toBe( + 'item one\nitem two', + ); + }); + + it('should strip ordered list markers', () => { + expect(formatForSpeech('1. first\n2. second')).toBe('first\nsecond'); + }); + }); + + describe('fenced code blocks', () => { + it('should unwrap a plain code block', () => { + expect(formatForSpeech('```\nconsole.log("hi")\n```')).toBe( + 'console.log("hi")', + ); + }); + + it('should unwrap a language-tagged code block', () => { + expect(formatForSpeech('```typescript\nconst x = 1;\n```')).toBe( + 'const x = 1;', + ); + }); + + it('should summarise a JSON object code block above threshold', () => { + const json = JSON.stringify({ status: 'ok', count: 42, items: [] }); + // Pass jsonThreshold lower than the json string length (38 chars) + const result = formatForSpeech(`\`\`\`json\n${json}\n\`\`\``, { + jsonThreshold: 10, + }); + expect(result).toBe('(JSON object with 3 keys)'); + }); + + it('should summarise a JSON array code block above threshold', () => { + const json = JSON.stringify([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + // Pass jsonThreshold lower than the json string length (23 chars) + const result = formatForSpeech(`\`\`\`\n${json}\n\`\`\``, { + jsonThreshold: 10, + }); + expect(result).toBe('(JSON array with 10 items)'); + }); + + it('should summarise a large JSON object using default threshold', () => { + // Build a JSON object whose stringified form exceeds the default 80-char threshold + const big = { + status: 'success', + count: 42, + items: ['alpha', 'beta', 'gamma'], + meta: { page: 1, totalPages: 10 }, + timestamp: '2026-03-03T00:00:00Z', + }; + const json = JSON.stringify(big); + expect(json.length).toBeGreaterThan(80); + const result = formatForSpeech(`\`\`\`json\n${json}\n\`\`\``); + expect(result).toBe('(JSON object with 5 keys)'); + }); + + it('should not summarise a tiny JSON value', () => { + // Below the default 80-char threshold → keep as-is + const result = formatForSpeech('```json\n{"a":1}\n```', { + jsonThreshold: 80, + }); + expect(result).toBe('{"a":1}'); + }); + }); + + describe('path abbreviation', () => { + it('should abbreviate a deep Unix path (default depth 3)', () => { + const result = formatForSpeech( + 'at /home/user/project/packages/core/src/tools/file.ts', + ); + expect(result).toContain('\u2026/src/tools/file.ts'); + expect(result).not.toContain('/home/user/project'); + }); + + it('should convert :line suffix to "line N"', () => { + const result = formatForSpeech( + 'Error at /home/user/project/src/tools/file.ts:142', + ); + expect(result).toContain('line 142'); + }); + + it('should drop column from :line:col suffix', () => { + const result = formatForSpeech( + 'Error at /home/user/project/src/tools/file.ts:142:7', + ); + expect(result).toContain('line 142'); + expect(result).not.toContain(':7'); + }); + + it('should respect custom pathDepth option', () => { + const result = formatForSpeech( + '/home/user/project/packages/core/src/file.ts', + { pathDepth: 2 }, + ); + expect(result).toContain('\u2026/src/file.ts'); + }); + + it('should not abbreviate a short path within depth', () => { + const result = formatForSpeech('/src/file.ts', { pathDepth: 3 }); + // Only 2 segments — no abbreviation needed + expect(result).toBe('/src/file.ts'); + }); + + it('should abbreviate a Windows path on a non-C drive', () => { + const result = formatForSpeech( + 'D:\\Users\\project\\packages\\core\\src\\file.ts', + { pathDepth: 3 }, + ); + expect(result).toContain('\u2026/core/src/file.ts'); + expect(result).not.toContain('D:\\Users\\project'); + }); + + it('should convert :line on a Windows path on a non-C drive', () => { + const result = formatForSpeech( + 'Error at D:\\Users\\project\\src\\tools\\file.ts:55', + ); + expect(result).toContain('line 55'); + expect(result).not.toContain('D:\\Users\\project'); + }); + + it('should abbreviate a Unix path containing a scoped npm package segment', () => { + const result = formatForSpeech( + 'at /home/user/project/node_modules/@google/gemini-cli-core/src/index.ts:12:3', + { pathDepth: 5 }, + ); + expect(result).toContain('line 12'); + expect(result).not.toContain(':3'); + expect(result).toContain('@google'); + }); + }); + + describe('stack trace collapsing', () => { + it('should collapse a multi-frame stack trace', () => { + const trace = [ + 'Error: ENOENT', + ' at Object.open (/project/src/file.ts:10:5)', + ' at Module._load (/project/node_modules/loader.js:20:3)', + ' at Function.Module._load (/project/node_modules/loader.js:30:3)', + ].join('\n'); + + const result = formatForSpeech(trace); + expect(result).toContain('and 2 more frames'); + expect(result).not.toContain('Module._load'); + }); + + it('should not collapse a single stack frame', () => { + const trace = + 'Error: ENOENT\n at Object.open (/project/src/file.ts:10:5)'; + const result = formatForSpeech(trace); + expect(result).not.toContain('more frames'); + }); + + it('should preserve surrounding text when collapsing a stack trace', () => { + const input = [ + 'Operation failed.', + ' at Object.open (/project/src/file.ts:10:5)', + ' at Module._load (/project/node_modules/loader.js:20:3)', + ' at Function.load (/project/node_modules/loader.js:30:3)', + 'Please try again.', + ].join('\n'); + + const result = formatForSpeech(input); + expect(result).toContain('Operation failed.'); + expect(result).toContain('Please try again.'); + expect(result).toContain('and 2 more frames'); + }); + }); + + describe('truncation', () => { + it('should truncate output longer than maxLength', () => { + const long = 'word '.repeat(200); + const result = formatForSpeech(long, { maxLength: 50 }); + expect(result.length).toBeLessThanOrEqual( + 50 + '\u2026 (1000 chars total)'.length, + ); + expect(result).toContain('\u2026'); + expect(result).toContain('chars total'); + }); + + it('should not truncate output within maxLength', () => { + const short = 'Hello world'; + expect(formatForSpeech(short, { maxLength: 500 })).toBe('Hello world'); + }); + }); + + describe('whitespace normalisation', () => { + it('should collapse more than two consecutive blank lines', () => { + const result = formatForSpeech('para1\n\n\n\n\npara2'); + expect(result).toBe('para1\n\npara2'); + }); + + it('should trim leading and trailing whitespace', () => { + expect(formatForSpeech(' hello ')).toBe('hello'); + }); + }); + + describe('real-world examples', () => { + it('should clean an ENOENT error with markdown and path', () => { + const input = + '**Error**: `ENOENT: no such file or directory`\n> at /home/user/project/packages/core/src/tools/file-utils.ts:142:7'; + const result = formatForSpeech(input); + expect(result).not.toContain('**'); + expect(result).not.toContain('`'); + expect(result).not.toContain('>'); + expect(result).toContain('Error'); + expect(result).toContain('ENOENT'); + expect(result).toContain('line 142'); + }); + + it('should clean a heading + list response', () => { + const input = '# Results\n- item one\n- item two\n- item three'; + const result = formatForSpeech(input); + expect(result).toBe('Results\nitem one\nitem two\nitem three'); + }); + }); +}); diff --git a/packages/core/src/voice/responseFormatter.ts b/packages/core/src/voice/responseFormatter.ts new file mode 100644 index 0000000000..dc1cbac4c4 --- /dev/null +++ b/packages/core/src/voice/responseFormatter.ts @@ -0,0 +1,185 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Options for formatForSpeech(). + */ +export interface FormatForSpeechOptions { + /** + * Maximum output length in characters before truncating. + * @default 500 + */ + maxLength?: number; + /** + * Number of trailing path segments to keep when abbreviating absolute paths. + * @default 3 + */ + pathDepth?: number; + /** + * Maximum number of characters in a JSON value before summarising it. + * @default 80 + */ + jsonThreshold?: number; +} + +// ANSI escape sequences (CSI, OSC, etc.) +// eslint-disable-next-line no-control-regex +const ANSI_RE = /\x1b(?:\[[0-9;]*[mGKHF]|\][^\x07\x1b]*\x07|[()][AB012])/g; + +// Fenced code blocks ```lang\n...\n``` +const CODE_FENCE_RE = /```[^\n]*\n([\s\S]*?)```/g; + +// Inline code `...` +const INLINE_CODE_RE = /`([^`]+)`/g; + +// Bold/italic markers **text**, *text*, __text__, _text_ +// Exclude newlines so the pattern cannot span multiple lines and accidentally +// consume list markers that haven't been stripped yet. +const BOLD_ITALIC_RE = /\*{1,2}([^*\n]+)\*{1,2}|_{1,2}([^_\n]+)_{1,2}/g; + +// Blockquote prefix "> " +const BLOCKQUOTE_RE = /^>\s?/gm; + +// ATX headings # heading +const HEADING_RE = /^#{1,6}\s+/gm; + +// Markdown links [text](url) +const LINK_RE = /\[([^\]]+)\]\([^)]+\)/g; + +// Markdown list markers "- " or "* " or "N. " at line start +const LIST_MARKER_RE = /^[ \t]*(?:[-*]|\d+\.)\s+/gm; + +// Two or more consecutive stack-trace frames (Node.js style " at …" lines). +// Matching blocks of ≥2 lets us replace each group in-place, preserving any +// text that follows the trace rather than appending it to the end. +const STACK_BLOCK_RE = /(?:^[ \t]+at [^\n]+(?:\n|$)){2,}/gm; + +// Absolute Unix paths optionally ending with :line or :line:col +// Hyphen placed at start of char class to avoid useless-escape lint error +const UNIX_PATH_RE = + /(?:^|(?<=\s|[(`"']))(\/[-\w.@]+(?:\/[-\w.@]+)*)(:\d+(?::\d+)?)?/g; + +// Absolute Windows paths C:\... or C:/... (any drive letter) +const WIN_PATH_RE = + /(?:^|(?<=\s|[(`"']))([A-Za-z]:[/\\][-\w. ]+(?:[/\\][-\w. ]+)*)(:\d+(?::\d+)?)?/g; + +/** + * Abbreviates an absolute path to at most `depth` trailing segments, + * prefixed with "…". Optionally converts `:line` suffix to `line N`. + */ +function abbreviatePath( + full: string, + suffix: string | undefined, + depth: number, +): string { + const segments = full.split(/[/\\]/).filter(Boolean); + const kept = segments.length > depth ? segments.slice(-depth) : segments; + const abbreviated = + segments.length > depth ? `\u2026/${kept.join('/')}` : full; + + if (!suffix) return abbreviated; + // Convert ":142" → " line 142", ":142:7" → " line 142" + const lineNum = suffix.split(':').filter(Boolean)[0]; + return `${abbreviated} line ${lineNum}`; +} + +/** + * Summarises a JSON string as "(JSON object with N keys)" or + * "(JSON array with N items)", falling back to the original if parsing fails. + */ +function summariseJson(jsonStr: string): string { + try { + const parsed: unknown = JSON.parse(jsonStr); + if (Array.isArray(parsed)) { + return `(JSON array with ${parsed.length} item${parsed.length === 1 ? '' : 's'})`; + } + if (parsed !== null && typeof parsed === 'object') { + const keys = Object.keys(parsed).length; + return `(JSON object with ${keys} key${keys === 1 ? '' : 's'})`; + } + } catch { + // not valid JSON — leave as-is + } + return jsonStr; +} + +/** + * Transforms a markdown/ANSI-formatted string into speech-ready plain text. + * + * Transformations applied (in order): + * 1. Strip ANSI escape codes + * 2. Collapse fenced code blocks to their content (or a JSON summary) + * 3. Collapse stack traces to first frame + count + * 4. Strip markdown syntax (bold, italic, blockquotes, headings, links, lists, inline code) + * 5. Abbreviate deep absolute paths + * 6. Normalise whitespace + * 7. Truncate to maxLength + */ +export function formatForSpeech( + text: string, + options?: FormatForSpeechOptions, +): string { + const maxLength = options?.maxLength ?? 500; + const pathDepth = options?.pathDepth ?? 3; + const jsonThreshold = options?.jsonThreshold ?? 80; + + if (!text) return ''; + + let out = text; + + // 1. Strip ANSI escape codes + out = out.replace(ANSI_RE, ''); + + // 2. Fenced code blocks — try to summarise JSON content, else keep text + out = out.replace(CODE_FENCE_RE, (_match, body: string) => { + const trimmed = body.trim(); + if (trimmed.length > jsonThreshold) { + const summary = summariseJson(trimmed); + if (summary !== trimmed) return summary; + } + return trimmed; + }); + + // 3. Collapse stack traces: replace each contiguous block of ≥2 frames + // in-place so that any text after the trace is preserved in order. + out = out.replace(STACK_BLOCK_RE, (block) => { + const lines = block + .trim() + .split('\n') + .map((l) => l.trim()); + const rest = lines.length - 1; + return `${lines[0]} (and ${rest} more frame${rest === 1 ? '' : 's'})\n`; + }); + + // 4. Strip markdown syntax + out = out + .replace(INLINE_CODE_RE, '$1') + .replace(BOLD_ITALIC_RE, (_m, g1?: string, g2?: string) => g1 ?? g2 ?? '') + .replace(BLOCKQUOTE_RE, '') + .replace(HEADING_RE, '') + .replace(LINK_RE, '$1') + .replace(LIST_MARKER_RE, ''); + + // 5. Abbreviate absolute paths + // Windows paths first to avoid the leading letter being caught by Unix RE + out = out.replace(WIN_PATH_RE, (_m, full: string, suffix?: string) => + abbreviatePath(full, suffix, pathDepth), + ); + out = out.replace(UNIX_PATH_RE, (_m, full: string, suffix?: string) => + abbreviatePath(full, suffix, pathDepth), + ); + + // 6. Normalise whitespace: collapse multiple blank lines, trim + out = out.replace(/\n{3,}/g, '\n\n').trim(); + + // 7. Truncate + if (out.length > maxLength) { + const total = out.length; + out = out.slice(0, maxLength).trimEnd() + `\u2026 (${total} chars total)`; + } + + return out; +}