diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts index 999182e8c8..7a9601a4c6 100644 --- a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts +++ b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts @@ -319,6 +319,35 @@ describe('handleAtCommand', () => { ); }, 10000); + it('should correctly handle file paths with narrow non-breaking space (NNBSP)', async () => { + const nnbsp = '\u202F'; + const fileContent = 'NNBSP file content.'; + const filePath = await createTestFile( + path.join(testRootDir, `my${nnbsp}file.txt`), + fileContent, + ); + const relativePath = getRelativePath(filePath); + const query = `@${filePath}`; + + const result = await handleAtCommand({ + query, + config: mockConfig, + addItem: mockAddItem, + onDebugMessage: mockOnDebugMessage, + messageId: 129, + signal: abortController.signal, + }); + + expect(result.error).toBeUndefined(); + expect(result.processedQuery).toEqual([ + { text: `@${relativePath}` }, + { text: '\n--- Content from referenced files ---' }, + { text: `\nContent from @${relativePath}:\n` }, + { text: fileContent }, + { text: '\n--- End of content ---' }, + ]); + }); + it('should handle multiple @file references', async () => { const content1 = 'Content file1'; const file1Path = await createTestFile( diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.ts b/packages/cli/src/ui/hooks/atCommandProcessor.ts index 28bbef074c..18dcf9a0de 100644 --- a/packages/cli/src/ui/hooks/atCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/atCommandProcessor.ts @@ -27,6 +27,17 @@ import type { UseHistoryManagerReturn } from './useHistoryManager.js'; const REF_CONTENT_HEADER = `\n${REFERENCE_CONTENT_START}`; const REF_CONTENT_FOOTER = `\n${REFERENCE_CONTENT_END}`; +/** + * Regex source for the path/command part of an @ reference. + * It uses strict ASCII whitespace delimiters to allow Unicode characters like NNBSP in filenames. + * + * 1. \\. matches any escaped character (e.g., \ ). + * 2. [^ \t\n\r,;!?()\[\]{}.] matches any character that is NOT a delimiter and NOT a period. + * 3. \.(?!$|[ \t\n\r]) matches a period ONLY if it is NOT followed by whitespace or end-of-string. + */ +export const AT_COMMAND_PATH_REGEX_SOURCE = + '(?:\\\\.|[^ \\t\\n\\r,;!?()\\[\\]{}.]|\\.(?!$|[ \\t\\n\\r]))+'; + interface HandleAtCommandParams { query: string; config: Config; @@ -52,68 +63,40 @@ interface AtCommandPart { */ function parseAllAtCommands(query: string): AtCommandPart[] { const parts: AtCommandPart[] = []; - let currentIndex = 0; + let lastIndex = 0; - while (currentIndex < query.length) { - let atIndex = -1; - let nextSearchIndex = currentIndex; - // Find next unescaped '@' - while (nextSearchIndex < query.length) { - if ( - query[nextSearchIndex] === '@' && - (nextSearchIndex === 0 || query[nextSearchIndex - 1] !== '\\') - ) { - atIndex = nextSearchIndex; - break; - } - nextSearchIndex++; - } + // Create a new RegExp instance for each call to avoid shared state/lastIndex issues. + const atCommandRegex = new RegExp( + `(? currentIndex) { + if (matchIndex > lastIndex) { parts.push({ type: 'text', - content: query.substring(currentIndex, atIndex), + content: query.substring(lastIndex, matchIndex), }); } - // Parse @path - let pathEndIndex = atIndex + 1; - let inEscape = false; - while (pathEndIndex < query.length) { - const char = query[pathEndIndex]; - if (inEscape) { - inEscape = false; - } else if (char === '\\') { - inEscape = true; - } else if (/[,\s;!?()[\]{}]/.test(char)) { - // Path ends at first whitespace or punctuation not escaped - break; - } else if (char === '.') { - // For . we need to be more careful - only terminate if followed by whitespace or end of string - // This allows file extensions like .txt, .js but terminates at sentence endings like "file.txt. Next sentence" - const nextChar = - pathEndIndex + 1 < query.length ? query[pathEndIndex + 1] : ''; - if (nextChar === '' || /\s/.test(nextChar)) { - break; - } - } - pathEndIndex++; - } - const rawAtPath = query.substring(atIndex, pathEndIndex); // unescapePath expects the @ symbol to be present, and will handle it. - const atPath = unescapePath(rawAtPath); + const atPath = unescapePath(fullMatch); parts.push({ type: 'atPath', content: atPath }); - currentIndex = pathEndIndex; + + lastIndex = matchIndex + fullMatch.length; } + + // Add remaining text + if (lastIndex < query.length) { + parts.push({ type: 'text', content: query.substring(lastIndex) }); + } + // Filter out empty text parts that might result from consecutive @paths or leading/trailing spaces return parts.filter( (part) => !(part.type === 'text' && part.content.trim() === ''), diff --git a/packages/cli/src/ui/utils/highlight.test.ts b/packages/cli/src/ui/utils/highlight.test.ts index 70af079771..808f2d1bef 100644 --- a/packages/cli/src/ui/utils/highlight.test.ts +++ b/packages/cli/src/ui/utils/highlight.test.ts @@ -134,6 +134,14 @@ describe('parseInputForHighlighting', () => { { text: '@/my\\ path/file.txt', type: 'file' }, ]); }); + + it('should highlight a file path with narrow non-breaking spaces (NNBSP)', () => { + const text = 'cat @/my\u202Fpath/file.txt'; + expect(parseInputForHighlighting(text, 0)).toEqual([ + { text: 'cat ', type: 'default' }, + { text: '@/my\u202Fpath/file.txt', type: 'file' }, + ]); + }); }); describe('parseInputForHighlighting with Transformations', () => { diff --git a/packages/cli/src/ui/utils/highlight.ts b/packages/cli/src/ui/utils/highlight.ts index a6166204b0..d294b422f1 100644 --- a/packages/cli/src/ui/utils/highlight.ts +++ b/packages/cli/src/ui/utils/highlight.ts @@ -11,6 +11,7 @@ import { import { LRUCache } from 'mnemonist'; import { cpLen, cpSlice } from './textUtils.js'; import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js'; +import { AT_COMMAND_PATH_REGEX_SOURCE } from '../hooks/atCommandProcessor.js'; export type HighlightToken = { text: string; @@ -19,11 +20,12 @@ export type HighlightToken = { // Matches slash commands (e.g., /help), @ references (files or MCP resource URIs), // and large paste placeholders (e.g., [Pasted Text: 6 lines]). -// The @ pattern uses a negated character class to support URIs like `@file:///example.txt` -// which contain colons. It matches any character except delimiters: comma, whitespace, -// semicolon, common punctuation, and brackets. +// +// The @ pattern uses the same source as the command processor to ensure consistency. +// It matches any character except strict delimiters (ASCII whitespace, comma, etc.). +// This supports URIs like `@file:///example.txt` and filenames with Unicode spaces (like NNBSP). const HIGHLIGHT_REGEX = new RegExp( - `(^/[a-zA-Z0-9_-]+|@(?:\\\\ |[^,\\s;!?()\\[\\]{}])+|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`, + `(^/[a-zA-Z0-9_-]+|@${AT_COMMAND_PATH_REGEX_SOURCE}|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`, 'g', );