Properly parse at-commands with narrow non-breaking spaces (#18677)

2026-06-16 14:27:24 -07:00 · 2026-02-09 16:51:24 -08:00
parent cc2798018b
commit eb94284256
4 changed files with 76 additions and 54 deletions
@@ -319,6 +319,35 @@ describe('handleAtCommand', () => {
    );
  }, 10000);

+  it('should correctly handle file paths with narrow non-breaking space (NNBSP)', async () => {
+    const nnbsp = '\u202F';
+    const fileContent = 'NNBSP file content.';
+    const filePath = await createTestFile(
+      path.join(testRootDir, `my${nnbsp}file.txt`),
+      fileContent,
+    );
+    const relativePath = getRelativePath(filePath);
+    const query = `@${filePath}`;
+
+    const result = await handleAtCommand({
+      query,
+      config: mockConfig,
+      addItem: mockAddItem,
+      onDebugMessage: mockOnDebugMessage,
+      messageId: 129,
+      signal: abortController.signal,
+    });
+
+    expect(result.error).toBeUndefined();
+    expect(result.processedQuery).toEqual([
+      { text: `@${relativePath}` },
+      { text: '\n--- Content from referenced files ---' },
+      { text: `\nContent from @${relativePath}:\n` },
+      { text: fileContent },
+      { text: '\n--- End of content ---' },
+    ]);
+  });
+
  it('should handle multiple @file references', async () => {
    const content1 = 'Content file1';
    const file1Path = await createTestFile(
@@ -27,6 +27,17 @@ import type { UseHistoryManagerReturn } from './useHistoryManager.js';
 const REF_CONTENT_HEADER = `\n${REFERENCE_CONTENT_START}`;
 const REF_CONTENT_FOOTER = `\n${REFERENCE_CONTENT_END}`;

+/**
+ * Regex source for the path/command part of an @ reference.
+ * It uses strict ASCII whitespace delimiters to allow Unicode characters like NNBSP in filenames.
+ *
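+ * 1. \\. matches a backslash followed by any single character, i.e. an escaped character (e.g., an escaped space).
+ * 2. [^ \t\n\r,;!?()\[\]{}.] matches any character that is NOT a delimiter (ASCII whitespace or listed punctuation) and NOT a period.
+ * 3. \.(?!$|[ \t\n\r]) matches a period ONLY if it is NOT followed by ASCII whitespace (space, tab, LF, CR) or end-of-string.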
+ */
+export const AT_COMMAND_PATH_REGEX_SOURCE =
+  '(?:\\\\.|[^ \\t\\n\\r,;!?()\\[\\]{}.]|\\.(?!$|[ \\t\\n\\r]))+';
+
 interface HandleAtCommandParams {
  query: string;
  config: Config;
@@ -52,68 +63,40 @@ interface AtCommandPart {
 */
 function parseAllAtCommands(query: string): AtCommandPart[] {
  const parts: AtCommandPart[] = [];
-  let currentIndex = 0;
+  let lastIndex = 0;

-  while (currentIndex < query.length) {
-    let atIndex = -1;
-    let nextSearchIndex = currentIndex;
-    // Find next unescaped '@'
-    while (nextSearchIndex < query.length) {
-      if (
-        query[nextSearchIndex] === '@' &&
-        (nextSearchIndex === 0 || query[nextSearchIndex - 1] !== '\\')
-      ) {
-        atIndex = nextSearchIndex;
-        break;
-      }
-      nextSearchIndex++;
-    }
+  // Create a new RegExp instance for each call to avoid shared state/lastIndex issues.
+  const atCommandRegex = new RegExp(
+    `(?<!\\\\)@${AT_COMMAND_PATH_REGEX_SOURCE}`,
+    'g',
+  );

-    if (atIndex === -1) {
-      // No more @
-      if (currentIndex < query.length) {
-        parts.push({ type: 'text', content: query.substring(currentIndex) });
-      }
-      break;
-    }
+  let match: RegExpExecArray | null;
+
+  while ((match = atCommandRegex.exec(query)) !== null) {
+    const matchIndex = match.index;
+    const fullMatch = match[0];

    // Add text before @
-    if (atIndex > currentIndex) {
+    if (matchIndex > lastIndex) {
      parts.push({
        type: 'text',
-        content: query.substring(currentIndex, atIndex),
+        content: query.substring(lastIndex, matchIndex),
      });
    }

-    // Parse @path
-    let pathEndIndex = atIndex + 1;
-    let inEscape = false;
-    while (pathEndIndex < query.length) {
-      const char = query[pathEndIndex];
-      if (inEscape) {
-        inEscape = false;
-      } else if (char === '\\') {
-        inEscape = true;
-      } else if (/[,\s;!?()[\]{}]/.test(char)) {
-        // Path ends at first whitespace or punctuation not escaped
-        break;
-      } else if (char === '.') {
-        // For . we need to be more careful - only terminate if followed by whitespace or end of string
-        // This allows file extensions like .txt, .js but terminates at sentence endings like "file.txt. Next sentence"
-        const nextChar =
-          pathEndIndex + 1 < query.length ? query[pathEndIndex + 1] : '';
-        if (nextChar === '' || /\s/.test(nextChar)) {
-          break;
-        }
-      }
-      pathEndIndex++;
-    }
-    const rawAtPath = query.substring(atIndex, pathEndIndex);
    // unescapePath expects the @ symbol to be present, and will handle it.
-    const atPath = unescapePath(rawAtPath);
+    const atPath = unescapePath(fullMatch);
    parts.push({ type: 'atPath', content: atPath });
-    currentIndex = pathEndIndex;
+
+    lastIndex = matchIndex + fullMatch.length;
  }
+
+  // Add remaining text
+  if (lastIndex < query.length) {
+    parts.push({ type: 'text', content: query.substring(lastIndex) });
+  }
+
  // Filter out empty text parts that might result from consecutive @paths or leading/trailing spaces
  return parts.filter(
    (part) => !(part.type === 'text' && part.content.trim() === ''),
@@ -134,6 +134,14 @@ describe('parseInputForHighlighting', () => {
      { text: '@/my\\ path/file.txt', type: 'file' },
    ]);
  });
+
+  it('should highlight a file path with narrow non-breaking spaces (NNBSP)', () => {
+    const text = 'cat @/my\u202Fpath/file.txt';
+    expect(parseInputForHighlighting(text, 0)).toEqual([
+      { text: 'cat ', type: 'default' },
+      { text: '@/my\u202Fpath/file.txt', type: 'file' },
+    ]);
+  });
 });

 describe('parseInputForHighlighting with Transformations', () => {
@@ -11,6 +11,7 @@ import {
 import { LRUCache } from 'mnemonist';
 import { cpLen, cpSlice } from './textUtils.js';
 import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
+import { AT_COMMAND_PATH_REGEX_SOURCE } from '../hooks/atCommandProcessor.js';

 export type HighlightToken = {
  text: string;
@@ -19,11 +20,12 @@ export type HighlightToken = {

 // Matches slash commands (e.g., /help), @ references (files or MCP resource URIs),
 // and large paste placeholders (e.g., [Pasted Text: 6 lines]).
-// The @ pattern uses a negated character class to support URIs like `@file:///example.txt`
-// which contain colons. It matches any character except delimiters: comma, whitespace,
-// semicolon, common punctuation, and brackets.
+//
+// The @ pattern reuses the command processor's path source so both agree on where a path ends:
+// only at strict delimiters (ASCII whitespace, comma, etc.). This supports URIs like `@file:///example.txt`
+// and filenames with Unicode spaces (like NNBSP). Unlike the processor, an escaped `\@` is still matched here.
 const HIGHLIGHT_REGEX = new RegExp(
-  `(^/[a-zA-Z0-9_-]+|@(?:\\\\ |[^,\\s;!?()\\[\\]{}])+|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`,
+  `(^/[a-zA-Z0-9_-]+|@${AT_COMMAND_PATH_REGEX_SOURCE}|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`,
  'g',
 );