mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-11 06:31:01 -07:00
Properly parse at-commands with narrow non-breaking spaces (#18677)
This commit is contained in:
committed by
GitHub
parent
cc2798018b
commit
eb94284256
@@ -319,6 +319,35 @@ describe('handleAtCommand', () => {
|
||||
);
|
||||
}, 10000);
|
||||
|
||||
it('should correctly handle file paths with narrow non-breaking space (NNBSP)', async () => {
  // Arrange: create a file whose name contains U+202F (narrow no-break space).
  const narrowNoBreakSpace = '\u202F';
  const expectedContent = 'NNBSP file content.';
  const absolutePath = await createTestFile(
    path.join(testRootDir, `my${narrowNoBreakSpace}file.txt`),
    expectedContent,
  );
  const displayPath = getRelativePath(absolutePath);

  // Act: reference the file by its absolute path in a single @ command.
  const { error, processedQuery } = await handleAtCommand({
    query: `@${absolutePath}`,
    config: mockConfig,
    addItem: mockAddItem,
    onDebugMessage: mockOnDebugMessage,
    messageId: 129,
    signal: abortController.signal,
  });

  // Assert: the NNBSP stays part of the path and the file content is inlined.
  const expectedParts = [
    { text: `@${displayPath}` },
    { text: '\n--- Content from referenced files ---' },
    { text: `\nContent from @${displayPath}:\n` },
    { text: expectedContent },
    { text: '\n--- End of content ---' },
  ];
  expect(error).toBeUndefined();
  expect(processedQuery).toEqual(expectedParts);
});
|
||||
|
||||
it('should handle multiple @file references', async () => {
|
||||
const content1 = 'Content file1';
|
||||
const file1Path = await createTestFile(
|
||||
|
||||
@@ -27,6 +27,17 @@ import type { UseHistoryManagerReturn } from './useHistoryManager.js';
|
||||
const REF_CONTENT_HEADER = `\n${REFERENCE_CONTENT_START}`;
|
||||
const REF_CONTENT_FOOTER = `\n${REFERENCE_CONTENT_END}`;
|
||||
|
||||
// Alternatives that may each contribute to an @ path, tried in this order.
// Written as raw strings so each fragment reads exactly like the regex it becomes.
const AT_COMMAND_PATH_ALTERNATIVES = [
  // 1. A backslash followed by the character it escapes (e.g., `\ `).
  String.raw`\\.`,
  // 2. Any single character that is NOT a delimiter and NOT a period.
  String.raw`[^ \t\n\r,;!?()\[\]{}.]`,
  // 3. A period, but ONLY if it is NOT followed by whitespace or end-of-string.
  String.raw`\.(?!$|[ \t\n\r])`,
];

/**
 * Regex source for the path/command part of an @ reference (everything after the `@`).
 *
 * Only space, tab, line feed and carriage return are treated as whitespace
 * delimiters, which lets Unicode characters such as NNBSP (U+202F) appear
 * inside file names. The source is one or more repetitions of the
 * alternatives listed in AT_COMMAND_PATH_ALTERNATIVES.
 */
export const AT_COMMAND_PATH_REGEX_SOURCE = `(?:${AT_COMMAND_PATH_ALTERNATIVES.join('|')})+`;
|
||||
|
||||
interface HandleAtCommandParams {
|
||||
query: string;
|
||||
config: Config;
|
||||
@@ -52,68 +63,40 @@ interface AtCommandPart {
|
||||
*/
|
||||
function parseAllAtCommands(query: string): AtCommandPart[] {
|
||||
const parts: AtCommandPart[] = [];
|
||||
let currentIndex = 0;
|
||||
let lastIndex = 0;
|
||||
|
||||
while (currentIndex < query.length) {
|
||||
let atIndex = -1;
|
||||
let nextSearchIndex = currentIndex;
|
||||
// Find next unescaped '@'
|
||||
while (nextSearchIndex < query.length) {
|
||||
if (
|
||||
query[nextSearchIndex] === '@' &&
|
||||
(nextSearchIndex === 0 || query[nextSearchIndex - 1] !== '\\')
|
||||
) {
|
||||
atIndex = nextSearchIndex;
|
||||
break;
|
||||
}
|
||||
nextSearchIndex++;
|
||||
}
|
||||
// Create a new RegExp instance for each call to avoid shared state/lastIndex issues.
|
||||
const atCommandRegex = new RegExp(
|
||||
`(?<!\\\\)@${AT_COMMAND_PATH_REGEX_SOURCE}`,
|
||||
'g',
|
||||
);
|
||||
|
||||
if (atIndex === -1) {
|
||||
// No more @
|
||||
if (currentIndex < query.length) {
|
||||
parts.push({ type: 'text', content: query.substring(currentIndex) });
|
||||
}
|
||||
break;
|
||||
}
|
||||
let match: RegExpExecArray | null;
|
||||
|
||||
while ((match = atCommandRegex.exec(query)) !== null) {
|
||||
const matchIndex = match.index;
|
||||
const fullMatch = match[0];
|
||||
|
||||
// Add text before @
|
||||
if (atIndex > currentIndex) {
|
||||
if (matchIndex > lastIndex) {
|
||||
parts.push({
|
||||
type: 'text',
|
||||
content: query.substring(currentIndex, atIndex),
|
||||
content: query.substring(lastIndex, matchIndex),
|
||||
});
|
||||
}
|
||||
|
||||
// Parse @path
|
||||
let pathEndIndex = atIndex + 1;
|
||||
let inEscape = false;
|
||||
while (pathEndIndex < query.length) {
|
||||
const char = query[pathEndIndex];
|
||||
if (inEscape) {
|
||||
inEscape = false;
|
||||
} else if (char === '\\') {
|
||||
inEscape = true;
|
||||
} else if (/[,\s;!?()[\]{}]/.test(char)) {
|
||||
// Path ends at first whitespace or punctuation not escaped
|
||||
break;
|
||||
} else if (char === '.') {
|
||||
// For . we need to be more careful - only terminate if followed by whitespace or end of string
|
||||
// This allows file extensions like .txt, .js but terminates at sentence endings like "file.txt. Next sentence"
|
||||
const nextChar =
|
||||
pathEndIndex + 1 < query.length ? query[pathEndIndex + 1] : '';
|
||||
if (nextChar === '' || /\s/.test(nextChar)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pathEndIndex++;
|
||||
}
|
||||
const rawAtPath = query.substring(atIndex, pathEndIndex);
|
||||
// unescapePath expects the @ symbol to be present, and will handle it.
|
||||
const atPath = unescapePath(rawAtPath);
|
||||
const atPath = unescapePath(fullMatch);
|
||||
parts.push({ type: 'atPath', content: atPath });
|
||||
currentIndex = pathEndIndex;
|
||||
|
||||
lastIndex = matchIndex + fullMatch.length;
|
||||
}
|
||||
|
||||
// Add remaining text
|
||||
if (lastIndex < query.length) {
|
||||
parts.push({ type: 'text', content: query.substring(lastIndex) });
|
||||
}
|
||||
|
||||
// Filter out empty text parts that might result from consecutive @paths or leading/trailing spaces
|
||||
return parts.filter(
|
||||
(part) => !(part.type === 'text' && part.content.trim() === ''),
|
||||
|
||||
@@ -134,6 +134,14 @@ describe('parseInputForHighlighting', () => {
|
||||
{ text: '@/my\\ path/file.txt', type: 'file' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should highlight a file path with narrow non-breaking spaces (NNBSP)', () => {
  // The path contains U+202F, which must stay inside the single 'file' token.
  const fileReference = '@/my\u202Fpath/file.txt';
  const tokens = parseInputForHighlighting(`cat ${fileReference}`, 0);

  expect(tokens).toEqual([
    { text: 'cat ', type: 'default' },
    { text: fileReference, type: 'file' },
  ]);
});
|
||||
});
|
||||
|
||||
describe('parseInputForHighlighting with Transformations', () => {
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
import { LRUCache } from 'mnemonist';
|
||||
import { cpLen, cpSlice } from './textUtils.js';
|
||||
import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
|
||||
import { AT_COMMAND_PATH_REGEX_SOURCE } from '../hooks/atCommandProcessor.js';
|
||||
|
||||
export type HighlightToken = {
|
||||
text: string;
|
||||
@@ -19,11 +20,12 @@ export type HighlightToken = {
|
||||
|
||||
// Matches, in one capturing group, any of:
//   1. a slash command at the start of the input (e.g., /help),
//   2. an @ reference (a file path or an MCP resource URI),
//   3. a large paste placeholder (e.g., [Pasted Text: 6 lines]).
//
// The @ pattern uses the same source as the command processor to ensure consistency.
// It matches any character except strict delimiters (ASCII whitespace, comma, etc.).
// This supports URIs like `@file:///example.txt` and filenames with Unicode spaces (like NNBSP).
//
// Exactly one pattern string is passed, followed by the flags: the RegExp
// constructor takes (pattern, flags), so a leftover second pattern would be
// interpreted as the flags argument.
// NOTE: the `g` flag makes this instance stateful through `lastIndex`.
const HIGHLIGHT_REGEX = new RegExp(
  `(^/[a-zA-Z0-9_-]+|@${AT_COMMAND_PATH_REGEX_SOURCE}|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`,
  'g',
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user