// File: integration-tests/utf-bom-encoding.test.ts
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { TestRig } from './test-helper.js';

// The whole suite is skipped on Windows (Option A: avoid infra scope).
const describeUnlessWindows =
  process.platform === 'win32' ? describe.skip : describe;

// --- Fixture encoders: each returns the BOM followed by the encoded text -----

/** UTF-8 BOM (EF BB BF) + UTF-8 bytes of `s`. */
const utf8BOM = (s: string): Buffer =>
  Buffer.concat([Buffer.from([0xef, 0xbb, 0xbf]), Buffer.from(s, 'utf8')]);

/** UTF-16 LE BOM (FF FE) + UTF-16 LE bytes of `s`. */
const utf16LE = (s: string): Buffer =>
  Buffer.concat([Buffer.from([0xff, 0xfe]), Buffer.from(s, 'utf16le')]);

/** UTF-16 BE BOM (FE FF) + UTF-16 BE bytes of `s`. */
const utf16BE = (s: string): Buffer => {
  // Encode little-endian first, then swap every byte pair in place to get
  // big-endian code units.
  const payload = Buffer.from(s, 'utf16le').swap16();
  return Buffer.concat([Buffer.from([0xfe, 0xff]), payload]);
};

/** Encodes every code point of `s` as one 32-bit unit (no BOM). */
function encodeUTF32(s: string, littleEndian: boolean): Buffer {
  const codePoints = Array.from(s, (ch) => ch.codePointAt(0)!);
  const payload = Buffer.alloc(codePoints.length * 4);
  codePoints.forEach((cp, index) => {
    const offset = index * 4;
    if (littleEndian) {
      payload.writeUInt32LE(cp, offset);
    } else {
      payload.writeUInt32BE(cp, offset);
    }
  });
  return payload;
}

/** UTF-32 LE BOM (FF FE 00 00) + UTF-32 LE bytes of `s`. */
const utf32LE = (s: string): Buffer =>
  Buffer.concat([Buffer.from([0xff, 0xfe, 0x00, 0x00]), encodeUTF32(s, true)]);

/** UTF-32 BE BOM (00 00 FE FF) + UTF-32 BE bytes of `s`. */
const utf32BE = (s: string): Buffer =>
  Buffer.concat([Buffer.from([0x00, 0x00, 0xfe, 0xff]), encodeUTF32(s, false)]);

// Minimal binary sentinel (PNG header only)
const fakePng = (): Buffer =>
  Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);

let rig: TestRig;
let dir: string;

describeUnlessWindows('BOM end-to-end integration', () => {
  beforeAll(async () => {
    rig = new TestRig();
    await rig.setup('bom-integration');
    dir = rig.testDir!;
  });

  afterAll(async () => {
    await rig.cleanup();
  });

  /**
   * Writes `content` to `filename` in the rig's test directory, asks for the
   * file to be read back, waits for a `read_file` tool call, and checks the
   * output.
   *
   * @param expectedText Text that must appear in the output, or `null` when
   *   the file is expected to be reported as binary.
   */
  async function runAndAssert(
    filename: string,
    content: Buffer,
    expectedText: string | null,
  ) {
    writeFileSync(join(dir, filename), content);
    const output = await rig.run(
      `read the file ${filename} and output its exact contents`,
    );
    await rig.waitForToolCall('read_file');
    const lowered = output.toLowerCase();

    if (expectedText !== null) {
      expect(output.includes(expectedText)).toBeTruthy();
      expect(lowered.includes('skipped binary file')).toBeFalsy();
      return;
    }

    // Binary case: any one of these phrases counts as a refusal to display.
    const binaryMarkers = ['binary', 'skipped binary file', 'cannot display'];
    expect(
      binaryMarkers.some((marker) => lowered.includes(marker)),
    ).toBeTruthy();
  }

  // One row per BOM flavour: test title, fixture file name, encoder, text.
  const textCases: Array<{
    title: string;
    filename: string;
    encode: (s: string) => Buffer;
    text: string;
  }> = [
    {
      title: 'UTF-8 BOM',
      filename: 'utf8.txt',
      encode: utf8BOM,
      text: 'BOM_OK UTF-8',
    },
    {
      title: 'UTF-16 LE BOM',
      filename: 'utf16le.txt',
      encode: utf16LE,
      text: 'BOM_OK UTF-16LE',
    },
    {
      title: 'UTF-16 BE BOM',
      filename: 'utf16be.txt',
      encode: utf16BE,
      text: 'BOM_OK UTF-16BE',
    },
    {
      title: 'UTF-32 LE BOM',
      filename: 'utf32le.txt',
      encode: utf32LE,
      text: 'BOM_OK UTF-32LE',
    },
    {
      title: 'UTF-32 BE BOM',
      filename: 'utf32be.txt',
      encode: utf32BE,
      text: 'BOM_OK UTF-32BE',
    },
  ];

  for (const { title, filename, encode, text } of textCases) {
    it(title, async () => {
      await runAndAssert(filename, encode(text), text);
    });
  }

  it('Binary sentinel', async () => {
    await runAndAssert('image.png', fakePng(), null);
  });
});
// File: packages/core/src/utils/fileUtils.test.ts
// This suite sits inside the file's existing describe('fileUtils', ...) block.
// It uses names imported at the top of that file (path, os, fsPromises and the
// vitest globals); `detectBOM` and `readFileWithEncoding` must be part of the
// import from './fileUtils.js', next to `isBinaryFile`, `detectFileType` and
// `processSingleFileContent`.

describe('BOM detection and encoding', () => {
  // Mixes ASCII, BMP (CJK) and an astral-plane emoji so that multi-byte
  // sequences and UTF-16 surrogate pairs are both exercised.
  const SAMPLE_TEXT = 'Hello, 世界! 🌍';

  let testDir: string;

  /** Writes `bytes` to `fileName` inside the per-test temp dir; returns the path. */
  const writeFixture = async (
    fileName: string,
    bytes: Buffer,
  ): Promise<string> => {
    const filePath = path.join(testDir, fileName);
    await fsPromises.writeFile(filePath, bytes);
    return filePath;
  };

  beforeEach(async () => {
    testDir = await fsPromises.mkdtemp(
      path.join(await fsPromises.realpath(os.tmpdir()), 'fileUtils-bom-test-'),
    );
  });

  afterEach(async () => {
    if (testDir) {
      await fsPromises.rm(testDir, { recursive: true, force: true });
    }
  });

  describe('detectBOM', () => {
    const bomCases: Array<{
      label: string;
      bytes: number[];
      expected: { encoding: string; bomLength: number };
    }> = [
      {
        label: 'UTF-8',
        bytes: [0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0x6f],
        expected: { encoding: 'utf8', bomLength: 3 },
      },
      {
        label: 'UTF-16 LE',
        bytes: [0xff, 0xfe, 0x48, 0x00, 0x65, 0x00],
        expected: { encoding: 'utf16le', bomLength: 2 },
      },
      {
        label: 'UTF-16 BE',
        bytes: [0xfe, 0xff, 0x00, 0x48, 0x00, 0x65],
        expected: { encoding: 'utf16be', bomLength: 2 },
      },
      {
        label: 'UTF-32 LE',
        bytes: [0xff, 0xfe, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00],
        expected: { encoding: 'utf32le', bomLength: 4 },
      },
      {
        label: 'UTF-32 BE',
        bytes: [0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x48],
        expected: { encoding: 'utf32be', bomLength: 4 },
      },
    ];

    for (const { label, bytes, expected } of bomCases) {
      it(`should detect ${label} BOM`, () => {
        expect(detectBOM(Buffer.from(bytes))).toEqual(expected);
      });
    }

    it('should return null for no BOM', () => {
      const buf = Buffer.from([0x48, 0x65, 0x6c, 0x6c, 0x6f]);
      expect(detectBOM(buf)).toBeNull();
    });

    it('should return null for empty buffer', () => {
      expect(detectBOM(Buffer.alloc(0))).toBeNull();
    });

    it('should return null for partial BOM', () => {
      const buf = Buffer.from([0xef, 0xbb]); // Incomplete UTF-8 BOM
      expect(detectBOM(buf)).toBeNull();
    });
  });

  describe('readFileWithEncoding', () => {
    /**
     * Hand-rolled UTF-16 BE encoder, kept independent of the code under test.
     * Code points above U+FFFF are split into a surrogate pair.
     */
    const encodeUTF16BE = (text: string): Buffer => {
      const bytes: number[] = [];
      const pushUnit = (unit: number): void => {
        bytes.push((unit >> 8) & 0xff, unit & 0xff);
      };
      for (const char of Array.from(text)) {
        const code = char.codePointAt(0)!;
        if (code > 0xffff) {
          const offset = code - 0x10000;
          pushUnit(0xd800 + (offset >> 10));
          pushUnit(0xdc00 + (offset & 0x3ff));
        } else {
          pushUnit(code);
        }
      }
      return Buffer.from(bytes);
    };

    /** Hand-rolled UTF-32 encoder: four bytes per code point, no BOM. */
    const encodeUTF32 = (text: string, littleEndian: boolean): Buffer => {
      const bytes: number[] = [];
      for (const char of Array.from(text)) {
        const code = char.codePointAt(0)!;
        const bigEndian = [
          (code >> 24) & 0xff,
          (code >> 16) & 0xff,
          (code >> 8) & 0xff,
          code & 0xff,
        ];
        bytes.push(...(littleEndian ? bigEndian.reverse() : bigEndian));
      }
      return Buffer.from(bytes);
    };

    const bomFixtures: Array<{
      label: string;
      fileName: string;
      bom: number[];
      payload: Buffer;
    }> = [
      {
        label: 'UTF-8',
        fileName: 'utf8-bom.txt',
        bom: [0xef, 0xbb, 0xbf],
        payload: Buffer.from(SAMPLE_TEXT, 'utf8'),
      },
      {
        label: 'UTF-16 LE',
        fileName: 'utf16le-bom.txt',
        bom: [0xff, 0xfe],
        payload: Buffer.from(SAMPLE_TEXT, 'utf16le'),
      },
      {
        label: 'UTF-16 BE',
        fileName: 'utf16be-bom.txt',
        bom: [0xfe, 0xff],
        payload: encodeUTF16BE(SAMPLE_TEXT),
      },
      {
        label: 'UTF-32 LE',
        fileName: 'utf32le-bom.txt',
        bom: [0xff, 0xfe, 0x00, 0x00],
        payload: encodeUTF32(SAMPLE_TEXT, true),
      },
      {
        label: 'UTF-32 BE',
        fileName: 'utf32be-bom.txt',
        bom: [0x00, 0x00, 0xfe, 0xff],
        payload: encodeUTF32(SAMPLE_TEXT, false),
      },
    ];

    for (const { label, fileName, bom, payload } of bomFixtures) {
      it(`should read ${label} BOM file correctly`, async () => {
        const filePath = await writeFixture(
          fileName,
          Buffer.concat([Buffer.from(bom), payload]),
        );

        const result = await readFileWithEncoding(filePath);
        expect(result).toBe(SAMPLE_TEXT);
      });
    }

    it('should read file without BOM as UTF-8', async () => {
      const content = 'Hello, 世界!';
      const filePath = await writeFixture(
        'no-bom.txt',
        Buffer.from(content, 'utf8'),
      );

      const result = await readFileWithEncoding(filePath);
      expect(result).toBe(content);
    });

    it('should handle empty file', async () => {
      const filePath = await writeFixture('empty.txt', Buffer.alloc(0));

      const result = await readFileWithEncoding(filePath);
      expect(result).toBe('');
    });
  });

  describe('isBinaryFile with BOM awareness', () => {
    // UTF-16/32 text is full of 0x00 bytes; these cases prove that a BOM keeps
    // such files from being classified as binary.
    const textFixtures: Array<{
      label: string;
      fileName: string;
      bytes: Buffer;
    }> = [
      {
        label: 'UTF-8',
        fileName: 'utf8-bom-test.txt',
        bytes: Buffer.concat([
          Buffer.from([0xef, 0xbb, 0xbf]),
          Buffer.from('Hello, world!', 'utf8'),
        ]),
      },
      {
        label: 'UTF-16 LE',
        fileName: 'utf16le-bom-test.txt',
        bytes: Buffer.concat([
          Buffer.from([0xff, 0xfe]),
          Buffer.from('Hello, world!', 'utf16le'),
        ]),
      },
      {
        label: 'UTF-16 BE',
        fileName: 'utf16be-bom-test.txt',
        // BOM + "Hello, world!" as big-endian 16-bit units
        bytes: Buffer.from([
          0xfe, 0xff,
          0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f,
          0x00, 0x2c, 0x00, 0x20,
          0x00, 0x77, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x64,
          0x00, 0x21,
        ]),
      },
      {
        label: 'UTF-32 LE',
        fileName: 'utf32le-bom-test.txt',
        // BOM + "Hello" as little-endian 32-bit units
        bytes: Buffer.from([
          0xff, 0xfe, 0x00, 0x00,
          0x48, 0x00, 0x00, 0x00,
          0x65, 0x00, 0x00, 0x00,
          0x6c, 0x00, 0x00, 0x00,
          0x6c, 0x00, 0x00, 0x00,
          0x6f, 0x00, 0x00, 0x00,
        ]),
      },
      {
        label: 'UTF-32 BE',
        fileName: 'utf32be-bom-test.txt',
        // BOM + "Hello" as big-endian 32-bit units
        bytes: Buffer.from([
          0x00, 0x00, 0xfe, 0xff,
          0x00, 0x00, 0x00, 0x48,
          0x00, 0x00, 0x00, 0x65,
          0x00, 0x00, 0x00, 0x6c,
          0x00, 0x00, 0x00, 0x6c,
          0x00, 0x00, 0x00, 0x6f,
        ]),
      },
    ];

    for (const { label, fileName, bytes } of textFixtures) {
      it(`should not treat ${label} BOM file as binary`, async () => {
        const filePath = await writeFixture(fileName, bytes);

        const result = await isBinaryFile(filePath);
        expect(result).toBe(false);
      });
    }

    it('should still treat actual binary file as binary', async () => {
      // PNG header + some binary data with null bytes
      const pngHeader = Buffer.from([
        0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
      ]);
      const binaryData = Buffer.from([
        0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52,
      ]); // IHDR chunk with nulls
      const filePath = await writeFixture(
        'test.png',
        Buffer.concat([pngHeader, binaryData]),
      );

      const result = await isBinaryFile(filePath);
      expect(result).toBe(true);
    });

    it('should treat file with null bytes (no BOM) as binary', async () => {
      const content = Buffer.from([
        0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x77, 0x6f, 0x72, 0x6c, 0x64,
      ]);
      const filePath = await writeFixture('null-bytes.bin', content);

      const result = await isBinaryFile(filePath);
      expect(result).toBe(true);
    });
  });
});
// File: packages/core/src/utils/fileUtils.ts (BOM helpers and binary detection)

// --- Unicode BOM detection & decoding helpers --------------------------------

type UnicodeEncoding = 'utf8' | 'utf16le' | 'utf16be' | 'utf32le' | 'utf32be';

interface BOMInfo {
  encoding: UnicodeEncoding;
  bomLength: number;
}

/**
 * Known BOM byte signatures, in the order they must be tested.
 * The UTF-32 LE signature (FF FE 00 00) starts with the UTF-16 LE signature
 * (FF FE), so the 4-byte entries have to come before the 2-byte ones.
 */
const BOM_SIGNATURES: ReadonlyArray<{
  encoding: UnicodeEncoding;
  bytes: readonly number[];
}> = [
  { encoding: 'utf32le', bytes: [0xff, 0xfe, 0x00, 0x00] },
  { encoding: 'utf32be', bytes: [0x00, 0x00, 0xfe, 0xff] },
  { encoding: 'utf8', bytes: [0xef, 0xbb, 0xbf] },
  { encoding: 'utf16le', bytes: [0xff, 0xfe] },
  { encoding: 'utf16be', bytes: [0xfe, 0xff] },
];

/**
 * Detect a Unicode BOM (Byte Order Mark) if present.
 * Only the first 4 bytes of `buf` are inspected.
 *
 * @param buf Bytes from the start of a file (may be shorter than 4 bytes).
 * @returns The detected encoding and the BOM's length in bytes, or `null`
 *   when `buf` does not start with a complete BOM.
 */
export function detectBOM(buf: Buffer): BOMInfo | null {
  for (const { encoding, bytes } of BOM_SIGNATURES) {
    if (
      buf.length >= bytes.length &&
      bytes.every((byte, index) => buf[index] === byte)
    ) {
      return { encoding, bomLength: bytes.length };
    }
  }
  return null;
}

/**
 * Convert a UTF-16 BE buffer to a JS string by swapping to LE and then using
 * Node's 'utf16le' decoder.
 *
 * A trailing odd byte (truncated code unit) is ignored, in the same way that
 * decodeUTF32 ignores partial trailing bytes. Without this, `swap16()` throws
 * because it requires the length to be a multiple of 2.
 *
 * @param buf UTF-16 BE bytes with the BOM already removed. Not modified.
 */
function decodeUTF16BE(buf: Buffer): string {
  const usable = buf.length - (buf.length % 2);
  if (usable === 0) return '';
  // swap16 mutates in place, so work on a copy of the usable prefix.
  const swapped = Buffer.from(buf.subarray(0, usable));
  swapped.swap16();
  return swapped.toString('utf16le');
}

/**
 * Decode a UTF-32 buffer (LE or BE) into a JS string.
 * Invalid code points (above U+10FFFF, or in the surrogate range) are replaced
 * with U+FFFD; partial trailing bytes are ignored.
 *
 * @param buf UTF-32 bytes with the BOM already removed.
 * @param littleEndian `true` for UTF-32 LE, `false` for UTF-32 BE.
 */
function decodeUTF32(buf: Buffer, littleEndian: boolean): string {
  const usable = buf.length - (buf.length % 4);
  let out = '';
  for (let offset = 0; offset < usable; offset += 4) {
    const cp = littleEndian
      ? buf.readUInt32LE(offset)
      : buf.readUInt32BE(offset);
    const isScalarValue = cp <= 0x10ffff && (cp < 0xd800 || cp > 0xdfff);
    out += isScalarValue ? String.fromCodePoint(cp) : '\uFFFD';
  }
  return out;
}

/**
 * Read a file as text, honoring BOM encodings (UTF-8/16/32) and stripping the
 * BOM. Falls back to UTF-8 when no BOM is present.
 *
 * processSingleFileContent reads its 'text' and SVG content through this
 * function, so a BOM character is not left in the returned content.
 *
 * @param filePath Path of the file to read.
 * @returns The decoded file content, without the BOM. Empty files yield ''.
 */
export async function readFileWithEncoding(filePath: string): Promise<string> {
  // Read the file once; detect the BOM and decode from that single buffer.
  const full = await fs.promises.readFile(filePath);
  if (full.length === 0) return '';

  const bom = detectBOM(full);
  if (!bom) {
    // No BOM -> treat as UTF-8
    return full.toString('utf8');
  }

  // Strip the BOM and decode per encoding
  const content = full.subarray(bom.bomLength);
  switch (bom.encoding) {
    case 'utf8':
      return content.toString('utf8');
    case 'utf16le':
      return content.toString('utf16le');
    case 'utf16be':
      return decodeUTF16BE(content);
    case 'utf32le':
      return decodeUTF32(content, true);
    case 'utf32be':
      return decodeUTF32(content, false);
    default:
      // Defensive fallback; should be unreachable
      return content.toString('utf8');
  }
}

/**
 * Heuristic: determine if a file is likely binary.
 *
 * Up to the first 4KB of the file is sampled. If the sample starts with a
 * Unicode BOM the file is treated as text (UTF-16/32 text contains many null
 * bytes and would otherwise be misclassified). Without a BOM, any null byte
 * means binary; otherwise the file is binary when more than 30% of the sampled
 * bytes are non-printable control characters.
 *
 * @param filePath Path to the file.
 * @returns `true` if the file appears to be binary. Empty files, and files
 *   that cannot be opened or read, yield `false` (the error is logged with
 *   console.warn and not rethrown).
 */
export async function isBinaryFile(filePath: string): Promise<boolean> {
  let handle: fs.promises.FileHandle | undefined;
  try {
    handle = await fs.promises.open(filePath, 'r');
    const { size } = await handle.stat();
    if (size === 0) return false; // empty is not binary

    // Sample up to 4KB from the head of the file
    const sample = Buffer.alloc(Math.min(4096, size));
    const { bytesRead } = await handle.read(sample, 0, sample.length, 0);
    if (bytesRead === 0) return false;

    // BOM -> text (avoid false positives for UTF-16/32 with nulls)
    if (detectBOM(sample.subarray(0, Math.min(4, bytesRead)))) return false;

    let nonPrintableCount = 0;
    for (const byte of sample.subarray(0, bytesRead)) {
      if (byte === 0) return true; // strong indicator of binary when no BOM
      // Control characters other than TAB..CR (9..13)
      if (byte < 9 || (byte > 13 && byte < 32)) {
        nonPrintableCount++;
      }
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / bytesRead > 0.3;
  } catch (error) {
    console.warn(
      `Failed to check if file is binary: ${filePath}`,
      error instanceof Error ? error.message : String(error),
    );
    return false;
  } finally {
    if (handle) {
      try {
        await handle.close();
      } catch (closeError) {
        console.warn(
          `Failed to close file handle for: ${filePath}`,
          closeError instanceof Error ? closeError.message : String(closeError),
        );
      }
    }
  }
}