mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-20 10:10:56 -07:00
fix(core): replace custom binary detection with isbinaryfile to correctly handle UTF-8 (U+FFFD) (#25297)
This commit is contained in:
@@ -68,6 +68,7 @@
|
||||
"https-proxy-agent": "^7.0.6",
|
||||
"ignore": "^7.0.0",
|
||||
"ipaddr.js": "^1.9.1",
|
||||
"isbinaryfile": "^5.0.7",
|
||||
"js-yaml": "^4.1.1",
|
||||
"json-stable-stringify": "^1.3.0",
|
||||
"marked": "^15.0.12",
|
||||
|
||||
@@ -286,6 +286,25 @@ describe('fileUtils', () => {
|
||||
}
|
||||
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false for a source file containing literal U+FFFD (replacement character)', async () => {
|
||||
const content =
|
||||
'// Rust-style source\npub const UNICODE_REPLACEMENT_CHAR: char = \'\uFFFD\';\nlet s = "\uFFFD\uFFFD\uFFFD";\n';
|
||||
actualNodeFs.writeFileSync(filePathForBinaryTest, content, 'utf8');
|
||||
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false for a file with mixed CJK, emoji, and U+FFFD content', async () => {
|
||||
const content = '\uFFFD\uFFFD hello \u4e16\u754c \uD83D\uDE00\n';
|
||||
actualNodeFs.writeFileSync(filePathForBinaryTest, content, 'utf8');
|
||||
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return true for a file with dense invalid UTF-8 byte sequences', async () => {
|
||||
const binaryContent = Buffer.alloc(128, 0x80);
|
||||
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
||||
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('BOM detection and encoding', () => {
|
||||
|
||||
@@ -8,6 +8,7 @@ import fs from 'node:fs';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import type { PartUnion } from '@google/genai';
|
||||
import { isBinaryFile as isBinaryFileCheck } from 'isbinaryfile';
|
||||
import mime from 'mime/lite';
|
||||
import type { FileSystemService } from '../services/fileSystemService.js';
|
||||
import { ToolErrorType } from '../tools/tool-error.js';
|
||||
@@ -345,53 +346,17 @@ export async function isEmpty(filePath: string): Promise<boolean> {
|
||||
|
||||
/**
|
||||
* Heuristic: determine if a file is likely binary.
|
||||
* Now BOM-aware: if a Unicode BOM is detected, we treat it as text.
|
||||
* For non-BOM files, retain the existing null-byte and non-printable ratio checks.
|
||||
* Delegates to the `isbinaryfile` package for UTF-8-aware detection.
|
||||
*/
|
||||
export async function isBinaryFile(filePath: string): Promise<boolean> {
|
||||
let fh: fs.promises.FileHandle | null = null;
|
||||
try {
|
||||
fh = await fs.promises.open(filePath, 'r');
|
||||
const stats = await fh.stat();
|
||||
const fileSize = stats.size;
|
||||
if (fileSize === 0) return false; // empty is not binary
|
||||
|
||||
// Sample up to 4KB from the head (previous behavior)
|
||||
const sampleSize = Math.min(4096, fileSize);
|
||||
const buf = Buffer.alloc(sampleSize);
|
||||
const { bytesRead } = await fh.read(buf, 0, sampleSize, 0);
|
||||
if (bytesRead === 0) return false;
|
||||
|
||||
// BOM → text (avoid false positives for UTF‑16/32 with nulls)
|
||||
const bom = detectBOM(buf.subarray(0, Math.min(4, bytesRead)));
|
||||
if (bom) return false;
|
||||
|
||||
let nonPrintableCount = 0;
|
||||
for (let i = 0; i < bytesRead; i++) {
|
||||
if (buf[i] === 0) return true; // strong indicator of binary when no BOM
|
||||
if (buf[i] < 9 || (buf[i] > 13 && buf[i] < 32)) {
|
||||
nonPrintableCount++;
|
||||
}
|
||||
}
|
||||
// If >30% non-printable characters, consider it binary
|
||||
return nonPrintableCount / bytesRead > 0.3;
|
||||
return await isBinaryFileCheck(filePath);
|
||||
} catch (error) {
|
||||
debugLogger.warn(
|
||||
`Failed to check if file is binary: ${filePath}`,
|
||||
error instanceof Error ? error.message : String(error),
|
||||
);
|
||||
return false;
|
||||
} finally {
|
||||
if (fh) {
|
||||
try {
|
||||
await fh.close();
|
||||
} catch (closeError) {
|
||||
debugLogger.warn(
|
||||
`Failed to close file handle for: ${filePath}`,
|
||||
closeError instanceof Error ? closeError.message : String(closeError),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user