2025-05-29 22:30:18 +00:00
|
|
|
|
/**
|
|
|
|
|
|
* @license
|
|
|
|
|
|
* Copyright 2025 Google LLC
|
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2025-07-21 08:16:42 +09:00
|
|
|
|
import fs from 'node:fs';
|
2025-09-08 14:44:56 -07:00
|
|
|
|
import fsPromises from 'node:fs/promises';
|
2025-07-21 08:16:42 +09:00
|
|
|
|
import path from 'node:path';
|
2025-08-26 00:04:53 +02:00
|
|
|
|
import type { PartUnion } from '@google/genai';
|
2025-09-04 23:00:27 +02:00
|
|
|
|
// eslint-disable-next-line import/no-internal-modules
|
|
|
|
|
|
import mime from 'mime/lite';
|
2025-08-26 00:04:53 +02:00
|
|
|
|
import type { FileSystemService } from '../services/fileSystemService.js';
|
2025-08-20 16:13:29 -07:00
|
|
|
|
import { ToolErrorType } from '../tools/tool-error.js';
|
2025-08-23 13:35:00 +09:00
|
|
|
|
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
|
2025-10-16 17:25:30 -07:00
|
|
|
|
import { createRequire as createModuleRequire } from 'node:module';
|
|
|
|
|
|
|
|
|
|
|
|
const requireModule = createModuleRequire(import.meta.url);
|
|
|
|
|
|
|
|
|
|
|
|
export async function readWasmBinaryFromDisk(
|
|
|
|
|
|
specifier: string,
|
|
|
|
|
|
): Promise<Uint8Array> {
|
|
|
|
|
|
const resolvedPath = requireModule.resolve(specifier);
|
|
|
|
|
|
const buffer = await fsPromises.readFile(resolvedPath);
|
|
|
|
|
|
return new Uint8Array(buffer);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
export async function loadWasmBinary(
|
|
|
|
|
|
dynamicImport: () => Promise<{ default: Uint8Array }>,
|
|
|
|
|
|
fallbackSpecifier: string,
|
|
|
|
|
|
): Promise<Uint8Array> {
|
|
|
|
|
|
try {
|
|
|
|
|
|
const module = await dynamicImport();
|
|
|
|
|
|
if (module?.default instanceof Uint8Array) {
|
|
|
|
|
|
return module.default;
|
|
|
|
|
|
}
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
try {
|
|
|
|
|
|
return await readWasmBinaryFromDisk(fallbackSpecifier);
|
|
|
|
|
|
} catch {
|
|
|
|
|
|
throw error;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
return await readWasmBinaryFromDisk(fallbackSpecifier);
|
|
|
|
|
|
} catch (error) {
|
|
|
|
|
|
throw new Error('WASM binary module did not provide a Uint8Array export', {
|
|
|
|
|
|
cause: error,
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-05-29 22:30:18 +00:00
|
|
|
|
|
|
|
|
|
|
// Constants for text file processing.
// Maximum number of lines returned for a text file when the caller does not
// pass an explicit `limit` (see processSingleFileContent).
const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
// Lines longer than this are cut and suffixed with '... [truncated]'.
const MAX_LINE_LENGTH_TEXT_FILE = 2000;

// Default text encoding; exported for use by callers elsewhere in the project.
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
|
|
|
|
|
|
|
2025-08-28 21:13:46 -04:00
|
|
|
|
// --- Unicode BOM detection & decoding helpers --------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
type UnicodeEncoding = 'utf8' | 'utf16le' | 'utf16be' | 'utf32le' | 'utf32be';
|
|
|
|
|
|
|
|
|
|
|
|
interface BOMInfo {
|
|
|
|
|
|
encoding: UnicodeEncoding;
|
|
|
|
|
|
bomLength: number;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Detect a Unicode BOM (Byte Order Mark) if present.
|
|
|
|
|
|
* Reads up to the first 4 bytes and returns encoding + BOM length, else null.
|
|
|
|
|
|
*/
|
|
|
|
|
|
export function detectBOM(buf: Buffer): BOMInfo | null {
|
|
|
|
|
|
if (buf.length >= 4) {
|
|
|
|
|
|
// UTF-32 LE: FF FE 00 00
|
|
|
|
|
|
if (
|
|
|
|
|
|
buf[0] === 0xff &&
|
|
|
|
|
|
buf[1] === 0xfe &&
|
|
|
|
|
|
buf[2] === 0x00 &&
|
|
|
|
|
|
buf[3] === 0x00
|
|
|
|
|
|
) {
|
|
|
|
|
|
return { encoding: 'utf32le', bomLength: 4 };
|
|
|
|
|
|
}
|
|
|
|
|
|
// UTF-32 BE: 00 00 FE FF
|
|
|
|
|
|
if (
|
|
|
|
|
|
buf[0] === 0x00 &&
|
|
|
|
|
|
buf[1] === 0x00 &&
|
|
|
|
|
|
buf[2] === 0xfe &&
|
|
|
|
|
|
buf[3] === 0xff
|
|
|
|
|
|
) {
|
|
|
|
|
|
return { encoding: 'utf32be', bomLength: 4 };
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if (buf.length >= 3) {
|
|
|
|
|
|
// UTF-8: EF BB BF
|
|
|
|
|
|
if (buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
|
|
|
|
|
|
return { encoding: 'utf8', bomLength: 3 };
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if (buf.length >= 2) {
|
|
|
|
|
|
// UTF-16 LE: FF FE (but not UTF-32 LE already matched above)
|
|
|
|
|
|
if (
|
|
|
|
|
|
buf[0] === 0xff &&
|
|
|
|
|
|
buf[1] === 0xfe &&
|
|
|
|
|
|
(buf.length < 4 || buf[2] !== 0x00 || buf[3] !== 0x00)
|
|
|
|
|
|
) {
|
|
|
|
|
|
return { encoding: 'utf16le', bomLength: 2 };
|
|
|
|
|
|
}
|
|
|
|
|
|
// UTF-16 BE: FE FF
|
|
|
|
|
|
if (buf[0] === 0xfe && buf[1] === 0xff) {
|
|
|
|
|
|
return { encoding: 'utf16be', bomLength: 2 };
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return null;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Convert a UTF-16 BE buffer to a JS string by swapping to LE then using Node's decoder.
|
|
|
|
|
|
* (Node has 'utf16le' but not 'utf16be'.)
|
|
|
|
|
|
*/
|
|
|
|
|
|
function decodeUTF16BE(buf: Buffer): string {
|
|
|
|
|
|
if (buf.length === 0) return '';
|
|
|
|
|
|
const swapped = Buffer.from(buf); // swap16 mutates in place, so copy
|
|
|
|
|
|
swapped.swap16();
|
|
|
|
|
|
return swapped.toString('utf16le');
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Decode a UTF-32 buffer (LE or BE) into a JS string.
|
|
|
|
|
|
* Invalid code points are replaced with U+FFFD, partial trailing bytes are ignored.
|
|
|
|
|
|
*/
|
|
|
|
|
|
function decodeUTF32(buf: Buffer, littleEndian: boolean): string {
|
|
|
|
|
|
if (buf.length < 4) return '';
|
|
|
|
|
|
const usable = buf.length - (buf.length % 4);
|
|
|
|
|
|
let out = '';
|
|
|
|
|
|
for (let i = 0; i < usable; i += 4) {
|
|
|
|
|
|
const cp = littleEndian
|
|
|
|
|
|
? (buf[i] |
|
|
|
|
|
|
(buf[i + 1] << 8) |
|
|
|
|
|
|
(buf[i + 2] << 16) |
|
|
|
|
|
|
(buf[i + 3] << 24)) >>>
|
|
|
|
|
|
0
|
|
|
|
|
|
: (buf[i + 3] |
|
|
|
|
|
|
(buf[i + 2] << 8) |
|
|
|
|
|
|
(buf[i + 1] << 16) |
|
|
|
|
|
|
(buf[i] << 24)) >>>
|
|
|
|
|
|
0;
|
|
|
|
|
|
// Valid planes: 0x0000..0x10FFFF excluding surrogates
|
|
|
|
|
|
if (cp <= 0x10ffff && !(cp >= 0xd800 && cp <= 0xdfff)) {
|
|
|
|
|
|
out += String.fromCodePoint(cp);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
out += '\uFFFD';
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return out;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Read a file as text, honoring BOM encodings (UTF‑8/16/32) and stripping the BOM.
|
|
|
|
|
|
* Falls back to utf8 when no BOM is present.
|
|
|
|
|
|
*/
|
|
|
|
|
|
export async function readFileWithEncoding(filePath: string): Promise<string> {
|
|
|
|
|
|
// Read the file once; detect BOM and decode from the single buffer.
|
|
|
|
|
|
const full = await fs.promises.readFile(filePath);
|
|
|
|
|
|
if (full.length === 0) return '';
|
|
|
|
|
|
|
|
|
|
|
|
const bom = detectBOM(full);
|
|
|
|
|
|
if (!bom) {
|
|
|
|
|
|
// No BOM → treat as UTF‑8
|
|
|
|
|
|
return full.toString('utf8');
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Strip BOM and decode per encoding
|
|
|
|
|
|
const content = full.subarray(bom.bomLength);
|
|
|
|
|
|
switch (bom.encoding) {
|
|
|
|
|
|
case 'utf8':
|
|
|
|
|
|
return content.toString('utf8');
|
|
|
|
|
|
case 'utf16le':
|
|
|
|
|
|
return content.toString('utf16le');
|
|
|
|
|
|
case 'utf16be':
|
|
|
|
|
|
return decodeUTF16BE(content);
|
|
|
|
|
|
case 'utf32le':
|
|
|
|
|
|
return decodeUTF32(content, true);
|
|
|
|
|
|
case 'utf32be':
|
|
|
|
|
|
return decodeUTF32(content, false);
|
|
|
|
|
|
default:
|
|
|
|
|
|
// Defensive fallback; should be unreachable
|
|
|
|
|
|
return content.toString('utf8');
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Add file operation telemetry (#1068)
Introduces telemetry for file create, read, and update operations.
This change adds the `gemini_cli.file.operation.count` metric, recorded by the `read-file`, `read-many-files`, and `write-file` tools.
The metric includes the following attributes:
- `operation` (string: `create`, `read`, `update`): The type of file operation.
- `lines` (optional, Int): Number of lines in the file.
- `mimetype` (optional, string): Mimetype of the file.
- `extension` (optional, string): File extension of the file.
Here is a stacked bar chart of file operations by extension (`js`, `ts`, `md`):

Here is a stacked bar chart of file operations by type (`create`, `read`, `update`):

#750
cc @allenhutchison as discussed
2025-06-15 16:24:53 -04:00
|
|
|
|
/**
|
|
|
|
|
|
* Looks up the specific MIME type for a file path.
|
|
|
|
|
|
* @param filePath Path to the file.
|
|
|
|
|
|
* @returns The specific MIME type string (e.g., 'text/python', 'application/javascript') or undefined if not found or ambiguous.
|
|
|
|
|
|
*/
|
|
|
|
|
|
export function getSpecificMimeType(filePath: string): string | undefined {
|
2025-09-04 23:00:27 +02:00
|
|
|
|
const lookedUpMime = mime.getType(filePath);
|
Add file operation telemetry (#1068)
Introduces telemetry for file create, read, and update operations.
This change adds the `gemini_cli.file.operation.count` metric, recorded by the `read-file`, `read-many-files`, and `write-file` tools.
The metric includes the following attributes:
- `operation` (string: `create`, `read`, `update`): The type of file operation.
- `lines` (optional, Int): Number of lines in the file.
- `mimetype` (optional, string): Mimetype of the file.
- `extension` (optional, string): File extension of the file.
Here is a stacked bar chart of file operations by extension (`js`, `ts`, `md`):

Here is a stacked bar chart of file operations by type (`create`, `read`, `update`):

#750
cc @allenhutchison as discussed
2025-06-15 16:24:53 -04:00
|
|
|
|
return typeof lookedUpMime === 'string' ? lookedUpMime : undefined;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-05-29 22:30:18 +00:00
|
|
|
|
/**
|
|
|
|
|
|
* Checks if a path is within a given root directory.
|
|
|
|
|
|
* @param pathToCheck The absolute path to check.
|
|
|
|
|
|
* @param rootDirectory The absolute root directory.
|
|
|
|
|
|
* @returns True if the path is within the root directory, false otherwise.
|
|
|
|
|
|
*/
|
|
|
|
|
|
export function isWithinRoot(
|
|
|
|
|
|
pathToCheck: string,
|
|
|
|
|
|
rootDirectory: string,
|
|
|
|
|
|
): boolean {
|
2025-07-14 22:55:49 -07:00
|
|
|
|
const normalizedPathToCheck = path.resolve(pathToCheck);
|
|
|
|
|
|
const normalizedRootDirectory = path.resolve(rootDirectory);
|
2025-05-29 22:30:18 +00:00
|
|
|
|
|
|
|
|
|
|
// Ensure the rootDirectory path ends with a separator for correct startsWith comparison,
|
|
|
|
|
|
// unless it's the root path itself (e.g., '/' or 'C:\').
|
|
|
|
|
|
const rootWithSeparator =
|
|
|
|
|
|
normalizedRootDirectory === path.sep ||
|
|
|
|
|
|
normalizedRootDirectory.endsWith(path.sep)
|
|
|
|
|
|
? normalizedRootDirectory
|
|
|
|
|
|
: normalizedRootDirectory + path.sep;
|
|
|
|
|
|
|
|
|
|
|
|
return (
|
|
|
|
|
|
normalizedPathToCheck === normalizedRootDirectory ||
|
|
|
|
|
|
normalizedPathToCheck.startsWith(rootWithSeparator)
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2025-08-28 21:13:46 -04:00
|
|
|
|
* Heuristic: determine if a file is likely binary.
|
|
|
|
|
|
* Now BOM-aware: if a Unicode BOM is detected, we treat it as text.
|
|
|
|
|
|
* For non-BOM files, retain the existing null-byte and non-printable ratio checks.
|
2025-05-29 22:30:18 +00:00
|
|
|
|
*/
|
2025-07-21 08:16:42 +09:00
|
|
|
|
export async function isBinaryFile(filePath: string): Promise<boolean> {
|
2025-08-28 21:13:46 -04:00
|
|
|
|
let fh: fs.promises.FileHandle | null = null;
|
2025-05-29 22:30:18 +00:00
|
|
|
|
try {
|
2025-08-28 21:13:46 -04:00
|
|
|
|
fh = await fs.promises.open(filePath, 'r');
|
|
|
|
|
|
const stats = await fh.stat();
|
2025-07-21 08:16:42 +09:00
|
|
|
|
const fileSize = stats.size;
|
2025-08-28 21:13:46 -04:00
|
|
|
|
if (fileSize === 0) return false; // empty is not binary
|
2025-05-29 22:30:18 +00:00
|
|
|
|
|
2025-08-28 21:13:46 -04:00
|
|
|
|
// Sample up to 4KB from the head (previous behavior)
|
|
|
|
|
|
const sampleSize = Math.min(4096, fileSize);
|
|
|
|
|
|
const buf = Buffer.alloc(sampleSize);
|
|
|
|
|
|
const { bytesRead } = await fh.read(buf, 0, sampleSize, 0);
|
2025-05-29 22:30:18 +00:00
|
|
|
|
if (bytesRead === 0) return false;
|
|
|
|
|
|
|
2025-08-28 21:13:46 -04:00
|
|
|
|
// BOM → text (avoid false positives for UTF‑16/32 with nulls)
|
|
|
|
|
|
const bom = detectBOM(buf.subarray(0, Math.min(4, bytesRead)));
|
|
|
|
|
|
if (bom) return false;
|
|
|
|
|
|
|
2025-05-29 22:30:18 +00:00
|
|
|
|
let nonPrintableCount = 0;
|
|
|
|
|
|
for (let i = 0; i < bytesRead; i++) {
|
2025-08-28 21:13:46 -04:00
|
|
|
|
if (buf[i] === 0) return true; // strong indicator of binary when no BOM
|
|
|
|
|
|
if (buf[i] < 9 || (buf[i] > 13 && buf[i] < 32)) {
|
2025-05-29 22:30:18 +00:00
|
|
|
|
nonPrintableCount++;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
// If >30% non-printable characters, consider it binary
|
|
|
|
|
|
return nonPrintableCount / bytesRead > 0.3;
|
2025-07-21 08:16:42 +09:00
|
|
|
|
} catch (error) {
|
|
|
|
|
|
console.warn(
|
|
|
|
|
|
`Failed to check if file is binary: ${filePath}`,
|
|
|
|
|
|
error instanceof Error ? error.message : String(error),
|
|
|
|
|
|
);
|
2025-05-29 22:30:18 +00:00
|
|
|
|
return false;
|
2025-07-21 08:16:42 +09:00
|
|
|
|
} finally {
|
2025-08-28 21:13:46 -04:00
|
|
|
|
if (fh) {
|
2025-07-21 08:16:42 +09:00
|
|
|
|
try {
|
2025-08-28 21:13:46 -04:00
|
|
|
|
await fh.close();
|
2025-07-21 08:16:42 +09:00
|
|
|
|
} catch (closeError) {
|
|
|
|
|
|
console.warn(
|
|
|
|
|
|
`Failed to close file handle for: ${filePath}`,
|
|
|
|
|
|
closeError instanceof Error ? closeError.message : String(closeError),
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-05-29 22:30:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Detects the type of file based on extension and content.
|
|
|
|
|
|
* @param filePath Path to the file.
|
2025-07-21 08:16:42 +09:00
|
|
|
|
* @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
|
2025-05-29 22:30:18 +00:00
|
|
|
|
*/
|
2025-07-21 08:16:42 +09:00
|
|
|
|
export async function detectFileType(
|
2025-05-29 22:30:18 +00:00
|
|
|
|
filePath: string,
|
2025-07-21 08:16:42 +09:00
|
|
|
|
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
|
2025-05-29 22:30:18 +00:00
|
|
|
|
const ext = path.extname(filePath).toLowerCase();
|
|
|
|
|
|
|
2025-08-05 23:15:53 -07:00
|
|
|
|
// The mimetype for various TypeScript extensions (ts, mts, cts, tsx) can be
|
|
|
|
|
|
// MPEG transport stream (a video format), but we want to assume these are
|
|
|
|
|
|
// TypeScript files instead.
|
|
|
|
|
|
if (['.ts', '.mts', '.cts'].includes(ext)) {
|
2025-07-01 16:05:33 -07:00
|
|
|
|
return 'text';
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-07-07 11:21:32 +05:30
|
|
|
|
if (ext === '.svg') {
|
|
|
|
|
|
return 'svg';
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-09-04 23:00:27 +02:00
|
|
|
|
const lookedUpMimeType = mime.getType(filePath); // Returns null if not found, or the mime type string
|
2025-07-02 00:52:32 +05:30
|
|
|
|
if (lookedUpMimeType) {
|
|
|
|
|
|
if (lookedUpMimeType.startsWith('image/')) {
|
|
|
|
|
|
return 'image';
|
|
|
|
|
|
}
|
|
|
|
|
|
if (lookedUpMimeType.startsWith('audio/')) {
|
|
|
|
|
|
return 'audio';
|
|
|
|
|
|
}
|
|
|
|
|
|
if (lookedUpMimeType.startsWith('video/')) {
|
|
|
|
|
|
return 'video';
|
|
|
|
|
|
}
|
|
|
|
|
|
if (lookedUpMimeType === 'application/pdf') {
|
|
|
|
|
|
return 'pdf';
|
|
|
|
|
|
}
|
2025-05-29 22:30:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Stricter binary check for common non-text extensions before content check
|
|
|
|
|
|
// These are often not well-covered by mime-types or might be misidentified.
|
2025-08-23 13:35:00 +09:00
|
|
|
|
if (BINARY_EXTENSIONS.includes(ext)) {
|
2025-05-29 22:30:18 +00:00
|
|
|
|
return 'binary';
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-07-21 17:54:44 -04:00
|
|
|
|
// Fall back to content-based check if mime type wasn't conclusive for image/pdf
|
2025-05-29 22:30:18 +00:00
|
|
|
|
// and it's not a known binary extension.
|
2025-07-21 08:16:42 +09:00
|
|
|
|
if (await isBinaryFile(filePath)) {
|
2025-05-29 22:30:18 +00:00
|
|
|
|
return 'binary';
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return 'text';
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Result of reading and processing a single file (see
 * processSingleFileContent). On failure, `error` (and usually `errorType`)
 * is set and `llmContent`/`returnDisplay` carry human-readable explanations.
 */
export interface ProcessedFileReadResult {
  llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary
  returnDisplay: string;
  error?: string; // Optional error message for the LLM if file processing failed
  errorType?: ToolErrorType; // Structured error type
  isTruncated?: boolean; // For text files, indicates if content was truncated
  originalLineCount?: number; // For text files
  linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reads and processes a single file, handling text, images, and PDFs.
 * @param filePath Absolute path to the file.
 * @param rootDirectory Absolute path to the project root for relative path display.
 * @param fileSystemService File system abstraction.
 *   NOTE(review): this parameter is accepted but never referenced in this
 *   implementation — confirm whether the reads below should go through it.
 * @param offset Optional offset for text files (0-based line number).
 * @param limit Optional limit for text files (number of lines to read).
 * @returns ProcessedFileReadResult object. Errors are reported via the result
 *   object (never thrown): missing file, directory path, oversized file, and
 *   any read failure each produce a structured error result.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  fileSystemService: FileSystemService,
  offset?: number,
  limit?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent:
          'Could not read file because no file was found at the specified path.',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
        errorType: ToolErrorType.FILE_NOT_FOUND,
      };
    }
    const stats = await fs.promises.stat(filePath);
    if (stats.isDirectory()) {
      return {
        llmContent:
          'Could not read file because the provided path is a directory, not a file.',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
        errorType: ToolErrorType.TARGET_IS_DIRECTORY,
      };
    }

    // Hard cap: refuse anything over 20MB regardless of type.
    const fileSizeInMB = stats.size / (1024 * 1024);
    if (fileSizeInMB > 20) {
      return {
        llmContent: 'File size exceeds the 20MB limit.',
        returnDisplay: 'File size exceeds the 20MB limit.',
        error: `File size exceeds the 20MB limit: ${filePath} (${fileSizeInMB.toFixed(2)}MB)`,
        errorType: ToolErrorType.FILE_TOO_LARGE,
      };
    }

    const fileType = await detectFileType(filePath);
    // Normalize to forward slashes so display is consistent across platforms.
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');

    switch (fileType) {
      case 'binary': {
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'svg': {
        // SVGs are returned as text, but only up to 1MB.
        const SVG_MAX_SIZE_BYTES = 1 * 1024 * 1024;
        if (stats.size > SVG_MAX_SIZE_BYTES) {
          return {
            llmContent: `Cannot display content of SVG file larger than 1MB: ${relativePathForDisplay}`,
            returnDisplay: `Skipped large SVG file (>1MB): ${relativePathForDisplay}`,
          };
        }
        const content = await readFileWithEncoding(filePath);
        return {
          llmContent: content,
          returnDisplay: `Read SVG as text: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        // Use BOM-aware reader to avoid leaving a BOM character in content and to support UTF-16/32 transparently
        const content = await readFileWithEncoding(filePath);
        const lines = content.split('\n');
        const originalLineCount = lines.length;

        // offset of 0/undefined both mean "start from the first line".
        const startLine = offset || 0;
        const effectiveLimit =
          limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);

        // Truncate individual over-long lines, tracking whether any were cut.
        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });

        // Truncated if we skipped leading lines or stopped before the end.
        const contentRangeTruncated =
          startLine > 0 || endLine < originalLineCount;
        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
        const llmContent = formattedLines.join('\n');

        // By default, return nothing to streamline the common case of a successful read_file.
        let returnDisplay = '';
        if (contentRangeTruncated) {
          returnDisplay = `Read lines ${
            actualStartLine + 1
          }-${endLine} of ${originalLineCount} from ${relativePathForDisplay}`;
          if (linesWereTruncatedInLength) {
            returnDisplay += ' (some lines were shortened)';
          }
        } else if (linesWereTruncatedInLength) {
          returnDisplay = `Read all ${originalLineCount} lines from ${relativePathForDisplay} (some lines were shortened)`;
        }

        return {
          llmContent,
          returnDisplay,
          isTruncated,
          originalLineCount,
          linesShown: [actualStartLine + 1, endLine],
        };
      }
      case 'image':
      case 'pdf':
      case 'audio':
      case 'video': {
        // Media files are returned inline as base64 for the model.
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              mimeType: mime.getType(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    // Any unexpected failure (stat/read/decode) is reported via the result.
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
      errorType: ToolErrorType.READ_CONTENT_FAILURE,
    };
  }
}
|
2025-09-08 14:44:56 -07:00
|
|
|
|
|
|
|
|
|
|
export async function fileExists(filePath: string): Promise<boolean> {
|
|
|
|
|
|
try {
|
|
|
|
|
|
await fsPromises.access(filePath, fs.constants.F_OK);
|
|
|
|
|
|
return true;
|
|
|
|
|
|
} catch (_: unknown) {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|