packages/core/src/utils/fileUtils.ts

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import fs from 'node:fs';
import fsPromises from 'node:fs/promises';
import path from 'node:path';
import type { PartUnion } from '@google/genai';
// eslint-disable-next-line import/no-internal-modules
import mime from 'mime/lite';
import type { FileSystemService } from '../services/fileSystemService.js';
import { ToolErrorType } from '../tools/tool-error.js';
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
import { createRequire as createModuleRequire } from 'node:module';
import { debugLogger } from './debugLogger.js';

// CommonJS-style `require` bound to this module's URL. In this file it is only
// used for `require.resolve`, to turn a module specifier into an on-disk path.
const requireModule = createModuleRequire(import.meta.url);

/**
 * Resolves a module specifier to a file path and reads that file's bytes.
 * @param specifier Module specifier handed to `require.resolve`.
 * @returns A fresh `Uint8Array` holding the file contents.
 */
export async function readWasmBinaryFromDisk(
  specifier: string,
): Promise<Uint8Array> {
  const fileBytes = await fsPromises.readFile(
    requireModule.resolve(specifier),
  );
  return new Uint8Array(fileBytes);
}

/**
 * Obtains a WASM binary, preferring the dynamically imported module and
 * falling back to reading the file from disk.
 *
 * - If the import resolves and its default export is a `Uint8Array`, that
 *   value is returned.
 * - If the import throws, the disk fallback is tried; should that fail as
 *   well, the original import error is rethrown.
 * - If the import resolves without a `Uint8Array` default export, the disk
 *   fallback is tried; should that fail, a new error is thrown whose `cause`
 *   is the disk error.
 *
 * @param dynamicImport Callback performing the dynamic import.
 * @param fallbackSpecifier Specifier passed to `readWasmBinaryFromDisk`.
 * @returns The WASM bytes.
 */
export async function loadWasmBinary(
  dynamicImport: () => Promise<{ default: Uint8Array }>,
  fallbackSpecifier: string,
): Promise<Uint8Array> {
  let inlineBytes: Uint8Array | undefined;
  try {
    const imported = await dynamicImport();
    const candidate: unknown = imported?.default;
    if (candidate instanceof Uint8Array) {
      inlineBytes = candidate;
    }
  } catch (importError) {
    // Import failed outright: the disk copy is the only remaining option.
    try {
      return await readWasmBinaryFromDisk(fallbackSpecifier);
    } catch {
      // Report the import failure, not the fallback failure.
      throw importError;
    }
  }

  if (inlineBytes !== undefined) {
    return inlineBytes;
  }

  // Import succeeded but did not yield usable bytes.
  try {
    return await readWasmBinaryFromDisk(fallbackSpecifier);
  } catch (diskError) {
    throw new Error('WASM binary module did not provide a Uint8Array export', {
      cause: diskError,
    });
  }
}

// Constants for text file processing
// Maximum number of lines returned from a text file when the caller does not
// supply an end line.
export const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
// Lines longer than this many characters are cut and suffixed with
// '... [truncated]'.
const MAX_LINE_LENGTH_TEXT_FILE = 2000;

// Default text encoding.
// NOTE(review): not referenced in the visible part of this file; presumably
// consumed by other modules — confirm before changing.
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';

// --- Unicode BOM detection & decoding helpers --------------------------------

/** Encodings that detectBOM can report. */
type UnicodeEncoding = 'utf8' | 'utf16le' | 'utf16be' | 'utf32le' | 'utf32be';

/** Result of a successful BOM detection. */
interface BOMInfo {
  /** Encoding indicated by the BOM. */
  encoding: UnicodeEncoding;
  /** Number of leading bytes occupied by the BOM (2, 3 or 4). */
  bomLength: number;
}

/**
 * Identifies a Unicode byte order mark at the start of a buffer.
 *
 * Candidates are tried longest-first so that the UTF-32 LE mark
 * (FF FE 00 00) is never mistaken for the UTF-16 LE mark (FF FE).
 *
 * @param buf Bytes to inspect; only the first four are looked at.
 * @returns The detected encoding and BOM length, or null when no BOM matches.
 */
export function detectBOM(buf: Buffer): BOMInfo | null {
  const beginsWith = (...signature: number[]): boolean =>
    buf.length >= signature.length &&
    signature.every((byte, idx) => buf[idx] === byte);

  // UTF-32 LE: FF FE 00 00
  if (beginsWith(0xff, 0xfe, 0x00, 0x00)) {
    return { encoding: 'utf32le', bomLength: 4 };
  }
  // UTF-32 BE: 00 00 FE FF
  if (beginsWith(0x00, 0x00, 0xfe, 0xff)) {
    return { encoding: 'utf32be', bomLength: 4 };
  }
  // UTF-8: EF BB BF
  if (beginsWith(0xef, 0xbb, 0xbf)) {
    return { encoding: 'utf8', bomLength: 3 };
  }
  // UTF-16 LE: FF FE (the UTF-32 LE case has already returned above)
  if (beginsWith(0xff, 0xfe)) {
    return { encoding: 'utf16le', bomLength: 2 };
  }
  // UTF-16 BE: FE FF
  if (beginsWith(0xfe, 0xff)) {
    return { encoding: 'utf16be', bomLength: 2 };
  }
  return null;
}

/**
 * Decode a UTF-16 BE buffer into a JS string by byte-swapping a copy to LE and
 * using Node's 'utf16le' decoder. (Node has 'utf16le' but not 'utf16be'.)
 *
 * A dangling trailing byte (odd-length input) is ignored rather than passed to
 * `swap16()`, which throws when the length is not a multiple of 2.
 *
 * @param buf UTF-16 BE bytes without the BOM. Not modified.
 * @returns The decoded string; '' when there is no complete code unit.
 */
function decodeUTF16BE(buf: Buffer): string {
  const usable = buf.length - (buf.length % 2);
  if (usable === 0) return '';
  // swap16 mutates in place, so operate on a copy of the usable prefix.
  const swapped = Buffer.from(buf.subarray(0, usable));
  swapped.swap16();
  return swapped.toString('utf16le');
}

/**
 * Turns UTF-32 bytes (either byte order) into a JS string.
 *
 * Each complete 4-byte unit is read as an unsigned 32-bit code point. Values
 * above U+10FFFF and values in the surrogate range become U+FFFD. Up to three
 * leftover bytes at the end are dropped.
 *
 * @param buf UTF-32 bytes without the BOM.
 * @param littleEndian True for UTF-32 LE, false for UTF-32 BE.
 * @returns The decoded string.
 */
function decodeUTF32(buf: Buffer, littleEndian: boolean): string {
  const wholeUnits = Math.floor(buf.length / 4);
  const pieces: string[] = [];
  for (let unit = 0; unit < wholeUnits; unit++) {
    const offset = unit * 4;
    const codePoint = littleEndian
      ? buf.readUInt32LE(offset)
      : buf.readUInt32BE(offset);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint > 0x10ffff || isSurrogate) {
      pieces.push('\uFFFD');
    } else {
      pieces.push(String.fromCodePoint(codePoint));
    }
  }
  return pieces.join('');
}

/**
 * Loads a file and returns its text, choosing the decoder from the file's
 * Unicode BOM (UTF-8/16/32) and dropping the BOM from the result.
 * Files without a BOM are decoded as UTF-8.
 *
 * @param filePath Path of the file to read.
 * @returns The decoded text; '' for a zero-length file.
 */
export async function readFileWithEncoding(filePath: string): Promise<string> {
  // One read only: BOM sniffing and decoding both work off this buffer.
  const raw = await fs.promises.readFile(filePath);
  if (raw.length === 0) return '';

  const bomInfo = detectBOM(raw);
  if (!bomInfo) {
    return raw.toString('utf8');
  }

  const payload = raw.subarray(bomInfo.bomLength);
  const { encoding } = bomInfo;
  if (encoding === 'utf16le') {
    return payload.toString('utf16le');
  }
  if (encoding === 'utf16be') {
    return decodeUTF16BE(payload);
  }
  if (encoding === 'utf32le' || encoding === 'utf32be') {
    return decodeUTF32(payload, encoding === 'utf32le');
  }
  // 'utf8', plus a defensive catch-all for any unexpected encoding value.
  return payload.toString('utf8');
}

/**
 * Returns the MIME type that the `mime` lookup reports for a path.
 * @param filePath Path to the file.
 * @returns The MIME type string, or undefined when the lookup does not yield a string.
 */
export function getSpecificMimeType(filePath: string): string | undefined {
  const mimeType = mime.getType(filePath);
  if (typeof mimeType !== 'string') {
    return undefined;
  }
  return mimeType;
}

/**
 * Tells whether a path is the root directory itself or lies beneath it.
 * Both arguments are passed through `path.resolve` before the purely textual
 * comparison.
 * @param pathToCheck The path to test.
 * @param rootDirectory The directory that should contain it.
 * @returns True when the path equals the root or is nested under it.
 */
export function isWithinRoot(
  pathToCheck: string,
  rootDirectory: string,
): boolean {
  const candidate = path.resolve(pathToCheck);
  const root = path.resolve(rootDirectory);

  if (candidate === root) {
    return true;
  }

  // Compare against "root + separator" so '/a/bc' is not treated as inside
  // '/a/b'. A root that already ends in a separator (e.g. '/' or 'C:\') is
  // used as-is.
  const rootPrefix = root.endsWith(path.sep) ? root : root + path.sep;
  return candidate.startsWith(rootPrefix);
}

/**
 * Returns the real path of `filePath` as reported by `fs.realpathSync`; when
 * that call throws, returns `path.resolve(filePath)` instead.
 */
export function getRealPath(filePath: string): string {
  let resolved: string;
  try {
    resolved = fs.realpathSync(filePath);
  } catch {
    resolved = path.resolve(filePath);
  }
  return resolved;
}

/**
 * Checks if a file's content is empty or contains only whitespace.
 *
 * Checks the file size first and then samples at most the first 1KB, so a file
 * whose first 1KB is whitespace is reported as empty even if content follows.
 * The sample is decoded according to its Unicode BOM (UTF-8/16/32) so that
 * whitespace-only UTF-16/UTF-32 files are recognised; without a BOM the sample
 * is decoded as UTF-8.
 *
 * @param filePath Path of the file to inspect.
 * @returns True when the file is empty, whitespace-only (within the sample),
 *   or cannot be read; false otherwise.
 */
export async function isEmpty(filePath: string): Promise<boolean> {
  try {
    const stats = await fsPromises.stat(filePath);
    if (stats.size === 0) return true;

    const sampleSize = Math.min(1024, stats.size);
    const fd = await fsPromises.open(filePath, 'r');
    try {
      const { buffer, bytesRead } = await fd.read({
        buffer: Buffer.alloc(sampleSize),
        offset: 0,
        length: sampleSize,
        position: 0,
      });
      // Only decode what was actually read; untouched zero bytes at the end of
      // the buffer would otherwise count as non-whitespace content.
      const sample = buffer.subarray(0, bytesRead);

      const bom = detectBOM(sample);
      let content: string;
      if (!bom) {
        content = sample.toString('utf8');
      } else {
        const payload = sample.subarray(bom.bomLength);
        switch (bom.encoding) {
          case 'utf16le':
            content = payload.toString('utf16le');
            break;
          case 'utf16be':
            // Hand over whole 16-bit units only; the sample may end mid-unit.
            content = decodeUTF16BE(
              payload.subarray(0, payload.length - (payload.length % 2)),
            );
            break;
          case 'utf32le':
            content = decodeUTF32(payload, true);
            break;
          case 'utf32be':
            content = decodeUTF32(payload, false);
            break;
          default:
            content = payload.toString('utf8');
            break;
        }
      }

      return content.trim().length === 0;
    } finally {
      await fd.close();
    }
  } catch {
    // If file is unreadable, we treat it as empty/invalid for validation purposes
    return true;
  }
}

/**
 * Content-based guess at whether a file is binary.
 *
 * Looks at up to the first 4KB. A Unicode BOM means text. Otherwise any NUL
 * byte means binary, and so does a sample in which more than 30% of the bytes
 * are control characters (below 9, or between 14 and 31 inclusive).
 *
 * @param filePath Path of the file to inspect.
 * @returns True when the sample looks binary. False for empty files, for text,
 *   and when the file cannot be opened or read (a warning is logged).
 */
export async function isBinaryFile(filePath: string): Promise<boolean> {
  let handle: fs.promises.FileHandle | null = null;
  try {
    handle = await fs.promises.open(filePath, 'r');
    const { size } = await handle.stat();
    if (size === 0) return false; // nothing to inspect, so not binary

    const wanted = Math.min(4096, size);
    const sample = Buffer.alloc(wanted);
    const { bytesRead } = await handle.read(sample, 0, wanted, 0);
    if (bytesRead === 0) return false;

    // UTF-16/32 text legitimately contains NUL bytes, so a BOM short-circuits
    // the byte-level heuristics below.
    if (detectBOM(sample.subarray(0, Math.min(4, bytesRead)))) {
      return false;
    }

    let controlBytes = 0;
    for (const byte of sample.subarray(0, bytesRead)) {
      if (byte === 0) return true; // NUL without a BOM: treat as binary
      const isControl = byte < 9 || (byte > 13 && byte < 32);
      if (isControl) {
        controlBytes += 1;
      }
    }
    return controlBytes / bytesRead > 0.3;
  } catch (error) {
    debugLogger.warn(
      `Failed to check if file is binary: ${filePath}`,
      error instanceof Error ? error.message : String(error),
    );
    return false;
  } finally {
    if (handle) {
      try {
        await handle.close();
      } catch (closeError) {
        debugLogger.warn(
          `Failed to close file handle for: ${filePath}`,
          closeError instanceof Error ? closeError.message : String(closeError),
        );
      }
    }
  }
}

/**
 * Classifies a file using its extension, its looked-up MIME type and, where
 * needed, a sample of its content.
 *
 * Order of checks: TypeScript extensions, `.svg`, MIME type (image, audio/video,
 * PDF), the known-binary extension list, and finally the content heuristic.
 *
 * @param filePath Path to the file.
 * @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
 */
export async function detectFileType(
  filePath: string,
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
  const extension = path.extname(filePath).toLowerCase();

  switch (extension) {
    // The MIME lookup can map these to MPEG transport stream (a video
    // format); they are assumed to be TypeScript sources instead.
    case '.ts':
    case '.mts':
    case '.cts':
      return 'text';
    case '.svg':
      return 'svg';
    default:
      break;
  }

  const mimeType = mime.getType(filePath); // null when the lookup finds nothing
  if (mimeType) {
    if (mimeType.startsWith('image/')) {
      return 'image';
    }

    const isAudio = mimeType.startsWith('audio/');
    if (isAudio || mimeType.startsWith('video/')) {
      // Confirm with a content check to avoid MIME misidentification (#16888)
      const looksBinary = await isBinaryFile(filePath);
      if (!looksBinary) {
        return 'text';
      }
      return isAudio ? 'audio' : 'video';
    }

    if (mimeType === 'application/pdf') {
      return 'pdf';
    }
  }

  // Known non-text extensions are decided here, before any content is read.
  if (BINARY_EXTENSIONS.includes(extension)) {
    return 'binary';
  }

  // Last resort: inspect the content itself.
  return (await isBinaryFile(filePath)) ? 'binary' : 'text';
}

/**
 * Result returned by processSingleFileContent for one file.
 */
export interface ProcessedFileReadResult {
  // String for text/SVG content and for skip or error messages; an inline-data
  // Part (base64 data + MIME type) for image, pdf, audio and video files.
  llmContent: PartUnion;
  // Short status text for display; empty for a complete, untruncated text read.
  returnDisplay: string;
  error?: string; // Optional error message for the LLM if file processing failed
  errorType?: ToolErrorType; // Structured error type
  isTruncated?: boolean; // For text files, indicates if content was truncated
  originalLineCount?: number; // For text files
  linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
}

/**
 * Reads and processes a single file, handling text, SVG, images, PDFs, audio
 * and video.
 *
 * Text files are split on '\n', limited to the requested line range (or to
 * DEFAULT_MAX_LINES_TEXT_FILE lines when no end line is given) and have overly
 * long lines shortened. Media files are returned as base64 inline data. Files
 * classified as binary, SVGs over 1MB and files over 20MB are not read.
 *
 * Failures are reported through the `error`/`errorType` fields of the result;
 * errors thrown inside this function are caught and converted.
 *
 * @param filePath Absolute path to the file.
 * @param rootDirectory Absolute path to the project root for relative path display.
 * @param _fileSystemService Currently unused in this function; kept for signature stability.
 * @param startLine Optional 1-based line number to start reading from. Values
 *   below 1 are treated as the first line.
 * @param endLine Optional 1-based line number to end reading at (inclusive).
 *   An end before the start yields an empty selection.
 * @returns ProcessedFileReadResult object.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  _fileSystemService: FileSystemService,
  startLine?: number,
  endLine?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent:
          'Could not read file because no file was found at the specified path.',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
        errorType: ToolErrorType.FILE_NOT_FOUND,
      };
    }
    const stats = await fs.promises.stat(filePath);
    if (stats.isDirectory()) {
      return {
        llmContent:
          'Could not read file because the provided path is a directory, not a file.',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
        errorType: ToolErrorType.TARGET_IS_DIRECTORY,
      };
    }

    const fileSizeInMB = stats.size / (1024 * 1024);
    if (fileSizeInMB > 20) {
      return {
        llmContent: 'File size exceeds the 20MB limit.',
        returnDisplay: 'File size exceeds the 20MB limit.',
        error: `File size exceeds the 20MB limit: ${filePath} (${fileSizeInMB.toFixed(2)}MB)`,
        errorType: ToolErrorType.FILE_TOO_LARGE,
      };
    }

    const fileType = await detectFileType(filePath);
    // Always display forward slashes, regardless of platform separator.
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');

    switch (fileType) {
      case 'binary': {
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'svg': {
        const SVG_MAX_SIZE_BYTES = 1 * 1024 * 1024;
        if (stats.size > SVG_MAX_SIZE_BYTES) {
          return {
            llmContent: `Cannot display content of SVG file larger than 1MB: ${relativePathForDisplay}`,
            returnDisplay: `Skipped large SVG file (>1MB): ${relativePathForDisplay}`,
          };
        }
        const content = await readFileWithEncoding(filePath);
        return {
          llmContent: content,
          returnDisplay: `Read SVG as text: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        // Use BOM-aware reader to avoid leaving a BOM character in content and to support UTF-16/32 transparently
        const content = await readFileWithEncoding(filePath);
        const lines = content.split('\n');
        const originalLineCount = lines.length;

        let sliceStart = 0;
        let sliceEnd = originalLineCount;

        if (startLine !== undefined || endLine !== undefined) {
          // Convert to a 0-based index and clamp at the first line: a negative
          // index would make Array.prototype.slice count from the end.
          sliceStart = startLine ? Math.max(startLine - 1, 0) : 0;
          sliceEnd = endLine
            ? Math.min(endLine, originalLineCount)
            : Math.min(
                sliceStart + DEFAULT_MAX_LINES_TEXT_FILE,
                originalLineCount,
              );
        } else {
          sliceEnd = Math.min(DEFAULT_MAX_LINES_TEXT_FILE, originalLineCount);
        }

        // Ensure selectedLines doesn't try to slice beyond array bounds
        const actualStart = Math.min(sliceStart, originalLineCount);
        // An end before the start (including a negative end) selects nothing
        // instead of being interpreted relative to the end of the file.
        sliceEnd = Math.max(sliceEnd, actualStart);
        const selectedLines = lines.slice(actualStart, sliceEnd);

        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });

        const isTruncated =
          actualStart > 0 ||
          sliceEnd < originalLineCount ||
          linesWereTruncatedInLength;
        const llmContent = formattedLines.join('\n');

        // By default, return nothing to streamline the common case of a successful read_file.
        let returnDisplay = '';
        if (actualStart > 0 || sliceEnd < originalLineCount) {
          returnDisplay = `Read lines ${
            actualStart + 1
          }-${sliceEnd} of ${originalLineCount} from ${relativePathForDisplay}`;
          if (linesWereTruncatedInLength) {
            returnDisplay += ' (some lines were shortened)';
          }
        } else if (linesWereTruncatedInLength) {
          returnDisplay = `Read all ${originalLineCount} lines from ${relativePathForDisplay} (some lines were shortened)`;
        }

        return {
          llmContent,
          returnDisplay,
          isTruncated,
          originalLineCount,
          linesShown: [actualStart + 1, sliceEnd],
        };
      }
      case 'image':
      case 'pdf':
      case 'audio':
      case 'video': {
        // Media is passed through untouched as base64 inline data.
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              mimeType: mime.getType(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
      errorType: ToolErrorType.READ_CONTENT_FAILURE,
    };
  }
}

/**
 * Reports whether `filePath` is visible to the process (an `F_OK` access check).
 * @param filePath Path to probe.
 * @returns True when the access check succeeds, false when it rejects.
 */
export async function fileExists(filePath: string): Promise<boolean> {
  let reachable = true;
  try {
    await fsPromises.access(filePath, fs.constants.F_OK);
  } catch {
    reachable = false;
  }
  return reachable;
}

/**
 * Makes a string safe to embed in a filename by replacing every character
 * other than ASCII letters, digits, '_' and '-' with '_'. Path separators and
 * dots are therefore replaced as well.
 */
export function sanitizeFilenamePart(part: string): string {
  const disallowedChars = /[^a-zA-Z0-9_-]/g;
  return part.replace(disallowedChars, '_');
}

/**
 * Formats a truncated message for tool output.
 * Shows the first 20% and last 80% of the allowed characters with a marker in between.
 *
 * @param contentStr Full tool output.
 * @param outputFile Location of the full output, mentioned in the header line.
 * @param maxChars Character budget for the retained content. Non-positive or
 *   NaN budgets keep no content and report everything as omitted.
 * @returns `contentStr` unchanged when it fits the budget; otherwise a header,
 *   the retained head, an omission marker and the retained tail.
 */
export function formatTruncatedToolOutput(
  contentStr: string,
  outputFile: string,
  maxChars: number,
): string {
  if (contentStr.length <= maxChars) return contentStr;

  // Treat negative/NaN budgets as zero so the slices below stay well-defined.
  const budget = maxChars > 0 ? maxChars : 0;
  const headChars = Math.floor(budget * 0.2);
  const tailChars = budget - headChars;

  const head = contentStr.slice(0, headChars);
  // Use an explicit start index: slice(-0) is slice(0) and would return the
  // entire string when the tail budget is zero.
  const tail = contentStr.slice(contentStr.length - tailChars);
  const omittedChars = contentStr.length - headChars - tailChars;

  return `Output too large. Showing first ${headChars.toLocaleString()} and last ${tailChars.toLocaleString()} characters. For full output see: ${outputFile}
${head}

... [${omittedChars.toLocaleString()} characters omitted] ...

${tail}`;
}

/**
 * Name of the subdirectory, under the project temp directory, in which
 * saveTruncatedToolOutput writes tool output files.
 */
export const TOOL_OUTPUTS_DIR = 'tool-outputs';

/**
 * Writes tool output to a `.txt` file under the project temp directory.
 *
 * The file goes into `<projectTempDir>/<TOOL_OUTPUTS_DIR>`, or into a
 * `session-<sessionId>` subdirectory of it when a session id is given. The
 * file name is built from the sanitized, lower-cased tool name and id; the
 * tool name prefix is skipped when the id already starts with it. The
 * directory is created if missing.
 *
 * @param content Text to write.
 * @param toolName Tool name used in the file name.
 * @param id Call id (string) or truncation id (number) used in the file name.
 * @param projectTempDir Base temp directory for the project.
 * @param sessionId Optional session id selecting a per-session subdirectory.
 * @returns The path of the written file.
 */
export async function saveTruncatedToolOutput(
  content: string,
  toolName: string,
  id: string | number, // Accept string (callId) or number (truncationId)
  projectTempDir: string,
  sessionId?: string,
): Promise<{ outputFile: string }> {
  const toolSlug = sanitizeFilenamePart(toolName).toLowerCase();
  const idSlug = sanitizeFilenamePart(id.toString()).toLowerCase();
  const fileName = idSlug.startsWith(toolSlug)
    ? `${idSlug}.txt`
    : `${toolSlug}_${idSlug}.txt`;

  const baseDir = path.join(projectTempDir, TOOL_OUTPUTS_DIR);
  const toolOutputDir = sessionId
    ? path.join(baseDir, `session-${sanitizeFilenamePart(sessionId)}`)
    : baseDir;
  const outputFile = path.join(toolOutputDir, fileName);

  await fsPromises.mkdir(toolOutputDir, { recursive: true });
  await fsPromises.writeFile(outputFile, content);

  return { outputFile };
}
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								/**
 								 * @license
 								 * Copyright 2025 Google LLC
 								 * SPDX-License-Identifier: Apache-2.0
 								 */
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								import fs from 'node:fs';
-												feat(core): Download ripgrep at runtime, if enabled. (#7818)


											
										
										
											2025-09-08 14:44:56 -07:00
+								import fsPromises from 'node:fs/promises';
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								import path from 'node:path';
-												Explict imports & exports with `type` modifier (#3774)


											
										
										
											2025-08-26 00:04:53 +02:00
+								import type { PartUnion } from '@google/genai';
-												Reduce bundle size & check it in CI (#7395)


											
										
										
											2025-09-04 23:00:27 +02:00
+								// eslint-disable-next-line import/no-internal-modules
 								import mime from 'mime/lite';
-												Explict imports & exports with `type` modifier (#3774)


											
										
										
											2025-08-26 00:04:53 +02:00
+								import type { FileSystemService } from '../services/fileSystemService.js';
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								import { ToolErrorType } from '../tools/tool-error.js';
-												feat(core): share file list patterns between glob and grep tools (#6359)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Arya Gummadi <aryagummadi@google.com>
											
										
										
											2025-08-23 13:35:00 +09:00
+								import { BINARY_EXTENSIONS } from './ignorePatterns.js';
-												Inline tree-sitter wasm and add runtime fallback (#11157)


											
										
										
											2025-10-16 17:25:30 -07:00
+								import { createRequire as createModuleRequire } from 'node:module';
-												refactor(logging): Centralize console logging with debugLogger (#11590)


											
										
										
											2025-10-21 16:35:22 -04:00
+								import { debugLogger } from './debugLogger.js';
-												Inline tree-sitter wasm and add runtime fallback (#11157)


											
										
										
											2025-10-16 17:25:30 -07:00
 								const requireModule = createModuleRequire(import.meta.url);
 								export async function readWasmBinaryFromDisk(
 								  specifier: string,
 								): Promise<Uint8Array> {
 								  const resolvedPath = requireModule.resolve(specifier);
 								  const buffer = await fsPromises.readFile(resolvedPath);
 								  return new Uint8Array(buffer);
 								}
 								export async function loadWasmBinary(
 								  dynamicImport: () => Promise<{ default: Uint8Array }>,
 								  fallbackSpecifier: string,
 								): Promise<Uint8Array> {
 								  try {
 								    const module = await dynamicImport();
 								    if (module?.default instanceof Uint8Array) {
 								      return module.default;
 								    }
 								  } catch (error) {
 								    try {
 								      return await readWasmBinaryFromDisk(fallbackSpecifier);
 								    } catch {
 								      throw error;
 								    }
 								  }
 								  try {
 								    return await readWasmBinaryFromDisk(fallbackSpecifier);
 								  } catch (error) {
 								    throw new Error('WASM binary module did not provide a Uint8Array export', {
 								      cause: error,
 								    });
 								  }
 								}
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
 								// Constants for text file processing
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								export const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								const MAX_LINE_LENGTH_TEXT_FILE = 2000;
 								// Default values for encoding and separator format
 								export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								// --- Unicode BOM detection & decoding helpers --------------------------------
 								type UnicodeEncoding = 'utf8' | 'utf16le' | 'utf16be' | 'utf32le' | 'utf32be';
 								interface BOMInfo {
 								  encoding: UnicodeEncoding;
 								  bomLength: number;
 								}
 								/**
 								 * Detect a Unicode BOM (Byte Order Mark) if present.
 								 * Reads up to the first 4 bytes and returns encoding + BOM length, else null.
 								 */
 								export function detectBOM(buf: Buffer): BOMInfo | null {
 								  if (buf.length >= 4) {
 								    // UTF-32 LE: FF FE 00 00
 								    if (
 								      buf[0] === 0xff &&
 								      buf[1] === 0xfe &&
 								      buf[2] === 0x00 &&
 								      buf[3] === 0x00
 								    ) {
 								      return { encoding: 'utf32le', bomLength: 4 };
 								    }
 								    // UTF-32 BE: 00 00 FE FF
 								    if (
 								      buf[0] === 0x00 &&
 								      buf[1] === 0x00 &&
 								      buf[2] === 0xfe &&
 								      buf[3] === 0xff
 								    ) {
 								      return { encoding: 'utf32be', bomLength: 4 };
 								    }
 								  }
 								  if (buf.length >= 3) {
 								    // UTF-8: EF BB BF
 								    if (buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
 								      return { encoding: 'utf8', bomLength: 3 };
 								    }
 								  }
 								  if (buf.length >= 2) {
 								    // UTF-16 LE: FF FE  (but not UTF-32 LE already matched above)
 								    if (
 								      buf[0] === 0xff &&
 								      buf[1] === 0xfe &&
 								      (buf.length < 4 || buf[2] !== 0x00 || buf[3] !== 0x00)
 								    ) {
 								      return { encoding: 'utf16le', bomLength: 2 };
 								    }
 								    // UTF-16 BE: FE FF
 								    if (buf[0] === 0xfe && buf[1] === 0xff) {
 								      return { encoding: 'utf16be', bomLength: 2 };
 								    }
 								  }
 								  return null;
 								}
 								/**
 								 * Convert a UTF-16 BE buffer to a JS string by swapping to LE then using Node's decoder.
 								 * (Node has 'utf16le' but not 'utf16be'.)
 								 */
 								function decodeUTF16BE(buf: Buffer): string {
 								  if (buf.length === 0) return '';
 								  const swapped = Buffer.from(buf); // swap16 mutates in place, so copy
 								  swapped.swap16();
 								  return swapped.toString('utf16le');
 								}
 								/**
 								 * Decode a UTF-32 buffer (LE or BE) into a JS string.
 								 * Invalid code points are replaced with U+FFFD, partial trailing bytes are ignored.
 								 */
 								function decodeUTF32(buf: Buffer, littleEndian: boolean): string {
 								  if (buf.length < 4) return '';
 								  const usable = buf.length - (buf.length % 4);
 								  let out = '';
 								  for (let i = 0; i < usable; i += 4) {
 								    const cp = littleEndian
 								      ? (buf[i] |
 								          (buf[i + 1] << 8) |
 								          (buf[i + 2] << 16) |
 								          (buf[i + 3] << 24)) >>>
 
 								      : (buf[i + 3] |
 								          (buf[i + 2] << 8) |
 								          (buf[i + 1] << 16) |
 								          (buf[i] << 24)) >>>
 ;
 								    // Valid planes: 0x0000..0x10FFFF excluding surrogates
 								    if (cp <= 0x10ffff && !(cp >= 0xd800 && cp <= 0xdfff)) {
 								      out += String.fromCodePoint(cp);
 								    } else {
 								      out += '\uFFFD';
 								    }
 								  }
 								  return out;
 								}
 								/**
 								 * Read a file as text, honoring BOM encodings (UTF‑8/16/32) and stripping the BOM.
 								 * Falls back to utf8 when no BOM is present.
 								 */
 								export async function readFileWithEncoding(filePath: string): Promise<string> {
 								  // Read the file once; detect BOM and decode from the single buffer.
 								  const full = await fs.promises.readFile(filePath);
 								  if (full.length === 0) return '';
 								  const bom = detectBOM(full);
 								  if (!bom) {
 								    // No BOM → treat as UTF‑8
 								    return full.toString('utf8');
 								  }
 								  // Strip BOM and decode per encoding
 								  const content = full.subarray(bom.bomLength);
 								  switch (bom.encoding) {
 								    case 'utf8':
 								      return content.toString('utf8');
 								    case 'utf16le':
 								      return content.toString('utf16le');
 								    case 'utf16be':
 								      return decodeUTF16BE(content);
 								    case 'utf32le':
 								      return decodeUTF32(content, true);
 								    case 'utf32be':
 								      return decodeUTF32(content, false);
 								    default:
 								      // Defensive fallback; should be unreachable
 								      return content.toString('utf8');
 								  }
 								}
-												Add file operation telemetry (#1068)

Introduces telemetry for file create, read, and update operations.

This change adds the `gemini_cli.file.operation.count` metric, recorded by the `read-file`, `read-many-files`, and `write-file` tools.

The metric includes the following attributes:
    - `operation` (string: `create`, `read`, `update`): The type of file operation.
    - `lines` (optional, Int): Number of lines in the file.
    - `mimetype` (optional, string): Mimetype of the file.
    - `extension` (optional, string): File extension of the file.

Here is a stacked bar chart of file operations by extension (`js`, `ts`, `md`):
![image](https://github.com/user-attachments/assets/3e8f8ea9-6155-4186-863c-075cc47647c5)

Here is a stacked bar chart of file operations by type (`create`, `read`, `update`):
![image](https://github.com/user-attachments/assets/3fcf491d-31d0-4ba8-80e6-7fd2bd9c7c27)

#750 

cc @allenhutchison as discussed 
											
										
										
											2025-06-15 16:24:53 -04:00
+								/**
 								 * Looks up the specific MIME type for a file path.
 								 * @param filePath Path to the file.
 								 * @returns The specific MIME type string (e.g., 'text/python', 'application/javascript') or undefined if not found or ambiguous.
 								 */
 								export function getSpecificMimeType(filePath: string): string | undefined {
-												Reduce bundle size & check it in CI (#7395)


											
										
										
											2025-09-04 23:00:27 +02:00
+								  const lookedUpMime = mime.getType(filePath);
-												Add file operation telemetry (#1068)

Introduces telemetry for file create, read, and update operations.

This change adds the `gemini_cli.file.operation.count` metric, recorded by the `read-file`, `read-many-files`, and `write-file` tools.

The metric includes the following attributes:
    - `operation` (string: `create`, `read`, `update`): The type of file operation.
    - `lines` (optional, Int): Number of lines in the file.
    - `mimetype` (optional, string): Mimetype of the file.
    - `extension` (optional, string): File extension of the file.

Here is a stacked bar chart of file operations by extension (`js`, `ts`, `md`):
![image](https://github.com/user-attachments/assets/3e8f8ea9-6155-4186-863c-075cc47647c5)

Here is a stacked bar chart of file operations by type (`create`, `read`, `update`):
![image](https://github.com/user-attachments/assets/3fcf491d-31d0-4ba8-80e6-7fd2bd9c7c27)

#750 

cc @allenhutchison as discussed 
											
										
										
											2025-06-15 16:24:53 -04:00
+								  return typeof lookedUpMime === 'string' ? lookedUpMime : undefined;
 								}
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								/**
 								 * Checks if a path is within a given root directory.
 								 * @param pathToCheck The absolute path to check.
 								 * @param rootDirectory The absolute root directory.
 								 * @returns True if the path is within the root directory, false otherwise.
 								 */
 								export function isWithinRoot(
 								  pathToCheck: string,
 								  rootDirectory: string,
 								): boolean {
-												Pure refactor: Consolidate isWithinRoot() function calling. (#4163)


											
										
										
											2025-07-14 22:55:49 -07:00
+								  const normalizedPathToCheck = path.resolve(pathToCheck);
 								  const normalizedRootDirectory = path.resolve(rootDirectory);
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
 								  // Ensure the rootDirectory path ends with a separator for correct startsWith comparison,
 								  // unless it's the root path itself (e.g., '/' or 'C:\').
 								  const rootWithSeparator =
 								    normalizedRootDirectory === path.sep ||
 								    normalizedRootDirectory.endsWith(path.sep)
 								      ? normalizedRootDirectory
 								      : normalizedRootDirectory + path.sep;
 								  return (
 								    normalizedPathToCheck === normalizedRootDirectory ||
 								    normalizedPathToCheck.startsWith(rootWithSeparator)
 								  );
 								}
-												feat(plan): add core logic and `exit_plan_mode` tool definition (#18110)


											
										
										
											2026-02-02 22:30:03 -05:00
+								/**
 								 * Safely resolves a path to its real path if it exists, otherwise returns the absolute resolved path.
 								 */
 								export function getRealPath(filePath: string): string {
 								  let resolved: string;
 								  try {
 								    // Follows symlinks; throws when the path does not exist.
 								    resolved = fs.realpathSync(filePath);
 								  } catch {
 								    // Path is missing or unreadable: fall back to a purely lexical absolute path.
 								    resolved = path.resolve(filePath);
 								  }
 								  return resolved;
 								}
 								/**
 								 * Checks if a file's content is empty or contains only whitespace.
 								 * Efficiently checks file size first, and only samples the beginning of the file
 								 * (at most 1KB), so a larger file is judged by its first 1KB alone.
 								 * Honors Unicode BOM encodings: when a BOM is present, the sampled bytes after it
 								 * are decoded with the BOM's encoding before the whitespace check.
 								 * @param filePath Path to the file.
 								 * @returns True when the file is zero-length, its sample is whitespace-only, or it cannot be read.
 								 */
 								export async function isEmpty(filePath: string): Promise<boolean> {
 								  try {
 								    const stats = await fsPromises.stat(filePath);
 								    if (stats.size === 0) return true;
 								    // Sample up to 1KB to check for non-whitespace content.
 								    const sampleSize = Math.min(1024, stats.size);
 								    const fd = await fsPromises.open(filePath, 'r');
 								    try {
 								      const { buffer, bytesRead } = await fd.read({
 								        buffer: Buffer.alloc(sampleSize),
 								        offset: 0,
 								        length: sampleSize,
 								        position: 0,
 								      });
 								      // Only the bytes actually read are content; the rest of the buffer is zero padding.
 								      const sample = buffer.subarray(0, bytesRead);
 								      const bom = detectBOM(sample);
 								      const content = bom
 								        ? decodeBomSampleForEmptinessCheck(
 								            sample.subarray(bom.bomLength),
 								            bom.encoding,
 								          )
 								        : sample.toString('utf8');
 								      return content.trim().length === 0;
 								    } finally {
 								      await fd.close();
 								    }
 								  } catch {
 								    // If file is unreadable, we treat it as empty/invalid for validation purposes
 								    return true;
 								  }
 								}
 								/**
 								 * Decodes the BOM-stripped head of a file for the whitespace check in isEmpty.
 								 * A trailing partial code unit is reported as U+FFFD so it still counts as content.
 								 * @param payload Sampled bytes following the BOM.
 								 * @param encoding Encoding named by the BOM ('utf8', 'utf16le', 'utf16be', 'utf32le', 'utf32be').
 								 * @returns The decoded text.
 								 */
 								function decodeBomSampleForEmptinessCheck(
 								  payload: Buffer,
 								  encoding: string,
 								): string {
 								  let unitSize = 1;
 								  if (encoding === 'utf16le' || encoding === 'utf16be') {
 								    unitSize = 2;
 								  } else if (encoding === 'utf32le' || encoding === 'utf32be') {
 								    unitSize = 4;
 								  }
 								  const usableLength = payload.length - (payload.length % unitSize);
 								  const whole = payload.subarray(0, usableLength);
 								  const tail = usableLength < payload.length ? '\uFFFD' : '';
 								  switch (encoding) {
 								    case 'utf16le':
 								      return whole.toString('utf16le') + tail;
 								    case 'utf16be':
 								      // swap16 mutates in place, so operate on a copy of the sample.
 								      return Buffer.from(whole).swap16().toString('utf16le') + tail;
 								    case 'utf32le':
 								    case 'utf32be': {
 								      let text = '';
 								      for (let offset = 0; offset < whole.length; offset += 4) {
 								        const codePoint =
 								          encoding === 'utf32le'
 								            ? whole.readUInt32LE(offset)
 								            : whole.readUInt32BE(offset);
 								        // String.fromCodePoint throws above U+10FFFF; surrogates are not valid scalars.
 								        const isScalar =
 								          codePoint <= 0x10ffff && (codePoint < 0xd800 || codePoint > 0xdfff);
 								        text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
 								      }
 								      return text + tail;
 								    }
 								    default:
 								      return whole.toString('utf8');
 								  }
 								}
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								/**
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								 * Heuristic: determine if a file is likely binary.
 								 * Now BOM-aware: if a Unicode BOM is detected, we treat it as text.
 								 * For non-BOM files, retain the existing null-byte and non-printable ratio checks.
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								 */
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								export async function isBinaryFile(filePath: string): Promise<boolean> {
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								  let fh: fs.promises.FileHandle | null = null;
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								  try {
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								    fh = await fs.promises.open(filePath, 'r');
 								    const stats = await fh.stat();
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								    const fileSize = stats.size;
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								    if (fileSize === 0) return false; // empty is not binary
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								    // Sample up to 4KB from the head (previous behavior)
 								    const sampleSize = Math.min(4096, fileSize);
 								    const buf = Buffer.alloc(sampleSize);
 								    const { bytesRead } = await fh.read(buf, 0, sampleSize, 0);
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								    if (bytesRead === 0) return false;
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								    // BOM → text (avoid false positives for UTF‑16/32 with nulls)
 								    const bom = detectBOM(buf.subarray(0, Math.min(4, bytesRead)));
 								    if (bom) return false;
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								    let nonPrintableCount = 0;
 								    for (let i = 0; i < bytesRead; i++) {
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								      if (buf[i] === 0) return true; // strong indicator of binary when no BOM
 								      if (buf[i] < 9 || (buf[i] > 13 && buf[i] < 32)) {
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        nonPrintableCount++;
 								      }
 								    }
 								    // If >30% non-printable characters, consider it binary
 								    return nonPrintableCount / bytesRead > 0.3;
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								  } catch (error) {
-												refactor(logging): Centralize console logging with debugLogger (#11590)


											
										
										
											2025-10-21 16:35:22 -04:00
+								    debugLogger.warn(
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								      `Failed to check if file is binary: ${filePath}`,
 								      error instanceof Error ? error.message : String(error),
 								    );
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								    return false;
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								  } finally {
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								    if (fh) {
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								      try {
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								        await fh.close();
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								      } catch (closeError) {
-												refactor(logging): Centralize console logging with debugLogger (#11590)


											
										
										
											2025-10-21 16:35:22 -04:00
+								        debugLogger.warn(
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								          `Failed to close file handle for: ${filePath}`,
 								          closeError instanceof Error ? closeError.message : String(closeError),
 								        );
 								      }
 								    }
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								  }
 								}
 								/**
 								 * Detects the type of file based on extension and content.
 								 * Order of checks: TypeScript extensions, `.svg`, MIME lookup (image, audio/video
 								 * confirmed by a content check, PDF), known binary extensions, then a content-based
 								 * binary check.
 								 * @param filePath Path to the file.
 								 * @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
 								 */
 								export async function detectFileType(
 								  filePath: string,
 								): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
 								  const ext = path.extname(filePath).toLowerCase();
 								  // The mimetype for some TypeScript extensions (ts, mts, cts) can be
 								  // MPEG transport stream (a video format), but we want to assume these are
 								  // TypeScript files instead.
 								  if (['.ts', '.mts', '.cts'].includes(ext)) {
 								    return 'text';
 								  }
 								  if (ext === '.svg') {
 								    return 'svg';
 								  }
 								  const lookedUpMimeType = mime.getType(filePath); // Returns null if not found, or the mime type string
 								  if (lookedUpMimeType) {
 								    if (lookedUpMimeType.startsWith('image/')) {
 								      return 'image';
 								    }
 								    // Verify audio/video with content check to avoid MIME misidentification (#16888)
 								    if (
 								      lookedUpMimeType.startsWith('audio/') ||
 								      lookedUpMimeType.startsWith('video/')
 								    ) {
 								      if (!(await isBinaryFile(filePath))) {
 								        return 'text';
 								      }
 								      return lookedUpMimeType.startsWith('audio/') ? 'audio' : 'video';
 								    }
 								    if (lookedUpMimeType === 'application/pdf') {
 								      return 'pdf';
 								    }
 								  }
 								  // Stricter binary check for common non-text extensions before content check
 								  // These are often not well-covered by mime-types or might be misidentified.
 								  if (BINARY_EXTENSIONS.includes(ext)) {
 								    return 'binary';
 								  }
 								  // Fall back to content-based check if mime type wasn't conclusive for image/pdf
 								  // and it's not a known binary extension.
 								  if (await isBinaryFile(filePath)) {
 								    return 'binary';
 								  }
 								  return 'text';
 								}
 								/**
 								 * Result of reading and processing a single file.
 								 */
 								export interface ProcessedFileReadResult {
 								  llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary
 								  returnDisplay: string;
 								  error?: string; // Optional error message for the LLM if file processing failed
 								  errorType?: ToolErrorType; // Structured error type
 								  isTruncated?: boolean; // For text files, indicates if content was truncated
 								  originalLineCount?: number; // For text files
 								  linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
 								}
 								/**
 								 * Reads and processes a single file, handling text, images, and PDFs.
 								 * @param filePath Absolute path to the file.
 								 * @param rootDirectory Absolute path to the project root for relative path display.
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								 * @param _fileSystemService Currently unused in this function; kept for signature stability.
 								 * @param startLine Optional 1-based line number to start reading from.
 								 * @param endLine Optional 1-based line number to end reading at (inclusive).
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								 * @returns ProcessedFileReadResult object.
 								 */
 								export async function processSingleFileContent(
 								  filePath: string,
 								  rootDirectory: string,
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								  _fileSystemService: FileSystemService,
 								  startLine?: number,
 								  endLine?: number,
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								): Promise<ProcessedFileReadResult> {
 								  try {
 								    if (!fs.existsSync(filePath)) {
 								      // Sync check is acceptable before async read
 								      return {
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								        llmContent:
 								          'Could not read file because no file was found at the specified path.',
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        returnDisplay: 'File not found.',
 								        error: `File not found: ${filePath}`,
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								        errorType: ToolErrorType.FILE_NOT_FOUND,
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								      };
 								    }
-												feat: add audio and video support to read_file (#2556)


											
										
										
											2025-07-02 00:52:32 +05:30
+								    const stats = await fs.promises.stat(filePath);
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								    if (stats.isDirectory()) {
 								      return {
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								        llmContent:
 								          'Could not read file because the provided path is a directory, not a file.',
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        returnDisplay: 'Path is a directory.',
 								        error: `Path is a directory, not a file: ${filePath}`,
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								        errorType: ToolErrorType.TARGET_IS_DIRECTORY,
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								      };
 								    }
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								    const fileSizeInMB = stats.size / (1024 * 1024);
 								    if (fileSizeInMB > 20) {
 								      return {
 								        llmContent: 'File size exceeds the 20MB limit.',
 								        returnDisplay: 'File size exceeds the 20MB limit.',
 								        error: `File size exceeds the 20MB limit: ${filePath} (${fileSizeInMB.toFixed(2)}MB)`,
 								        errorType: ToolErrorType.FILE_TOO_LARGE,
 								      };
-												feat: add audio and video support to read_file (#2556)


											
										
										
											2025-07-02 00:52:32 +05:30
+								    }
-												feat: Make file type detection and binary checks asynchronous (#3286) (#3288)


											
										
										
											2025-07-21 08:16:42 +09:00
+								    const fileType = await detectFileType(filePath);
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								    const relativePathForDisplay = path
 								      .relative(rootDirectory, filePath)
 								      .replace(/\\/g, '/');
 								    switch (fileType) {
 								      case 'binary': {
 								        return {
 								          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
 								          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
 								        };
 								      }
-												feat: add .svg support (#3229)


											
										
										
											2025-07-07 11:21:32 +05:30
+								      case 'svg': {
 								        const SVG_MAX_SIZE_BYTES = 1 * 1024 * 1024;
 								        if (stats.size > SVG_MAX_SIZE_BYTES) {
 								          return {
 								            llmContent: `Cannot display content of SVG file larger than 1MB: ${relativePathForDisplay}`,
 								            returnDisplay: `Skipped large SVG file (>1MB): ${relativePathForDisplay}`,
 								          };
 								        }
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								        const content = await readFileWithEncoding(filePath);
-												feat: add .svg support (#3229)


											
										
										
											2025-07-07 11:21:32 +05:30
+								        return {
 								          llmContent: content,
 								          returnDisplay: `Read SVG as text: ${relativePathForDisplay}`,
 								        };
 								      }
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								      case 'text': {
-												fix(core): treat UTF16/32 BOM files as text and decode correctly (#6081)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
Co-authored-by: jacob314 <jacob314@gmail.com>
											
										
										
											2025-08-28 21:13:46 -04:00
+								        // Use BOM-aware reader to avoid leaving a BOM character in content and to support UTF-16/32 transparently
 								        const content = await readFileWithEncoding(filePath);
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        const lines = content.split('\n');
 								        const originalLineCount = lines.length;
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								        let sliceStart = 0;
 								        let sliceEnd = originalLineCount;
 								        if (startLine !== undefined || endLine !== undefined) {
 								          sliceStart = startLine ? startLine - 1 : 0;
 								          sliceEnd = endLine
 								            ? Math.min(endLine, originalLineCount)
 								            : Math.min(
 								                sliceStart + DEFAULT_MAX_LINES_TEXT_FILE,
 								                originalLineCount,
 								              );
 								        } else {
 								          sliceEnd = Math.min(DEFAULT_MAX_LINES_TEXT_FILE, originalLineCount);
 								        }
 								        // Ensure selectedLines doesn't try to slice beyond array bounds
 								        const actualStart = Math.min(sliceStart, originalLineCount);
 								        const selectedLines = lines.slice(actualStart, sliceEnd);
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
 								        let linesWereTruncatedInLength = false;
 								        const formattedLines = selectedLines.map((line) => {
 								          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
 								            linesWereTruncatedInLength = true;
 								            return (
 								              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
 								            );
 								          }
 								          return line;
 								        });
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								        const isTruncated =
 								          actualStart > 0 ||
 								          sliceEnd < originalLineCount ||
 								          linesWereTruncatedInLength;
-												bug(core): Prompt engineering for truncated read_file. (#5161)


											
										
										
											2025-08-06 13:52:04 -07:00
+								        const llmContent = formattedLines.join('\n');
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
-												bug(core): UI reporting for truncated read_file. (#5155)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2025-07-31 09:31:14 -07:00
+								        // By default, return nothing to streamline the common case of a successful read_file.
 								        let returnDisplay = '';
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								        if (actualStart > 0 || sliceEnd < originalLineCount) {
-												bug(core): UI reporting for truncated read_file. (#5155)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2025-07-31 09:31:14 -07:00
+								          returnDisplay = `Read lines ${
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								            actualStart + 1
 								          }-${sliceEnd} of ${originalLineCount} from ${relativePathForDisplay}`;
-												bug(core): UI reporting for truncated read_file. (#5155)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2025-07-31 09:31:14 -07:00
+								          if (linesWereTruncatedInLength) {
 								            returnDisplay += ' (some lines were shortened)';
 								          }
 								        } else if (linesWereTruncatedInLength) {
 								          returnDisplay = `Read all ${originalLineCount} lines from ${relativePathForDisplay} (some lines were shortened)`;
 								        }
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        return {
-												bug(core): Prompt engineering for truncated read_file. (#5161)


											
										
										
											2025-08-06 13:52:04 -07:00
+								          llmContent,
-												bug(core): UI reporting for truncated read_file. (#5155)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
											
										
										
											2025-07-31 09:31:14 -07:00
+								          returnDisplay,
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								          isTruncated,
 								          originalLineCount,
-												feat(core): migrate read_file to 1-based start_line/end_line parameters (#19526)


											
										
										
											2026-02-20 17:59:18 -05:00
+								          linesShown: [actualStart + 1, sliceEnd],
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        };
 								      }
 								      case 'image':
-												feat: add audio and video support to read_file (#2556)


											
										
										
											2025-07-02 00:52:32 +05:30
+								      case 'pdf':
 								      case 'audio':
 								      case 'video': {
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								        const contentBuffer = await fs.promises.readFile(filePath);
 								        const base64Data = contentBuffer.toString('base64');
 								        return {
 								          llmContent: {
 								            inlineData: {
 								              data: base64Data,
-												Reduce bundle size & check it in CI (#7395)


											
										
										
											2025-09-04 23:00:27 +02:00
+								              mimeType: mime.getType(filePath) || 'application/octet-stream',
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								            },
 								          },
 								          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
 								        };
 								      }
 								      default: {
 								        // Should not happen with current detectFileType logic
 								        const exhaustiveCheck: never = fileType;
 								        return {
 								          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
 								          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
 								          error: `Unhandled file type for ${filePath}`,
 								        };
 								      }
 								    }
 								  } catch (error) {
 								    const errorMessage = error instanceof Error ? error.message : String(error);
 								    const displayPath = path
 								      .relative(rootDirectory, filePath)
 								      .replace(/\\/g, '/');
 								    return {
 								      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
 								      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
 								      error: `Error reading file ${filePath}: ${errorMessage}`,
-												Remove unnecessary FileErrorType. (#6697)


											
										
										
											2025-08-20 16:13:29 -07:00
+								      errorType: ToolErrorType.READ_CONTENT_FAILURE,
-												Refactor read-file and support images. (#480)


											
										
										
											2025-05-29 22:30:18 +00:00
+								    };
 								  }
 								}
-												feat(core): Download ripgrep at runtime, if enabled. (#7818)


											
										
										
											2025-09-08 14:44:56 -07:00
 								/**
 								 * Reports whether `filePath` can be seen on disk.
 								 *
 								 * @param filePath Path to probe.
 								 * @returns `true` when the access check succeeds, `false` when it fails for
 								 *   any reason.
 								 */
 								export async function fileExists(filePath: string): Promise<boolean> {
 								  let reachable = true;
 								  try {
 								    // F_OK only asks whether the entry is visible to this process.
 								    await fsPromises.access(filePath, fs.constants.F_OK);
 								  } catch {
 								    // Every failure is reported as "does not exist".
 								    reachable = false;
 								  }
 								  return reachable;
 								}
-												refactor(core): extract static concerns from CoreToolScheduler (#15589)


											
										
										
											2025-12-26 15:51:39 -05:00
-												feat(context): implement observation masking for tool outputs (#18389)


											
										
										
											2026-02-05 20:53:11 -05:00
+								/**
 								 * Sanitizes a string for use as a filename part by removing path traversal
 								 * characters and other non-alphanumeric characters.
 								 */
 								export function sanitizeFilenamePart(part: string): string {
 								  // Every character outside ASCII letters, digits, `_` and `-` (which
 								  // includes `.`, `/` and `\`) is swapped for an underscore, one for one.
 								  const disallowedChars = /[^a-zA-Z0-9_-]/g;
 								  return part.replace(disallowedChars, '_');
 								}
-												refactor(core): foundational truncation refactoring and token estimation optimization (#16824)


											
										
										
											2026-01-16 15:57:47 -08:00
+								/**
-												refactor: simplify tool output truncation to single config (#18446)


											
										
										
											2026-02-06 13:41:19 -08:00
+								 * Formats a truncated message for tool output.
 								 * Shows the first 20% and last 80% of the allowed characters with a marker in between.
-												refactor(core): foundational truncation refactoring and token estimation optimization (#16824)


											
										
										
											2026-01-16 15:57:47 -08:00
+								 */
 								export function formatTruncatedToolOutput(
 								  contentStr: string,
 								  outputFile: string,
-												refactor: simplify tool output truncation to single config (#18446)


											
										
										
											2026-02-06 13:41:19 -08:00
+								  maxChars: number,
-												refactor(core): foundational truncation refactoring and token estimation optimization (#16824)


											
										
										
											2026-01-16 15:57:47 -08:00
+								): string {
-												refactor: simplify tool output truncation to single config (#18446)


											
										
										
											2026-02-06 13:41:19 -08:00
+								  if (contentStr.length <= maxChars) return contentStr;
-												refactor(core): extract static concerns from CoreToolScheduler (#15589)


											
										
										
											2025-12-26 15:51:39 -05:00
-												refactor: simplify tool output truncation to single config (#18446)


											
										
										
											2026-02-06 13:41:19 -08:00
+								  const headChars = Math.floor(maxChars * 0.2);
 								  const tailChars = maxChars - headChars;
 								  const head = contentStr.slice(0, headChars);
 								  const tail = contentStr.slice(-tailChars);
 								  const omittedChars = contentStr.length - headChars - tailChars;
 								  return `Output too large. Showing first ${headChars.toLocaleString()} and last ${tailChars.toLocaleString()} characters. For full output see: ${outputFile}
 								${head}
 								... [${omittedChars.toLocaleString()} characters omitted] ...
 								${tail}`;
-												refactor(core): extract static concerns from CoreToolScheduler (#15589)


											
										
										
											2025-12-26 15:51:39 -05:00
+								}
-												refactor(core): foundational truncation refactoring and token estimation optimization (#16824)


											
										
										
											2026-01-16 15:57:47 -08:00
 								/**
 								 * Name of the directory, created under the project temp directory, that
 								 * holds saved tool outputs.
 								 */
 								export const TOOL_OUTPUTS_DIR = 'tool-outputs';

 								/**
 								 * Saves tool output to a file for later retrieval.
 								 *
 								 * The file lives in `<projectTempDir>/tool-outputs`, or in a
 								 * `session-<sessionId>` subdirectory of it when `sessionId` is provided.
 								 * Missing directories are created.
 								 *
 								 * @param content Text to write.
 								 * @param toolName Tool name; sanitized and lower-cased for the file name.
 								 * @param id Call id (string) or truncation id (number); sanitized and
 								 *   lower-cased for the file name.
 								 * @param projectTempDir Base directory under which outputs are stored.
 								 * @param sessionId Optional session identifier; sanitized for the
 								 *   subdirectory name.
 								 * @returns The path of the written file.
 								 */
 								export async function saveTruncatedToolOutput(
 								  content: string,
 								  toolName: string,
 								  id: string | number, // Accept string (callId) or number (truncationId)
 								  projectTempDir: string,
 								  sessionId?: string,
 								): Promise<{ outputFile: string }> {
 								  const toolPart = sanitizeFilenamePart(toolName).toLowerCase();
 								  const idPart = sanitizeFilenamePart(String(id)).toLowerCase();

 								  // An id that already begins with the tool name is used on its own, so the
 								  // tool name is not repeated in the file name.
 								  const stem = idPart.startsWith(toolPart)
 								    ? idPart
 								    : `${toolPart}_${idPart}`;
 								  const fileName = `${stem}.txt`;

 								  const dirSegments = [projectTempDir, TOOL_OUTPUTS_DIR];
 								  if (sessionId) {
 								    dirSegments.push(`session-${sanitizeFilenamePart(sessionId)}`);
 								  }
 								  const toolOutputDir = path.join(...dirSegments);
 								  const outputFile = path.join(toolOutputDir, fileName);

 								  await fsPromises.mkdir(toolOutputDir, { recursive: true });
 								  await fsPromises.writeFile(outputFile, content);

 								  return { outputFile };
 								}