Refactor read-file and support images. (#480)

2026-04-26 13:04:49 -07:00 · 2025-05-29 22:30:18 +00:00
parent f21abdd1f0
commit dab7517622
13 changed files with 1475 additions and 260 deletions
@@ -0,0 +1,228 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { vi, describe, it, expect, beforeEach, afterEach, Mock } from 'vitest';
+import { ReadFileTool, ReadFileToolParams } from './read-file.js';
+import * as fileUtils from '../utils/fileUtils.js';
+import path from 'path';
+import os from 'os';
+import fs from 'fs'; // For actual fs operations in setup
+
+// Mock fileUtils.processSingleFileContent
+vi.mock('../utils/fileUtils', async () => {
+  const actualFileUtils =
+    await vi.importActual<typeof fileUtils>('../utils/fileUtils');
+  return {
+    ...actualFileUtils, // Spread actual implementations
+    processSingleFileContent: vi.fn(), // Mock specific function
+  };
+});
+
+const mockProcessSingleFileContent = fileUtils.processSingleFileContent as Mock;
+
+describe('ReadFileTool', () => {
+  let tempRootDir: string;
+  let tool: ReadFileTool;
+  const abortSignal = new AbortController().signal;
+
+  beforeEach(() => {
+    // Create a unique temporary root directory for each test run
+    tempRootDir = fs.mkdtempSync(
+      path.join(os.tmpdir(), 'read-file-tool-root-'),
+    );
+    tool = new ReadFileTool(tempRootDir);
+    mockProcessSingleFileContent.mockReset();
+  });
+
+  afterEach(() => {
+    // Clean up the temporary root directory
+    if (fs.existsSync(tempRootDir)) {
+      fs.rmSync(tempRootDir, { recursive: true, force: true });
+    }
+  });
+
+  describe('validateToolParams', () => {
+    it('should return null for valid params (absolute path within root)', () => {
+      const params: ReadFileToolParams = {
+        path: path.join(tempRootDir, 'test.txt'),
+      };
+      expect(tool.validateToolParams(params)).toBeNull();
+    });
+
+    it('should return null for valid params with offset and limit', () => {
+      const params: ReadFileToolParams = {
+        path: path.join(tempRootDir, 'test.txt'),
+        offset: 0,
+        limit: 10,
+      };
+      expect(tool.validateToolParams(params)).toBeNull();
+    });
+
+    it('should return error for relative path', () => {
+      const params: ReadFileToolParams = { path: 'test.txt' };
+      expect(tool.validateToolParams(params)).toMatch(
+        /File path must be absolute/,
+      );
+    });
+
+    it('should return error for path outside root', () => {
+      const outsidePath = path.resolve(os.tmpdir(), 'outside-root.txt');
+      const params: ReadFileToolParams = { path: outsidePath };
+      expect(tool.validateToolParams(params)).toMatch(
+        /File path must be within the root directory/,
+      );
+    });
+
+    it('should return error for negative offset', () => {
+      const params: ReadFileToolParams = {
+        path: path.join(tempRootDir, 'test.txt'),
+        offset: -1,
+        limit: 10,
+      };
+      expect(tool.validateToolParams(params)).toBe(
+        'Offset must be a non-negative number',
+      );
+    });
+
+    it('should return error for non-positive limit', () => {
+      const paramsZero: ReadFileToolParams = {
+        path: path.join(tempRootDir, 'test.txt'),
+        offset: 0,
+        limit: 0,
+      };
+      expect(tool.validateToolParams(paramsZero)).toBe(
+        'Limit must be a positive number',
+      );
+      const paramsNegative: ReadFileToolParams = {
+        path: path.join(tempRootDir, 'test.txt'),
+        offset: 0,
+        limit: -5,
+      };
+      expect(tool.validateToolParams(paramsNegative)).toBe(
+        'Limit must be a positive number',
+      );
+    });
+
+    it('should return error for schema validation failure (e.g. missing path)', () => {
+      const params = { offset: 0 } as unknown as ReadFileToolParams;
+      expect(tool.validateToolParams(params)).toBe(
+        'Parameters failed schema validation.',
+      );
+    });
+  });
+
+  describe('getDescription', () => {
+    it('should return a shortened, relative path', () => {
+      const filePath = path.join(tempRootDir, 'sub', 'dir', 'file.txt');
+      const params: ReadFileToolParams = { path: filePath };
+      // Assuming tempRootDir is something like /tmp/read-file-tool-root-XXXXXX
+      // The relative path would be sub/dir/file.txt
+      expect(tool.getDescription(params)).toBe('sub/dir/file.txt');
+    });
+
+    it('should return . if path is the root directory', () => {
+      const params: ReadFileToolParams = { path: tempRootDir };
+      expect(tool.getDescription(params)).toBe('.');
+    });
+  });
+
+  describe('execute', () => {
+    it('should return validation error if params are invalid', async () => {
+      const params: ReadFileToolParams = { path: 'relative/path.txt' };
+      const result = await tool.execute(params, abortSignal);
+      expect(result.llmContent).toMatch(/Error: Invalid parameters provided/);
+      expect(result.returnDisplay).toMatch(/File path must be absolute/);
+    });
+
+    it('should return error from processSingleFileContent if it fails', async () => {
+      const filePath = path.join(tempRootDir, 'error.txt');
+      const params: ReadFileToolParams = { path: filePath };
+      const errorMessage = 'Simulated read error';
+      mockProcessSingleFileContent.mockResolvedValue({
+        llmContent: `Error reading file ${filePath}: ${errorMessage}`,
+        returnDisplay: `Error reading file ${filePath}: ${errorMessage}`,
+        error: errorMessage,
+      });
+
+      const result = await tool.execute(params, abortSignal);
+      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
+        filePath,
+        tempRootDir,
+        undefined,
+        undefined,
+      );
+      expect(result.llmContent).toContain(errorMessage);
+      expect(result.returnDisplay).toContain(errorMessage);
+    });
+
+    it('should return success result for a text file', async () => {
+      const filePath = path.join(tempRootDir, 'textfile.txt');
+      const fileContent = 'This is a test file.';
+      const params: ReadFileToolParams = { path: filePath };
+      mockProcessSingleFileContent.mockResolvedValue({
+        llmContent: fileContent,
+        returnDisplay: `Read text file: ${path.basename(filePath)}`,
+      });
+
+      const result = await tool.execute(params, abortSignal);
+      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
+        filePath,
+        tempRootDir,
+        undefined,
+        undefined,
+      );
+      expect(result.llmContent).toBe(fileContent);
+      expect(result.returnDisplay).toBe(
+        `Read text file: ${path.basename(filePath)}`,
+      );
+    });
+
+    it('should return success result for an image file', async () => {
+      const filePath = path.join(tempRootDir, 'image.png');
+      const imageData = {
+        inlineData: { mimeType: 'image/png', data: 'base64...' },
+      };
+      const params: ReadFileToolParams = { path: filePath };
+      mockProcessSingleFileContent.mockResolvedValue({
+        llmContent: imageData,
+        returnDisplay: `Read image file: ${path.basename(filePath)}`,
+      });
+
+      const result = await tool.execute(params, abortSignal);
+      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
+        filePath,
+        tempRootDir,
+        undefined,
+        undefined,
+      );
+      expect(result.llmContent).toEqual(imageData);
+      expect(result.returnDisplay).toBe(
+        `Read image file: ${path.basename(filePath)}`,
+      );
+    });
+
+    it('should pass offset and limit to processSingleFileContent', async () => {
+      const filePath = path.join(tempRootDir, 'paginated.txt');
+      const params: ReadFileToolParams = {
+        path: filePath,
+        offset: 10,
+        limit: 5,
+      };
+      mockProcessSingleFileContent.mockResolvedValue({
+        llmContent: 'some lines',
+        returnDisplay: 'Read text file (paginated)',
+      });
+
+      await tool.execute(params, abortSignal);
+      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
+        filePath,
+        tempRootDir,
+        10,
+        5,
+      );
+    });
+  });
+});
@@ -4,11 +4,11 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import fs from 'fs';
 import path from 'path';
 import { SchemaValidator } from '../utils/schemaValidator.js';
 import { makeRelative, shortenPath } from '../utils/paths.js';
 import { BaseTool, ToolResult } from './tools.js';
+import { isWithinRoot, processSingleFileContent } from '../utils/fileUtils.js';

 /**
 * Parameters for the ReadFile tool
@@ -35,14 +35,12 @@ export interface ReadFileToolParams {
 */
 export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
  static readonly Name: string = 'read_file';
-  private static readonly DEFAULT_MAX_LINES = 2000;
-  private static readonly MAX_LINE_LENGTH = 2000;

  constructor(private rootDirectory: string) {
    super(
      ReadFileTool.Name,
      'ReadFile',
-      'Reads and returns the content of a specified file from the local filesystem. Handles large files by allowing reading specific line ranges.',
+      'Reads and returns the content of a specified file from the local filesystem. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.',
      {
        properties: {
          path: {
@@ -52,12 +50,12 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
          },
          offset: {
            description:
-              "Optional: The 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
+              "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
            type: 'number',
          },
          limit: {
            description:
-              "Optional: Maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible).",
+              "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
            type: 'number',
          },
        },
@@ -68,28 +66,6 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
    this.rootDirectory = path.resolve(rootDirectory);
  }

-  /**
-   * Checks if a path is within the root directory
-   * @param pathToCheck The path to check
-   * @returns True if the path is within the root directory, false otherwise
-   */
-  private isWithinRoot(pathToCheck: string): boolean {
-    const normalizedPath = path.normalize(pathToCheck);
-    const normalizedRoot = path.normalize(this.rootDirectory);
-    const rootWithSep = normalizedRoot.endsWith(path.sep)
-      ? normalizedRoot
-      : normalizedRoot + path.sep;
-    return (
-      normalizedPath === normalizedRoot ||
-      normalizedPath.startsWith(rootWithSep)
-    );
-  }
-
-  /**
-   * Validates the parameters for the ReadFile tool
-   * @param params Parameters to validate
-   * @returns True if parameters are valid, false otherwise
-   */
  validateToolParams(params: ReadFileToolParams): string | null {
    if (
      this.schema.parameters &&
@@ -104,7 +80,7 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
    if (!path.isAbsolute(filePath)) {
      return `File path must be absolute: ${filePath}`;
    }
-    if (!this.isWithinRoot(filePath)) {
+    if (!isWithinRoot(filePath, this.rootDirectory)) {
      return `File path must be within the root directory (${this.rootDirectory}): ${filePath}`;
    }
    if (params.offset !== undefined && params.offset < 0) {
@@ -116,83 +92,11 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
    return null;
  }

-  /**
-   * Determines if a file is likely binary based on content sampling
-   * @param filePath Path to the file
-   * @returns True if the file appears to be binary
-   */
-  private isBinaryFile(filePath: string): boolean {
-    try {
-      // Read the first 4KB of the file
-      const fd = fs.openSync(filePath, 'r');
-      const buffer = Buffer.alloc(4096);
-      const bytesRead = fs.readSync(fd, buffer, 0, 4096, 0);
-      fs.closeSync(fd);
-
-      // Check for null bytes or high concentration of non-printable characters
-      let nonPrintableCount = 0;
-      for (let i = 0; i < bytesRead; i++) {
-        // Null byte is a strong indicator of binary data
-        if (buffer[i] === 0) {
-          return true;
-        }
-
-        // Count non-printable characters
-        if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
-          nonPrintableCount++;
-        }
-      }
-
-      // If more than 30% are non-printable, likely binary
-      return nonPrintableCount / bytesRead > 0.3;
-    } catch {
-      return false;
-    }
-  }
-
-  /**
-   * Detects the type of file based on extension and content
-   * @param filePath Path to the file
-   * @returns File type description
-   */
-  private detectFileType(filePath: string): string {
-    const ext = path.extname(filePath).toLowerCase();
-
-    // Common image formats
-    if (
-      ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'].includes(ext)
-    ) {
-      return 'image';
-    }
-
-    // Other known binary formats
-    if (['.pdf', '.zip', '.tar', '.gz', '.exe', '.dll', '.so'].includes(ext)) {
-      return 'binary';
-    }
-
-    // Check content for binary indicators
-    if (this.isBinaryFile(filePath)) {
-      return 'binary';
-    }
-
-    return 'text';
-  }
-
-  /**
-   * Gets a description of the file reading operation
-   * @param params Parameters for the file reading
-   * @returns A string describing the file being read
-   */
  getDescription(params: ReadFileToolParams): string {
    const relativePath = makeRelative(params.path, this.rootDirectory);
    return shortenPath(relativePath);
  }

-  /**
-   * Reads a file and returns its contents with line numbers
-   * @param params Parameters for the file reading
-   * @returns Result with file contents
-   */
  async execute(
    params: ReadFileToolParams,
    _signal: AbortSignal,
@@ -205,75 +109,23 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
      };
    }

-    const filePath = params.path;
-    try {
-      if (!fs.existsSync(filePath)) {
-        return {
-          llmContent: `File not found: ${filePath}`,
-          returnDisplay: `File not found.`,
-        };
-      }
+    const result = await processSingleFileContent(
+      params.path,
+      this.rootDirectory,
+      params.offset,
+      params.limit,
+    );

-      const stats = fs.statSync(filePath);
-      if (stats.isDirectory()) {
-        return {
-          llmContent: `Path is a directory, not a file: ${filePath}`,
-          returnDisplay: `File is directory.`,
-        };
-      }
-
-      const fileType = this.detectFileType(filePath);
-      if (fileType !== 'text') {
-        return {
-          llmContent: `Binary file: ${filePath} (${fileType})`,
-          // For binary files, maybe returnDisplay should be empty or indicate binary?
-          // Keeping it empty for now.
-          returnDisplay: ``,
-        };
-      }
-
-      const content = fs.readFileSync(filePath, 'utf8');
-      const lines = content.split('\n');
-
-      const startLine = params.offset || 0;
-      const endLine = params.limit
-        ? startLine + params.limit
-        : Math.min(startLine + ReadFileTool.DEFAULT_MAX_LINES, lines.length);
-      const selectedLines = lines.slice(startLine, endLine);
-
-      let truncated = false;
-      const formattedLines = selectedLines.map((line) => {
-        let processedLine = line;
-        if (line.length > ReadFileTool.MAX_LINE_LENGTH) {
-          processedLine =
-            line.substring(0, ReadFileTool.MAX_LINE_LENGTH) + '... [truncated]';
-          truncated = true;
-        }
-
-        return processedLine;
-      });
-
-      const contentTruncated = endLine < lines.length || truncated;
-
-      let llmContent = '';
-      if (contentTruncated) {
-        llmContent += `[File truncated: showing lines ${startLine + 1}-${endLine} of ${lines.length} total lines. Use offset parameter to view more.]\n`;
-      }
-      llmContent += formattedLines.join('\n');
-
-      // Here, returnDisplay could potentially be enhanced, but for now,
-      // it's kept empty as the LLM content itself is descriptive.
+    if (result.error) {
      return {
-        llmContent,
-        returnDisplay: '',
-      };
-    } catch (error) {
-      const errorMsg = `Error reading file: ${error instanceof Error ? error.message : String(error)}`;
-
-      return {
-        llmContent: `Error reading file ${filePath}: ${errorMsg}`,
-        returnDisplay: `Failed to read file: ${errorMsg}`,
+        llmContent: result.error, // The detailed error for LLM
+        returnDisplay: result.returnDisplay, // User-friendly error
      };
    }
+
+    return {
+      llmContent: result.llmContent,
+      returnDisplay: result.returnDisplay,
+    };
  }
 }
@@ -115,6 +115,33 @@ describe('ReadManyFilesTool', () => {
      };
      expect(tool.validateParams(params)).toBeNull();
    });
+
+    it('should return error if paths array contains an empty string', () => {
+      const params = { paths: ['file1.txt', ''] };
+      expect(tool.validateParams(params)).toBe(
+        'Each item in "paths" must be a non-empty string/glob pattern.',
+      );
+    });
+
+    it('should return error if include array contains non-string elements', () => {
+      const params = {
+        paths: ['file1.txt'],
+        include: ['*.ts', 123] as string[],
+      };
+      expect(tool.validateParams(params)).toBe(
+        'If provided, "include" must be an array of strings/glob patterns.',
+      );
+    });
+
+    it('should return error if exclude array contains non-string elements', () => {
+      const params = {
+        paths: ['file1.txt'],
+        exclude: ['*.log', {}] as string[],
+      };
+      expect(tool.validateParams(params)).toBe(
+        'If provided, "exclude" must be an array of strings/glob patterns.',
+      );
+    });
  });

  describe('execute', () => {
@@ -7,13 +7,16 @@
 import { BaseTool, ToolResult } from './tools.js';
 import { SchemaValidator } from '../utils/schemaValidator.js';
 import { getErrorMessage } from '../utils/errors.js';
-import * as fs from 'fs/promises';
 import * as path from 'path';
 import fg from 'fast-glob';
 import { GEMINI_MD_FILENAME } from './memoryTool.js';
-
+import {
+  detectFileType,
+  processSingleFileContent,
+  DEFAULT_ENCODING,
+} from '../utils/fileUtils.js';
 import { PartListUnion } from '@google/genai';
-import mime from 'mime-types';
+
 /**
 * Parameters for the ReadManyFilesTool.
 */
@@ -98,8 +101,6 @@ const DEFAULT_EXCLUDES: string[] = [
  `**/${GEMINI_MD_FILENAME}`,
 ];

-// Default values for encoding and separator format
-const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
 const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';

 /**
@@ -256,11 +257,10 @@ Use this tool when the user's query implies needing the content of several files
    } = params;

    const toolBaseDir = this.targetDir;
-
    const filesToConsider = new Set<string>();
    const skippedFiles: Array<{ path: string; reason: string }> = [];
    const processedFilesRelativePaths: string[] = [];
-    const content: PartListUnion = [];
+    const contentParts: PartListUnion = [];

    const effectiveExcludes = useDefaultExcludes
      ? [...DEFAULT_EXCLUDES, ...exclude]
@@ -315,69 +315,50 @@ Use this tool when the user's query implies needing the content of several files
      const relativePathForDisplay = path
        .relative(toolBaseDir, filePath)
        .replace(/\\/g, '/');
-      try {
-        const mimeType = mime.lookup(filePath);
-        if (
-          mimeType &&
-          (mimeType.startsWith('image/') || mimeType === 'application/pdf')
-        ) {
-          const fileExtension = path.extname(filePath);
-          const fileNameWithoutExtension = path.basename(
-            filePath,
-            fileExtension,
-          );
-          const requestedExplicitly = inputPatterns.some(
-            (pattern: string) =>
-              pattern.toLowerCase().includes(fileExtension) ||
-              pattern.includes(fileNameWithoutExtension),
-          );

-          if (!requestedExplicitly) {
-            skippedFiles.push({
-              path: relativePathForDisplay,
-              reason:
-                'asset file (image/pdf) was not explicitly requested by name or extension',
-            });
-            continue;
-          }
-          const contentBuffer = await fs.readFile(filePath);
-          const base64Data = contentBuffer.toString('base64');
-          content.push({
-            inlineData: {
-              data: base64Data,
-              mimeType,
-            },
+      const fileType = detectFileType(filePath);
+
+      if (fileType === 'image' || fileType === 'pdf') {
+        const fileExtension = path.extname(filePath).toLowerCase();
+        const fileNameWithoutExtension = path.basename(filePath, fileExtension);
+        const requestedExplicitly = inputPatterns.some(
+          (pattern: string) =>
+            pattern.toLowerCase().includes(fileExtension) ||
+            pattern.includes(fileNameWithoutExtension),
+        );
+
+        if (!requestedExplicitly) {
+          skippedFiles.push({
+            path: relativePathForDisplay,
+            reason:
+              'asset file (image/pdf) was not explicitly requested by name or extension',
          });
-          processedFilesRelativePaths.push(relativePathForDisplay);
-        } else {
-          const contentBuffer = await fs.readFile(filePath);
-          // Basic binary detection: check for null bytes in the first 1KB
-          const sample = contentBuffer.subarray(
-            0,
-            Math.min(contentBuffer.length, 1024),
-          );
-          if (sample.includes(0)) {
-            skippedFiles.push({
-              path: relativePathForDisplay,
-              reason: 'appears to be binary',
-            });
-            continue;
-          }
-          // Using default encoding
-          const fileContent = contentBuffer.toString(DEFAULT_ENCODING);
-          // Using default separator format
+          continue;
+        }
+      }
+
+      // Use processSingleFileContent for all file types now
+      const fileReadResult = await processSingleFileContent(
+        filePath,
+        toolBaseDir,
+      );
+
+      if (fileReadResult.error) {
+        skippedFiles.push({
+          path: relativePathForDisplay,
+          reason: `Read error: ${fileReadResult.error}`,
+        });
+      } else {
+        if (typeof fileReadResult.llmContent === 'string') {
          const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
            '{filePath}',
            relativePathForDisplay,
          );
-          content.push(`${separator}\n\n${fileContent}\n\n`);
-          processedFilesRelativePaths.push(relativePathForDisplay);
+          contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`);
+        } else {
+          contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
        }
-      } catch (error) {
-        skippedFiles.push({
-          path: relativePathForDisplay,
-          reason: `Read error: ${getErrorMessage(error)}`,
-        });
+        processedFilesRelativePaths.push(relativePathForDisplay);
      }
    }

@@ -422,13 +403,13 @@ Use this tool when the user's query implies needing the content of several files
      displayMessage += `No files were read and concatenated based on the criteria.\n`;
    }

-    if (content.length === 0) {
-      content.push(
+    if (contentParts.length === 0) {
+      contentParts.push(
        'No files matching the criteria were found or all were skipped.',
      );
    }
    return {
-      llmContent: content,
+      llmContent: contentParts,
      returnDisplay: displayMessage.trim(),
    };
  }