Refactor read-file and support images. (#480)

This commit is contained in:
Jacob Richman
2025-05-29 22:30:18 +00:00
committed by GitHub
parent f21abdd1f0
commit dab7517622
13 changed files with 1475 additions and 260 deletions
+228
View File
@@ -0,0 +1,228 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi, describe, it, expect, beforeEach, afterEach, Mock } from 'vitest';
import { ReadFileTool, ReadFileToolParams } from './read-file.js';
import * as fileUtils from '../utils/fileUtils.js';
import path from 'path';
import os from 'os';
import fs from 'fs'; // For actual fs operations in setup
// Mock fileUtils.processSingleFileContent
vi.mock('../utils/fileUtils', async () => {
const actualFileUtils =
await vi.importActual<typeof fileUtils>('../utils/fileUtils');
return {
...actualFileUtils, // Spread actual implementations
processSingleFileContent: vi.fn(), // Mock specific function
};
});
const mockProcessSingleFileContent = fileUtils.processSingleFileContent as Mock;
describe('ReadFileTool', () => {
let tempRootDir: string;
let tool: ReadFileTool;
const abortSignal = new AbortController().signal;
beforeEach(() => {
// Create a unique temporary root directory for each test run
tempRootDir = fs.mkdtempSync(
path.join(os.tmpdir(), 'read-file-tool-root-'),
);
tool = new ReadFileTool(tempRootDir);
mockProcessSingleFileContent.mockReset();
});
afterEach(() => {
// Clean up the temporary root directory
if (fs.existsSync(tempRootDir)) {
fs.rmSync(tempRootDir, { recursive: true, force: true });
}
});
describe('validateToolParams', () => {
it('should return null for valid params (absolute path within root)', () => {
const params: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
};
expect(tool.validateToolParams(params)).toBeNull();
});
it('should return null for valid params with offset and limit', () => {
const params: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: 0,
limit: 10,
};
expect(tool.validateToolParams(params)).toBeNull();
});
it('should return error for relative path', () => {
const params: ReadFileToolParams = { path: 'test.txt' };
expect(tool.validateToolParams(params)).toMatch(
/File path must be absolute/,
);
});
it('should return error for path outside root', () => {
const outsidePath = path.resolve(os.tmpdir(), 'outside-root.txt');
const params: ReadFileToolParams = { path: outsidePath };
expect(tool.validateToolParams(params)).toMatch(
/File path must be within the root directory/,
);
});
it('should return error for negative offset', () => {
const params: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: -1,
limit: 10,
};
expect(tool.validateToolParams(params)).toBe(
'Offset must be a non-negative number',
);
});
it('should return error for non-positive limit', () => {
const paramsZero: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: 0,
limit: 0,
};
expect(tool.validateToolParams(paramsZero)).toBe(
'Limit must be a positive number',
);
const paramsNegative: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: 0,
limit: -5,
};
expect(tool.validateToolParams(paramsNegative)).toBe(
'Limit must be a positive number',
);
});
it('should return error for schema validation failure (e.g. missing path)', () => {
const params = { offset: 0 } as unknown as ReadFileToolParams;
expect(tool.validateToolParams(params)).toBe(
'Parameters failed schema validation.',
);
});
});
describe('getDescription', () => {
it('should return a shortened, relative path', () => {
const filePath = path.join(tempRootDir, 'sub', 'dir', 'file.txt');
const params: ReadFileToolParams = { path: filePath };
// Assuming tempRootDir is something like /tmp/read-file-tool-root-XXXXXX
// The relative path would be sub/dir/file.txt
expect(tool.getDescription(params)).toBe('sub/dir/file.txt');
});
it('should return . if path is the root directory', () => {
const params: ReadFileToolParams = { path: tempRootDir };
expect(tool.getDescription(params)).toBe('.');
});
});
describe('execute', () => {
it('should return validation error if params are invalid', async () => {
const params: ReadFileToolParams = { path: 'relative/path.txt' };
const result = await tool.execute(params, abortSignal);
expect(result.llmContent).toMatch(/Error: Invalid parameters provided/);
expect(result.returnDisplay).toMatch(/File path must be absolute/);
});
it('should return error from processSingleFileContent if it fails', async () => {
const filePath = path.join(tempRootDir, 'error.txt');
const params: ReadFileToolParams = { path: filePath };
const errorMessage = 'Simulated read error';
mockProcessSingleFileContent.mockResolvedValue({
llmContent: `Error reading file ${filePath}: ${errorMessage}`,
returnDisplay: `Error reading file ${filePath}: ${errorMessage}`,
error: errorMessage,
});
const result = await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
undefined,
undefined,
);
expect(result.llmContent).toContain(errorMessage);
expect(result.returnDisplay).toContain(errorMessage);
});
it('should return success result for a text file', async () => {
const filePath = path.join(tempRootDir, 'textfile.txt');
const fileContent = 'This is a test file.';
const params: ReadFileToolParams = { path: filePath };
mockProcessSingleFileContent.mockResolvedValue({
llmContent: fileContent,
returnDisplay: `Read text file: ${path.basename(filePath)}`,
});
const result = await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
undefined,
undefined,
);
expect(result.llmContent).toBe(fileContent);
expect(result.returnDisplay).toBe(
`Read text file: ${path.basename(filePath)}`,
);
});
it('should return success result for an image file', async () => {
const filePath = path.join(tempRootDir, 'image.png');
const imageData = {
inlineData: { mimeType: 'image/png', data: 'base64...' },
};
const params: ReadFileToolParams = { path: filePath };
mockProcessSingleFileContent.mockResolvedValue({
llmContent: imageData,
returnDisplay: `Read image file: ${path.basename(filePath)}`,
});
const result = await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
undefined,
undefined,
);
expect(result.llmContent).toEqual(imageData);
expect(result.returnDisplay).toBe(
`Read image file: ${path.basename(filePath)}`,
);
});
it('should pass offset and limit to processSingleFileContent', async () => {
const filePath = path.join(tempRootDir, 'paginated.txt');
const params: ReadFileToolParams = {
path: filePath,
offset: 10,
limit: 5,
};
mockProcessSingleFileContent.mockResolvedValue({
llmContent: 'some lines',
returnDisplay: 'Read text file (paginated)',
});
await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
10,
5,
);
});
});
});
+19 -167
View File
@@ -4,11 +4,11 @@
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'fs';
import path from 'path';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { BaseTool, ToolResult } from './tools.js';
import { isWithinRoot, processSingleFileContent } from '../utils/fileUtils.js';
/**
* Parameters for the ReadFile tool
@@ -35,14 +35,12 @@ export interface ReadFileToolParams {
*/
export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
static readonly Name: string = 'read_file';
private static readonly DEFAULT_MAX_LINES = 2000;
private static readonly MAX_LINE_LENGTH = 2000;
constructor(private rootDirectory: string) {
super(
ReadFileTool.Name,
'ReadFile',
'Reads and returns the content of a specified file from the local filesystem. Handles large files by allowing reading specific line ranges.',
'Reads and returns the content of a specified file from the local filesystem. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.',
{
properties: {
path: {
@@ -52,12 +50,12 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
},
offset: {
description:
"Optional: The 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
type: 'number',
},
limit: {
description:
"Optional: Maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible).",
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
type: 'number',
},
},
@@ -68,28 +66,6 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
this.rootDirectory = path.resolve(rootDirectory);
}
/**
* Checks if a path is within the root directory
* @param pathToCheck The path to check
* @returns True if the path is within the root directory, false otherwise
*/
private isWithinRoot(pathToCheck: string): boolean {
const normalizedPath = path.normalize(pathToCheck);
const normalizedRoot = path.normalize(this.rootDirectory);
const rootWithSep = normalizedRoot.endsWith(path.sep)
? normalizedRoot
: normalizedRoot + path.sep;
return (
normalizedPath === normalizedRoot ||
normalizedPath.startsWith(rootWithSep)
);
}
/**
* Validates the parameters for the ReadFile tool
* @param params Parameters to validate
* @returns True if parameters are valid, false otherwise
*/
validateToolParams(params: ReadFileToolParams): string | null {
if (
this.schema.parameters &&
@@ -104,7 +80,7 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
if (!path.isAbsolute(filePath)) {
return `File path must be absolute: ${filePath}`;
}
if (!this.isWithinRoot(filePath)) {
if (!isWithinRoot(filePath, this.rootDirectory)) {
return `File path must be within the root directory (${this.rootDirectory}): ${filePath}`;
}
if (params.offset !== undefined && params.offset < 0) {
@@ -116,83 +92,11 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
return null;
}
/**
* Determines if a file is likely binary based on content sampling
* @param filePath Path to the file
* @returns True if the file appears to be binary
*/
private isBinaryFile(filePath: string): boolean {
try {
// Read the first 4KB of the file
const fd = fs.openSync(filePath, 'r');
const buffer = Buffer.alloc(4096);
const bytesRead = fs.readSync(fd, buffer, 0, 4096, 0);
fs.closeSync(fd);
// Check for null bytes or high concentration of non-printable characters
let nonPrintableCount = 0;
for (let i = 0; i < bytesRead; i++) {
// Null byte is a strong indicator of binary data
if (buffer[i] === 0) {
return true;
}
// Count non-printable characters
if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
nonPrintableCount++;
}
}
// If more than 30% are non-printable, likely binary
return nonPrintableCount / bytesRead > 0.3;
} catch {
return false;
}
}
/**
* Detects the type of file based on extension and content
* @param filePath Path to the file
* @returns File type description
*/
private detectFileType(filePath: string): string {
const ext = path.extname(filePath).toLowerCase();
// Common image formats
if (
['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'].includes(ext)
) {
return 'image';
}
// Other known binary formats
if (['.pdf', '.zip', '.tar', '.gz', '.exe', '.dll', '.so'].includes(ext)) {
return 'binary';
}
// Check content for binary indicators
if (this.isBinaryFile(filePath)) {
return 'binary';
}
return 'text';
}
/**
* Gets a description of the file reading operation
* @param params Parameters for the file reading
* @returns A string describing the file being read
*/
getDescription(params: ReadFileToolParams): string {
const relativePath = makeRelative(params.path, this.rootDirectory);
return shortenPath(relativePath);
}
/**
* Reads a file and returns its contents with line numbers
* @param params Parameters for the file reading
* @returns Result with file contents
*/
async execute(
params: ReadFileToolParams,
_signal: AbortSignal,
@@ -205,75 +109,23 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
};
}
const filePath = params.path;
try {
if (!fs.existsSync(filePath)) {
return {
llmContent: `File not found: ${filePath}`,
returnDisplay: `File not found.`,
};
}
const result = await processSingleFileContent(
params.path,
this.rootDirectory,
params.offset,
params.limit,
);
const stats = fs.statSync(filePath);
if (stats.isDirectory()) {
return {
llmContent: `Path is a directory, not a file: ${filePath}`,
returnDisplay: `File is directory.`,
};
}
const fileType = this.detectFileType(filePath);
if (fileType !== 'text') {
return {
llmContent: `Binary file: ${filePath} (${fileType})`,
// For binary files, maybe returnDisplay should be empty or indicate binary?
// Keeping it empty for now.
returnDisplay: ``,
};
}
const content = fs.readFileSync(filePath, 'utf8');
const lines = content.split('\n');
const startLine = params.offset || 0;
const endLine = params.limit
? startLine + params.limit
: Math.min(startLine + ReadFileTool.DEFAULT_MAX_LINES, lines.length);
const selectedLines = lines.slice(startLine, endLine);
let truncated = false;
const formattedLines = selectedLines.map((line) => {
let processedLine = line;
if (line.length > ReadFileTool.MAX_LINE_LENGTH) {
processedLine =
line.substring(0, ReadFileTool.MAX_LINE_LENGTH) + '... [truncated]';
truncated = true;
}
return processedLine;
});
const contentTruncated = endLine < lines.length || truncated;
let llmContent = '';
if (contentTruncated) {
llmContent += `[File truncated: showing lines ${startLine + 1}-${endLine} of ${lines.length} total lines. Use offset parameter to view more.]\n`;
}
llmContent += formattedLines.join('\n');
// Here, returnDisplay could potentially be enhanced, but for now,
// it's kept empty as the LLM content itself is descriptive.
if (result.error) {
return {
llmContent,
returnDisplay: '',
};
} catch (error) {
const errorMsg = `Error reading file: ${error instanceof Error ? error.message : String(error)}`;
return {
llmContent: `Error reading file ${filePath}: ${errorMsg}`,
returnDisplay: `Failed to read file: ${errorMsg}`,
llmContent: result.error, // The detailed error for LLM
returnDisplay: result.returnDisplay, // User-friendly error
};
}
return {
llmContent: result.llmContent,
returnDisplay: result.returnDisplay,
};
}
}
@@ -115,6 +115,33 @@ describe('ReadManyFilesTool', () => {
};
expect(tool.validateParams(params)).toBeNull();
});
it('should return error if paths array contains an empty string', () => {
const params = { paths: ['file1.txt', ''] };
expect(tool.validateParams(params)).toBe(
'Each item in "paths" must be a non-empty string/glob pattern.',
);
});
it('should return error if include array contains non-string elements', () => {
const params = {
paths: ['file1.txt'],
include: ['*.ts', 123] as string[],
};
expect(tool.validateParams(params)).toBe(
'If provided, "include" must be an array of strings/glob patterns.',
);
});
it('should return error if exclude array contains non-string elements', () => {
const params = {
paths: ['file1.txt'],
exclude: ['*.log', {}] as string[],
};
expect(tool.validateParams(params)).toBe(
'If provided, "exclude" must be an array of strings/glob patterns.',
);
});
});
describe('execute', () => {
+47 -66
View File
@@ -7,13 +7,16 @@
import { BaseTool, ToolResult } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { getErrorMessage } from '../utils/errors.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import fg from 'fast-glob';
import { GEMINI_MD_FILENAME } from './memoryTool.js';
import {
detectFileType,
processSingleFileContent,
DEFAULT_ENCODING,
} from '../utils/fileUtils.js';
import { PartListUnion } from '@google/genai';
import mime from 'mime-types';
/**
* Parameters for the ReadManyFilesTool.
*/
@@ -98,8 +101,6 @@ const DEFAULT_EXCLUDES: string[] = [
`**/${GEMINI_MD_FILENAME}`,
];
// Default values for encoding and separator format
const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';
/**
@@ -256,11 +257,10 @@ Use this tool when the user's query implies needing the content of several files
} = params;
const toolBaseDir = this.targetDir;
const filesToConsider = new Set<string>();
const skippedFiles: Array<{ path: string; reason: string }> = [];
const processedFilesRelativePaths: string[] = [];
const content: PartListUnion = [];
const contentParts: PartListUnion = [];
const effectiveExcludes = useDefaultExcludes
? [...DEFAULT_EXCLUDES, ...exclude]
@@ -315,69 +315,50 @@ Use this tool when the user's query implies needing the content of several files
const relativePathForDisplay = path
.relative(toolBaseDir, filePath)
.replace(/\\/g, '/');
try {
const mimeType = mime.lookup(filePath);
if (
mimeType &&
(mimeType.startsWith('image/') || mimeType === 'application/pdf')
) {
const fileExtension = path.extname(filePath);
const fileNameWithoutExtension = path.basename(
filePath,
fileExtension,
);
const requestedExplicitly = inputPatterns.some(
(pattern: string) =>
pattern.toLowerCase().includes(fileExtension) ||
pattern.includes(fileNameWithoutExtension),
);
if (!requestedExplicitly) {
skippedFiles.push({
path: relativePathForDisplay,
reason:
'asset file (image/pdf) was not explicitly requested by name or extension',
});
continue;
}
const contentBuffer = await fs.readFile(filePath);
const base64Data = contentBuffer.toString('base64');
content.push({
inlineData: {
data: base64Data,
mimeType,
},
const fileType = detectFileType(filePath);
if (fileType === 'image' || fileType === 'pdf') {
const fileExtension = path.extname(filePath).toLowerCase();
const fileNameWithoutExtension = path.basename(filePath, fileExtension);
const requestedExplicitly = inputPatterns.some(
(pattern: string) =>
pattern.toLowerCase().includes(fileExtension) ||
pattern.includes(fileNameWithoutExtension),
);
if (!requestedExplicitly) {
skippedFiles.push({
path: relativePathForDisplay,
reason:
'asset file (image/pdf) was not explicitly requested by name or extension',
});
processedFilesRelativePaths.push(relativePathForDisplay);
} else {
const contentBuffer = await fs.readFile(filePath);
// Basic binary detection: check for null bytes in the first 1KB
const sample = contentBuffer.subarray(
0,
Math.min(contentBuffer.length, 1024),
);
if (sample.includes(0)) {
skippedFiles.push({
path: relativePathForDisplay,
reason: 'appears to be binary',
});
continue;
}
// Using default encoding
const fileContent = contentBuffer.toString(DEFAULT_ENCODING);
// Using default separator format
continue;
}
}
// Use processSingleFileContent for all file types now
const fileReadResult = await processSingleFileContent(
filePath,
toolBaseDir,
);
if (fileReadResult.error) {
skippedFiles.push({
path: relativePathForDisplay,
reason: `Read error: ${fileReadResult.error}`,
});
} else {
if (typeof fileReadResult.llmContent === 'string') {
const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
'{filePath}',
relativePathForDisplay,
);
content.push(`${separator}\n\n${fileContent}\n\n`);
processedFilesRelativePaths.push(relativePathForDisplay);
contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`);
} else {
contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
}
} catch (error) {
skippedFiles.push({
path: relativePathForDisplay,
reason: `Read error: ${getErrorMessage(error)}`,
});
processedFilesRelativePaths.push(relativePathForDisplay);
}
}
@@ -422,13 +403,13 @@ Use this tool when the user's query implies needing the content of several files
displayMessage += `No files were read and concatenated based on the criteria.\n`;
}
if (content.length === 0) {
content.push(
if (contentParts.length === 0) {
contentParts.push(
'No files matching the criteria were found or all were skipped.',
);
}
return {
llmContent: content,
llmContent: contentParts,
returnDisplay: displayMessage.trim(),
};
}