// 2025-04-23 17:25:47 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { BaseTool , ToolResult } from './tools.js' ;
import { SchemaValidator } from '../utils/schemaValidator.js' ;
import { getErrorMessage } from '../utils/errors.js' ;
import * as fs from 'fs/promises' ;
import * as path from 'path' ;
import fg from 'fast-glob' ;
/**
* Parameters for the ReadManyFilesTool.
*/
export interface ReadManyFilesParams {
  /**
   * File paths, directory paths or glob patterns to search. Every entry is
   * interpreted relative to the tool's configured target directory.
   */
  paths: string[];

  /**
   * Optional additional glob patterns. They are appended to `paths` to form
   * the complete list of search patterns.
   * Example: ["*.ts", "docs/*.md"]
   */
  include?: string[];

  /**
   * Optional glob patterns for files or directories that must be left out.
   * They are handed to the glob search as ignore patterns.
   * Example: ["*.log", "dist/**"]
   */
  exclude?: string[];

  /**
   * Optional recursion flag. Recursion is expressed through `**` inside the
   * glob patterns themselves, so the tool relies on the patterns rather than
   * on this flag.
   */
  recursive?: boolean;

  /**
   * Optional. When true or omitted, the built-in default exclusion patterns
   * are applied in addition to `exclude`.
   */
  useDefaultExcludes?: boolean;
}
/**
* Default exclusion patterns for commonly ignored directories and binary file types.
* These are compatible with glob ignore patterns.
* TODO(adh): Consider making this configurable or extendable through a command-line argument.
* TODO(adh): Look into sharing this list with the glob tool.
*/
const DEFAULT_EXCLUDES: string[] = [
  // Dependency, version-control, IDE, build-output and cache directories.
  '**/node_modules/**',
  '**/.git/**',
  '**/.vscode/**',
  '**/.idea/**',
  '**/dist/**',
  '**/build/**',
  '**/coverage/**',
  '**/__pycache__/**',
  // Compiled artifacts, native binaries and Java packages.
  '**/*.pyc',
  '**/*.pyo',
  '**/*.bin',
  '**/*.exe',
  '**/*.dll',
  '**/*.so',
  '**/*.dylib',
  '**/*.class',
  '**/*.jar',
  '**/*.war',
  // Archives.
  '**/*.zip',
  '**/*.tar',
  '**/*.gz',
  '**/*.bz2',
  '**/*.rar',
  '**/*.7z',
  // Images.
  '**/*.png',
  '**/*.jpg',
  '**/*.jpeg',
  '**/*.gif',
  '**/*.bmp',
  '**/*.tiff',
  '**/*.ico',
  // Office-style documents.
  '**/*.pdf',
  '**/*.doc',
  '**/*.docx',
  '**/*.xls',
  '**/*.xlsx',
  '**/*.ppt',
  '**/*.pptx',
  '**/*.odt',
  '**/*.ods',
  '**/*.odp',
  // OS metadata, environment files and GEMINI.md files.
  '**/*.DS_Store',
  '**/.env',
  '**/GEMINI.md',
];
// Default values for encoding and separator format
// Encoding used to decode every file buffer into text.
const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
// Header template emitted before each file's content; `{filePath}` is the
// placeholder that gets substituted with the file's path.
const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';
/**
* Tool implementation for finding and reading multiple text files from the local filesystem
* within a specified target directory. The content is concatenated.
* It is intended to run in an environment with access to the local file system (e.g., a Node.js backend).
*/
export class ReadManyFilesTool extends BaseTool<
  ReadManyFilesParams,
  ToolResult
> {
  /** Tool identifier handed to the base class as the tool's name. */
  static readonly Name: string = 'read_many_files';

  /**
   * Creates an instance of ReadManyFilesTool.
   * @param targetDir The root directory within which this tool is allowed to operate.
   *   It is normalized with `path.resolve`; all patterns provided in `params`
   *   are evaluated relative to it.
   */
  constructor(readonly targetDir: string) {
    const parameterSchema: Record<string, unknown> = {
      type: 'object',
      properties: {
        paths: {
          type: 'array',
          items: { type: 'string' },
          description:
            "Required. An array of glob patterns or paths relative to the tool's target directory. Examples: ['src/**/*.ts'], ['README.md', 'docs/']",
        },
        include: {
          type: 'array',
          items: { type: 'string' },
          description:
            'Optional. Additional glob patterns to include. These are merged with `paths`. Example: ["*.test.ts"] to specifically add test files if they were broadly excluded.',
          default: [],
        },
        exclude: {
          type: 'array',
          items: { type: 'string' },
          description:
            'Optional. Glob patterns for files/directories to exclude. Added to default excludes if useDefaultExcludes is true. Example: ["**/*.log", "temp/"]',
          default: [],
        },
        recursive: {
          type: 'boolean',
          description:
            'Optional. Whether to search recursively (primarily controlled by `**` in glob patterns). Defaults to true.',
          default: true,
        },
        useDefaultExcludes: {
          type: 'boolean',
          description:
            'Optional. Whether to apply a list of default exclusion patterns (e.g., node_modules, .git, binary files). Defaults to true.',
          default: true,
        },
      },
      required: ['paths'],
    };

    super(
      ReadManyFilesTool.Name,
      'ReadManyFiles',
      `Reads content from multiple text files specified by paths or glob patterns within a configured target directory and concatenates them into a single string.
This tool is useful when you need to understand or analyze a collection of files, such as:
- Getting an overview of a codebase or parts of it (e.g., all TypeScript files in the 'src' directory).
- Finding where specific functionality is implemented if the user asks broad questions about code.
- Reviewing documentation files (e.g., all Markdown files in the 'docs' directory).
- Gathering context from multiple configuration files.
- When the user asks to "read all files in X directory" or "show me the content of all Y files".
Use this tool when the user's query implies needing the content of several files simultaneously for context, analysis, or summarization.
It uses default UTF-8 encoding and a '--- {filePath} ---' separator between file contents.
Ensure paths are relative to the target directory. Glob patterns like 'src/**/*.js' are supported.
Avoid using for single files if a more specific single-file reading tool is available, unless the user specifically requests to process a list containing just one file via this tool.
This tool should NOT be used for binary files; it attempts to skip them.
Default excludes apply to common non-text files and large dependency directories unless 'useDefaultExcludes' is false.`,
      parameterSchema,
    );

    // Store the normalized form so later path comparisons are consistent.
    this.targetDir = path.resolve(targetDir);
  }

  /** Returns true when `value` is an array whose items are all strings. */
  private static isStringArray(value: unknown): value is string[] {
    return (
      Array.isArray(value) && value.every((item) => typeof item === 'string')
    );
  }

  /**
   * Returns true when `candidate` is `baseDir` itself or lies beneath it.
   *
   * The decision is made on the relative path between the two rather than on
   * a string prefix, so a sibling directory that merely shares a prefix with
   * `baseDir` (for example `/x/proj-secrets` against `/x/proj`) is rejected,
   * and differences in path separators do not matter.
   */
  private static isWithinDirectory(baseDir: string, candidate: string): boolean {
    const relative = path.relative(baseDir, path.resolve(candidate));
    const climbsOut =
      relative === '..' || relative.startsWith(`..${path.sep}`);
    // An absolute result means no relative route exists (e.g. another drive).
    return !climbsOut && !path.isAbsolute(relative);
  }

  /**
   * Validates the tool parameters.
   *
   * The parameters are first checked against the tool's schema (when one is
   * present); afterwards each `paths` entry must be a non-blank string, and
   * `include` / `exclude`, if provided, must be arrays of strings.
   *
   * @param params Parameters supplied for a tool invocation.
   * @returns A human-readable error message, or `null` when the parameters are valid.
   */
  validateParams(params: ReadManyFilesParams): string | null {
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      // Give a more specific message when the failure is about `paths`.
      if (
        !params.paths ||
        !Array.isArray(params.paths) ||
        params.paths.length === 0
      ) {
        return 'The "paths" parameter is required and must be a non-empty array of strings/glob patterns.';
      }
      return 'Parameters failed schema validation. Ensure "paths" is a non-empty array and other parameters match their expected types.';
    }

    for (const p of params.paths) {
      if (typeof p !== 'string' || p.trim() === '') {
        return 'Each item in "paths" must be a non-empty string/glob pattern.';
      }
    }

    if (params.include && !ReadManyFilesTool.isStringArray(params.include)) {
      return 'If provided, "include" must be an array of strings/glob patterns.';
    }
    if (params.exclude && !ReadManyFilesTool.isStringArray(params.exclude)) {
      return 'If provided, "exclude" must be an array of strings/glob patterns.';
    }
    return null;
  }

  /**
   * Builds a one-line, human-readable summary of what an invocation will do:
   * the search patterns, the target directory, a preview of the first two
   * exclusion patterns, the encoding and an example separator.
   *
   * @param params Parameters supplied for a tool invocation.
   * @returns The description text.
   */
  getDescription(params: ReadManyFilesParams): string {
    const allPatterns = [...params.paths, ...(params.include || [])];
    const pathDesc = `using patterns: \`${allPatterns.join('`, `')}\` (within target directory: \`${this.targetDir}\`)`;

    const effectiveExcludes = [
      ...(params.useDefaultExcludes !== false ? DEFAULT_EXCLUDES : []),
      ...(params.exclude || []),
    ];
    const excludeDesc = `Excluding: ${effectiveExcludes.length > 0 ? `patterns like \`${effectiveExcludes.slice(0, 2).join('`, `')}${effectiveExcludes.length > 2 ? '...`' : '`'}` : 'none explicitly (beyond default non-text file avoidance).'}`;

    return `Will attempt to read and concatenate files ${pathDesc}. ${excludeDesc}. File encoding: ${DEFAULT_ENCODING}. Separator: "${DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace('{filePath}', 'path/to/file.ext')}".`;
  }

  /**
   * Finds the files matching the requested patterns, reads them and returns
   * their contents concatenated, each preceded by a separator line naming the
   * file.
   *
   * Files whose first 1KB contains a null byte are treated as binary and
   * skipped; files that cannot be read are skipped with the error recorded.
   * Skipped files are listed in `returnDisplay` only.
   *
   * @param params Parameters supplied for the invocation; validated first.
   * @param _signal Abort signal required by the method signature. It is not
   *   consulted by this implementation.
   * @returns `llmContent` with the concatenated text (or an error/information
   *   message) and `returnDisplay` with a Markdown summary.
   */
  async execute(
    params: ReadManyFilesParams,
    _signal: AbortSignal,
  ): Promise<ToolResult> {
    const validationError = this.validateParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters for ${this.displayName}. Reason: ${validationError}`,
        returnDisplay: `## Parameter Error\n\n${validationError}`,
      };
    }

    const {
      paths: inputPatterns,
      include = [],
      exclude = [],
      useDefaultExcludes = true,
    } = params;

    const toolBaseDir = this.targetDir;
    const filesToConsider = new Set<string>();
    const skippedFiles: Array<{ path: string; reason: string }> = [];
    const processedFilesRelativePaths: string[] = [];
    let concatenatedContent = '';

    const effectiveExcludes = useDefaultExcludes
      ? [...DEFAULT_EXCLUDES, ...exclude]
      : [...exclude];
    const searchPatterns = [...inputPatterns, ...include];

    if (searchPatterns.length === 0) {
      return {
        llmContent: 'No search paths or include patterns provided.',
        returnDisplay: `## Information\n\nNo search paths or include patterns were specified. Nothing to read or concatenate.`,
      };
    }

    try {
      // Using fast-glob (fg) for file searching based on patterns.
      // The `cwd` option scopes the search to the toolBaseDir.
      // `ignore` handles exclusions.
      // `onlyFiles` ensures only files are returned.
      // `dot` allows matching dotfiles (which can still be excluded by patterns).
      // `absolute` returns absolute paths for consistent handling.
      const entries = await fg(searchPatterns, {
        cwd: toolBaseDir,
        ignore: effectiveExcludes,
        onlyFiles: true,
        dot: true,
        absolute: true,
        caseSensitiveMatch: false,
      });

      for (const absoluteFilePath of entries) {
        // Security check: patterns may contain `..` segments, so every match
        // is verified to be inside the target directory before it is read.
        if (
          !ReadManyFilesTool.isWithinDirectory(toolBaseDir, absoluteFilePath)
        ) {
          skippedFiles.push({
            path: absoluteFilePath,
            reason: `Security: Glob library returned path outside target directory. Base: ${toolBaseDir}, Path: ${absoluteFilePath}`,
          });
          continue;
        }
        filesToConsider.add(absoluteFilePath);
      }
    } catch (error) {
      return {
        llmContent: `Error during file search: ${getErrorMessage(error)}`,
        returnDisplay: `## File Search Error\n\nAn error occurred while searching for files:\n\`\`\`\n${getErrorMessage(error)}\n\`\`\``,
      };
    }

    // Sorted so the output order does not depend on traversal order.
    const sortedFiles = Array.from(filesToConsider).sort();

    for (const filePath of sortedFiles) {
      const relativePathForDisplay = path
        .relative(toolBaseDir, filePath)
        .replace(/\\/g, '/');
      try {
        const contentBuffer = await fs.readFile(filePath);

        // Basic binary detection: check for null bytes in the first 1KB
        const sample = contentBuffer.subarray(
          0,
          Math.min(contentBuffer.length, 1024),
        );
        if (sample.includes(0)) {
          skippedFiles.push({
            path: relativePathForDisplay,
            reason: 'Skipped (appears to be binary)',
          });
          continue;
        }

        const fileContent = contentBuffer.toString(DEFAULT_ENCODING);
        // A replacer function is used so that `$` sequences in a file name
        // are inserted literally instead of being read as replacement patterns.
        const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
          '{filePath}',
          () => relativePathForDisplay,
        );
        concatenatedContent += `${separator}\n\n${fileContent}\n\n`;
        processedFilesRelativePaths.push(relativePathForDisplay);
      } catch (error) {
        skippedFiles.push({
          path: relativePathForDisplay,
          reason: `Read error: ${getErrorMessage(error)}`,
        });
      }
    }

    let displayMessage = `### ReadManyFiles Result (Target Dir: \`${this.targetDir}\`)\n\n`;

    if (processedFilesRelativePaths.length > 0) {
      displayMessage += `Successfully read and concatenated content from **${processedFilesRelativePaths.length} file(s)**.\n`;
      displayMessage += `\n**Processed Files:**\n`;
      // Only the first 10 processed files are listed individually.
      for (const p of processedFilesRelativePaths.slice(0, 10)) {
        displayMessage += `- \`${p}\`\n`;
      }
      if (processedFilesRelativePaths.length > 10) {
        displayMessage += `- ...and ${processedFilesRelativePaths.length - 10} more.\n`;
      }
    } else {
      displayMessage += `No files were read and concatenated based on the criteria.\n`;
    }

    if (skippedFiles.length > 0) {
      displayMessage += `\n**Skipped ${skippedFiles.length} item(s) (up to 5 shown):**\n`;
      for (const f of skippedFiles.slice(0, 5)) {
        displayMessage += `- \`${f.path}\` (Reason: ${f.reason})\n`;
      }
      if (skippedFiles.length > 5) {
        displayMessage += `- ...and ${skippedFiles.length - 5} more.\n`;
      }
    }

    if (
      concatenatedContent.length === 0 &&
      processedFilesRelativePaths.length === 0
    ) {
      concatenatedContent =
        'No files matching the criteria were found or all were skipped.';
    }

    return {
      llmContent: concatenatedContent,
      returnDisplay: displayMessage,
    };
  }
}