2025-05-29 22:30:18 +00:00
/ * *
* @license
* Copyright 2025 Google LLC
* SPDX - License - Identifier : Apache - 2.0
* /
2025-07-21 08:16:42 +09:00
import fs from 'node:fs' ;
import path from 'node:path' ;
2025-05-29 22:30:18 +00:00
import { PartUnion } from '@google/genai' ;
import mime from 'mime-types' ;
// Limits applied when reading text files:
// - DEFAULT_MAX_LINES_TEXT_FILE is the number of lines returned when the caller gives no limit.
// - MAX_LINE_LENGTH_TEXT_FILE is the length (in characters) beyond which a line is cut and marked as truncated.
const DEFAULT_MAX_LINES_TEXT_FILE = 2000 ;
const MAX_LINE_LENGTH_TEXT_FILE = 2000 ;
// Default text encoding, exported for use by other modules (it is not referenced in this part of the file).
export const DEFAULT_ENCODING : BufferEncoding = 'utf-8' ;
Add file operation telemetry (#1068)
Introduces telemetry for file create, read, and update operations.
This change adds the `gemini_cli.file.operation.count` metric, recorded by the `read-file`, `read-many-files`, and `write-file` tools.
The metric includes the following attributes:
- `operation` (string: `create`, `read`, `update`): The type of file operation.
- `lines` (optional, Int): Number of lines in the file.
- `mimetype` (optional, string): Mimetype of the file.
- `extension` (optional, string): File extension of the file.
Here is a stacked bar chart of file operations by extension (`js`, `ts`, `md`):

Here is a stacked bar chart of file operations by type (`create`, `read`, `update`):

#750
cc @allenhutchison as discussed
2025-06-15 16:24:53 -04:00
/**
 * Resolves the MIME type associated with a file path via `mime.lookup`.
 *
 * @param filePath Path to the file.
 * @returns The MIME type string reported by the lookup, or `undefined` when
 *   the lookup does not produce a string (i.e. no type is known).
 */
export function getSpecificMimeType(filePath: string): string | undefined {
  const mimeType = mime.lookup(filePath);
  // The lookup yields a non-string value when nothing matches.
  if (typeof mimeType !== 'string') {
    return undefined;
  }
  return mimeType;
}
2025-05-29 22:30:18 +00:00
/**
 * Checks whether a path is located inside (or is equal to) a root directory.
 *
 * Both arguments are passed through `path.resolve`, so relative segments such
 * as `..` are collapsed before the comparison is made.
 *
 * @param pathToCheck The path to check.
 * @param rootDirectory The root directory.
 * @returns True if the path equals the root or lies beneath it, false otherwise.
 */
export function isWithinRoot(
  pathToCheck: string,
  rootDirectory: string,
): boolean {
  const normalizedPathToCheck = path.resolve(pathToCheck);
  const normalizedRootDirectory = path.resolve(rootDirectory);

  // Ensure the root ends with a separator before the prefix comparison, so
  // that '/a/b' does not match the sibling '/a/bc'. A root that already ends
  // with a separator (e.g. '/' or 'C:\') is used as-is.
  const rootWithSeparator =
    normalizedRootDirectory === path.sep ||
    normalizedRootDirectory.endsWith(path.sep)
      ? normalizedRootDirectory
      : normalizedRootDirectory + path.sep;

  return (
    normalizedPathToCheck === normalizedRootDirectory ||
    normalizedPathToCheck.startsWith(rootWithSeparator)
  );
}
/**
 * Determines if a file is likely binary based on content sampling.
 *
 * At most the first 4096 bytes are inspected. A null byte marks the file as
 * binary immediately; otherwise the file is considered binary when more than
 * 30% of the sampled bytes are control characters other than tab, line feed,
 * vertical tab, form feed and carriage return (byte values 9-13).
 *
 * @param filePath Path to the file.
 * @returns Promise that resolves to true if the file appears to be binary.
 *   Resolves to false for empty files and when the file cannot be opened or
 *   read (the failure is logged with `console.warn`, not thrown).
 */
export async function isBinaryFile(filePath: string): Promise<boolean> {
  let fileHandle: fs.promises.FileHandle | undefined;
  try {
    fileHandle = await fs.promises.open(filePath, 'r');

    // Read up to 4KB or file size, whichever is smaller
    const stats = await fileHandle.stat();
    const fileSize = stats.size;
    if (fileSize === 0) {
      // Empty file is not considered binary for content checking
      return false;
    }
    const bufferSize = Math.min(4096, fileSize);
    const buffer = Buffer.alloc(bufferSize);
    const result = await fileHandle.read(buffer, 0, buffer.length, 0);
    const bytesRead = result.bytesRead;
    if (bytesRead === 0) return false;

    let nonPrintableCount = 0;
    for (let i = 0; i < bytesRead; i++) {
      const byte = buffer[i];
      if (byte === 0) return true; // Null byte is a strong indicator
      if (byte < 9 || (byte > 13 && byte < 32)) {
        nonPrintableCount++;
      }
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / bytesRead > 0.3;
  } catch (error) {
    // Log error for debugging while maintaining existing behavior
    console.warn(
      `Failed to check if file is binary: ${filePath}`,
      error instanceof Error ? error.message : String(error),
    );
    // If any error occurs (e.g. file not found, permissions),
    // treat as not binary here; let higher-level functions handle existence/access errors.
    return false;
  } finally {
    // Safely close the file handle if it was successfully opened
    if (fileHandle) {
      try {
        await fileHandle.close();
      } catch (closeError) {
        // Log close errors for debugging while continuing with cleanup;
        // a failed close must not change the result computed above.
        console.warn(
          `Failed to close file handle for: ${filePath}`,
          closeError instanceof Error ? closeError.message : String(closeError),
        );
      }
    }
  }
}
// Extensions treated as binary without inspecting content. These are often
// not well-covered by mime-types or might be misidentified.
const KNOWN_BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
  '.zip',
  '.tar',
  '.gz',
  '.exe',
  '.dll',
  '.so',
  '.class',
  '.jar',
  '.war',
  '.7z',
  '.doc',
  '.docx',
  '.xls',
  '.xlsx',
  '.ppt',
  '.pptx',
  '.odt',
  '.ods',
  '.odp',
  '.bin',
  '.dat',
  '.obj',
  '.o',
  '.a',
  '.lib',
  '.wasm',
  '.pyc',
  '.pyo',
]);

/**
 * Detects the type of file based on extension and content.
 *
 * Checks are applied in this order:
 * 1. `.ts` is always 'text' and `.svg` is always 'svg'.
 * 2. The MIME type looked up from the path selects 'image', 'audio', 'video'
 *    or 'pdf'.
 * 3. A fixed list of well-known binary extensions yields 'binary'.
 * 4. Otherwise the file content is sampled with `isBinaryFile`.
 *
 * @param filePath Path to the file.
 * @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
 */
export async function detectFileType(
  filePath: string,
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
  const ext = path.extname(filePath).toLowerCase();

  // The mimetype for "ts" is MPEG transport stream (a video format) but we want
  // to assume these are typescript files instead.
  if (ext === '.ts') {
    return 'text';
  }

  if (ext === '.svg') {
    return 'svg';
  }

  const lookedUpMimeType = mime.lookup(filePath); // Returns false if not found, or the mime type string
  if (lookedUpMimeType) {
    if (lookedUpMimeType.startsWith('image/')) {
      return 'image';
    }
    if (lookedUpMimeType.startsWith('audio/')) {
      return 'audio';
    }
    if (lookedUpMimeType.startsWith('video/')) {
      return 'video';
    }
    if (lookedUpMimeType === 'application/pdf') {
      return 'pdf';
    }
  }

  // Stricter binary check for common non-text extensions before content check.
  if (KNOWN_BINARY_EXTENSIONS.has(ext)) {
    return 'binary';
  }

  // Fallback to content-based check if the mime type wasn't conclusive
  // and it's not a known binary extension.
  if (await isBinaryFile(filePath)) {
    return 'binary';
  }

  return 'text';
}
/**
 * Result of reading and processing a single file, as produced by
 * processSingleFileContent.
 */
export interface ProcessedFileReadResult {
llmContent : PartUnion ; // string for text/SVG content and for skip/error messages; inlineData Part for image/pdf/audio/video
returnDisplay : string ; // Short status text for display (e.g. '(truncated)', 'Skipped binary file: …'); may be empty
error? : string ; // Optional error message for the LLM if file processing failed
isTruncated? : boolean ; // For text files, indicates if content was truncated (by line range or by line length)
originalLineCount? : number ; // For text files, total number of lines in the file
linesShown ? : [ number , number ] ; // For text files [startLine, endLine] (1-based for display)
}
/**
 * Reads and processes a single file, handling text, SVG, images, PDFs, audio
 * and video.
 *
 * - Text files are returned as a string, limited to a window of lines
 *   (`offset`/`limit`) with over-long lines cut; a notice is prepended when
 *   anything was truncated.
 * - SVG files up to 1MB are returned as text.
 * - Image, PDF, audio and video files are returned as base64 inline data.
 * - Binary files are skipped with an explanatory message.
 *
 * Failures (missing file, directory path, file larger than 20MB, read errors)
 * are reported through the `error` field of the result rather than thrown.
 *
 * @param filePath Absolute path to the file.
 * @param rootDirectory Absolute path to the project root for relative path display.
 * @param offset Optional offset for text files (0-based line number). Negative
 *   values are treated as 0.
 * @param limit Optional limit for text files (number of lines to read).
 * @returns ProcessedFileReadResult object.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  offset?: number,
  limit?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent: '',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
      };
    }

    const stats = await fs.promises.stat(filePath);
    if (stats.isDirectory()) {
      return {
        llmContent: '',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
      };
    }

    const fileSizeInBytes = stats.size;
    // 20MB limit
    const maxFileSize = 20 * 1024 * 1024;
    if (fileSizeInBytes > maxFileSize) {
      // Thrown here and converted into an error result by the catch below.
      throw new Error(
        `File size exceeds the 20MB limit: ${filePath} (${(
          fileSizeInBytes /
          (1024 * 1024)
        ).toFixed(2)}MB)`,
      );
    }

    const fileType = await detectFileType(filePath);
    // Always display forward slashes, regardless of platform separator.
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');

    switch (fileType) {
      case 'binary': {
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'svg': {
        const SVG_MAX_SIZE_BYTES = 1 * 1024 * 1024;
        if (stats.size > SVG_MAX_SIZE_BYTES) {
          return {
            llmContent: `Cannot display content of SVG file larger than 1MB: ${relativePathForDisplay}`,
            returnDisplay: `Skipped large SVG file (>1MB): ${relativePathForDisplay}`,
          };
        }
        const content = await fs.promises.readFile(filePath, 'utf8');
        return {
          llmContent: content,
          returnDisplay: `Read SVG as text: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        const content = await fs.promises.readFile(filePath, 'utf8');
        const lines = content.split('\n');
        const originalLineCount = lines.length;

        // Clamp negative offsets to 0: a negative start index would make
        // `slice` count from the end of the file and report a bogus range.
        const startLine = Math.max(0, offset || 0);
        const effectiveLimit =
          limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);

        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });

        const contentRangeTruncated = endLine < originalLineCount;
        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;

        // Prepend a one-line notice so the reader knows the content is partial.
        let llmTextContent = '';
        if (contentRangeTruncated) {
          llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
        } else if (linesWereTruncatedInLength) {
          llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
        }
        llmTextContent += formattedLines.join('\n');

        return {
          llmContent: llmTextContent,
          returnDisplay: isTruncated ? '(truncated)' : '',
          isTruncated,
          originalLineCount,
          linesShown: [actualStartLine + 1, endLine],
        };
      }
      case 'image':
      case 'pdf':
      case 'audio':
      case 'video': {
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              mimeType: mime.lookup(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic; the `never`
        // assignment makes the compiler flag any unhandled file type.
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
    };
  }
}