mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-11 22:51:00 -07:00
# PR Description: Refactor read_file Tool to Discourage Pagination
## Changes

- Removed `offset` and `limit` parameters from the `read_file` tool and the underlying `processSingleFileContent` utility.
- Updated `read_file` to always return the first 2000 lines of a text file if it exceeds that length.
- Added explicit guidance in the tool's truncation message and description, distinguishing between `grep_search` for pattern finding and `run_shell_command` (specifically `sed`) for extracting specific line ranges.
- Simplified `ProcessedFileReadResult` by removing `linesShown`.
- Updated all internal callers (including `ReadManyFilesTool`, `planCommand`, and `ExitPlanModeDialog`) to match the new signature of `processSingleFileContent`.
- Updated unit tests in `read-file.test.ts` and `fileUtils.test.ts` to verify the new 2000-line default truncation and remove pagination-related test cases.

## Motivation & Feedback

These changes were made to address feedback regarding how the model interacts with large files:

- **Discouraging Inefficient Pagination:** Feedback noted that providing `offset` and `limit` encouraged models to "paginate" through files chunk-by-chunk, which is often an inefficient "looping" behavior.
- **Encouraging Targeted Search:** By removing pagination parameters, we push the model toward more powerful and standard search behaviors. As the feedback stated: *"In my experience it's better to encourage the model to use grep or more complex bash commands to find what it needs."*
- **Compatibility with Search Results:** Models frequently use `grep -n` to find points of interest. Standard Unix tools like `sed` and `grep` are more compatible with 1-based line numbers and sophisticated searching than a manual 0-based `offset`/`limit` implementation.
- **Clarity on Truncation:** Addressed the feedback *"Could be useful to specify how will it be truncated"* by explicitly stating the 2000-line limit in the tool's output and description.
This commit is contained in:
@@ -129,26 +129,6 @@ describe('ReadFileTool', () => {
|
||||
/The 'file_path' parameter must be non-empty./,
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error if offset is negative', () => {
|
||||
const params: ReadFileToolParams = {
|
||||
file_path: path.join(tempRootDir, 'test.txt'),
|
||||
offset: -1,
|
||||
};
|
||||
expect(() => tool.build(params)).toThrow(
|
||||
'Offset must be a non-negative number',
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error if limit is zero or negative', () => {
|
||||
const params: ReadFileToolParams = {
|
||||
file_path: path.join(tempRootDir, 'test.txt'),
|
||||
limit: 0,
|
||||
};
|
||||
expect(() => tool.build(params)).toThrow(
|
||||
'Limit must be a positive number',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getDescription', () => {
|
||||
@@ -393,34 +373,6 @@ describe('ReadFileTool', () => {
|
||||
expect(result.returnDisplay).toBe('');
|
||||
});
|
||||
|
||||
it('should support offset and limit for text files', async () => {
|
||||
const filePath = path.join(tempRootDir, 'paginated.txt');
|
||||
const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
|
||||
const fileContent = lines.join('\n');
|
||||
await fsp.writeFile(filePath, fileContent, 'utf-8');
|
||||
|
||||
const params: ReadFileToolParams = {
|
||||
file_path: filePath,
|
||||
offset: 5, // Start from line 6
|
||||
limit: 3,
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
|
||||
const result = await invocation.execute(abortSignal);
|
||||
expect(result.llmContent).toContain(
|
||||
'IMPORTANT: The file content has been truncated',
|
||||
);
|
||||
expect(result.llmContent).toContain(
|
||||
'Status: Showing lines 6-8 of 20 total lines',
|
||||
);
|
||||
expect(result.llmContent).toContain('Line 6');
|
||||
expect(result.llmContent).toContain('Line 7');
|
||||
expect(result.llmContent).toContain('Line 8');
|
||||
expect(result.returnDisplay).toBe(
|
||||
'Read lines 6-8 of 20 from paginated.txt',
|
||||
);
|
||||
});
|
||||
|
||||
it('should successfully read files from project temp directory', async () => {
|
||||
const tempDir = path.join(tempRootDir, '.temp');
|
||||
await fsp.mkdir(tempDir, { recursive: true });
|
||||
|
||||
@@ -32,16 +32,6 @@ export interface ReadFileToolParams {
|
||||
* The path to the file to read
|
||||
*/
|
||||
file_path: string;
|
||||
|
||||
/**
|
||||
* The line number to start reading from (optional)
|
||||
*/
|
||||
offset?: number;
|
||||
|
||||
/**
|
||||
* The number of lines to read (optional)
|
||||
*/
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
class ReadFileToolInvocation extends BaseToolInvocation<
|
||||
@@ -72,7 +62,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
|
||||
override toolLocations(): ToolLocation[] {
|
||||
return [{ path: this.resolvedPath, line: this.params.offset }];
|
||||
return [{ path: this.resolvedPath }];
|
||||
}
|
||||
|
||||
async execute(): Promise<ToolResult> {
|
||||
@@ -91,9 +81,6 @@ class ReadFileToolInvocation extends BaseToolInvocation<
|
||||
const result = await processSingleFileContent(
|
||||
this.resolvedPath,
|
||||
this.config.getTargetDir(),
|
||||
this.config.getFileSystemService(),
|
||||
this.params.offset,
|
||||
this.params.limit,
|
||||
);
|
||||
|
||||
if (result.error) {
|
||||
@@ -109,15 +96,14 @@ class ReadFileToolInvocation extends BaseToolInvocation<
|
||||
|
||||
let llmContent: PartUnion;
|
||||
if (result.isTruncated) {
|
||||
const [start, end] = result.linesShown!;
|
||||
const total = result.originalLineCount!;
|
||||
const nextOffset = this.params.offset
|
||||
? this.params.offset + end - start + 1
|
||||
: end;
|
||||
llmContent = `
|
||||
IMPORTANT: The file content has been truncated.
|
||||
Status: Showing lines ${start}-${end} of ${total} total lines.
|
||||
Action: To read more of the file, you can use the 'offset' and 'limit' parameters in a subsequent 'read_file' call. For example, to read the next section of the file, use offset: ${nextOffset}.
|
||||
Status: Showing the first 2000 lines of ${total} total lines.
|
||||
Action:
|
||||
- To find specific patterns, use the 'grep_search' tool.
|
||||
- To read a specific line range, use 'run_shell_command' with 'sed'. For example: 'sed -n "500,600p" ${this.params.file_path}'.
|
||||
- You can also use other Unix utilities like 'awk', 'head', or 'tail' via 'run_shell_command'.
|
||||
|
||||
--- FILE CONTENT (truncated) ---
|
||||
${result.llmContent}`;
|
||||
@@ -169,7 +155,7 @@ export class ReadFileTool extends BaseDeclarativeTool<
|
||||
super(
|
||||
ReadFileTool.Name,
|
||||
'ReadFile',
|
||||
`Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.`,
|
||||
`Reads and returns the content of a specified file. If the file is large (exceeding 2000 lines), the content will be truncated to the first 2000 lines. The tool's response will clearly indicate if truncation has occurred. To examine specific sections of large files, use the 'run_shell_command' tool with standard Unix utilities like 'grep', 'sed', 'awk', 'head', or 'tail'. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.`,
|
||||
Kind.Read,
|
||||
{
|
||||
properties: {
|
||||
@@ -177,16 +163,6 @@ export class ReadFileTool extends BaseDeclarativeTool<
|
||||
description: 'The path to the file to read.',
|
||||
type: 'string',
|
||||
},
|
||||
offset: {
|
||||
description:
|
||||
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
|
||||
type: 'number',
|
||||
},
|
||||
limit: {
|
||||
description:
|
||||
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
|
||||
type: 'number',
|
||||
},
|
||||
},
|
||||
required: ['file_path'],
|
||||
type: 'object',
|
||||
@@ -218,13 +194,6 @@ export class ReadFileTool extends BaseDeclarativeTool<
|
||||
return validationError;
|
||||
}
|
||||
|
||||
if (params.offset !== undefined && params.offset < 0) {
|
||||
return 'Offset must be a non-negative number';
|
||||
}
|
||||
if (params.limit !== undefined && params.limit <= 0) {
|
||||
return 'Limit must be a positive number';
|
||||
}
|
||||
|
||||
const fileFilteringOptions = this.config.getFileFilteringOptions();
|
||||
if (
|
||||
this.fileDiscoveryService.shouldIgnoreFile(
|
||||
|
||||
@@ -765,7 +765,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(result.llmContent).toBe(content);
|
||||
expect(result.returnDisplay).toBe('');
|
||||
@@ -776,7 +775,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
nonexistentFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(result.error).toContain('File not found');
|
||||
expect(result.returnDisplay).toContain('File not found');
|
||||
@@ -790,7 +788,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(result.error).toContain('Simulated read error');
|
||||
expect(result.returnDisplay).toContain('Simulated read error');
|
||||
@@ -805,7 +802,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testImageFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(result.error).toContain('Simulated image read error');
|
||||
expect(result.returnDisplay).toContain('Simulated image read error');
|
||||
@@ -818,7 +814,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testImageFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(
|
||||
(result.llmContent as { inlineData: unknown }).inlineData,
|
||||
@@ -840,7 +835,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testPdfFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(
|
||||
(result.llmContent as { inlineData: unknown }).inlineData,
|
||||
@@ -865,7 +859,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testAudioFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(
|
||||
(result.llmContent as { inlineData: unknown }).inlineData,
|
||||
@@ -894,7 +887,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testSvgFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
|
||||
expect(result.llmContent).toBe(svgContent);
|
||||
@@ -912,7 +904,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testBinaryFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
expect(result.llmContent).toContain(
|
||||
'Cannot display content of binary file',
|
||||
@@ -921,74 +912,26 @@ describe('fileUtils', () => {
|
||||
});
|
||||
|
||||
it('should handle path being a directory', async () => {
|
||||
const result = await processSingleFileContent(
|
||||
directoryPath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
const result = await processSingleFileContent(directoryPath, tempRootDir);
|
||||
expect(result.error).toContain('Path is a directory');
|
||||
expect(result.returnDisplay).toContain('Path is a directory');
|
||||
});
|
||||
|
||||
it('should paginate text files correctly (offset and limit)', async () => {
|
||||
const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
|
||||
it('should truncate text files exceeding 2000 lines by default', async () => {
|
||||
const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`);
|
||||
actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
|
||||
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
5,
|
||||
5,
|
||||
); // Read lines 6-10
|
||||
const expectedContent = lines.slice(5, 10).join('\n');
|
||||
);
|
||||
|
||||
expect(result.llmContent).toBe(expectedContent);
|
||||
expect(result.returnDisplay).toBe('Read lines 6-10 of 20 from test.txt');
|
||||
expect(result.llmContent.toString().split('\n').length).toBe(2000);
|
||||
expect(result.returnDisplay).toBe(
|
||||
'Read first 2000 lines of 2500 from test.txt',
|
||||
);
|
||||
expect(result.isTruncated).toBe(true);
|
||||
expect(result.originalLineCount).toBe(20);
|
||||
expect(result.linesShown).toEqual([6, 10]);
|
||||
});
|
||||
|
||||
it('should identify truncation when reading the end of a file', async () => {
|
||||
const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
|
||||
actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
|
||||
|
||||
// Read from line 11 to 20. The start is not 0, so it's truncated.
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
10,
|
||||
10,
|
||||
);
|
||||
const expectedContent = lines.slice(10, 20).join('\n');
|
||||
|
||||
expect(result.llmContent).toContain(expectedContent);
|
||||
expect(result.returnDisplay).toBe('Read lines 11-20 of 20 from test.txt');
|
||||
expect(result.isTruncated).toBe(true); // This is the key check for the bug
|
||||
expect(result.originalLineCount).toBe(20);
|
||||
expect(result.linesShown).toEqual([11, 20]);
|
||||
});
|
||||
|
||||
it('should handle limit exceeding file length', async () => {
|
||||
const lines = ['Line 1', 'Line 2'];
|
||||
actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
|
||||
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
0,
|
||||
10,
|
||||
);
|
||||
const expectedContent = lines.join('\n');
|
||||
|
||||
expect(result.llmContent).toBe(expectedContent);
|
||||
expect(result.returnDisplay).toBe('');
|
||||
expect(result.isTruncated).toBe(false);
|
||||
expect(result.originalLineCount).toBe(2);
|
||||
expect(result.linesShown).toEqual([1, 2]);
|
||||
expect(result.originalLineCount).toBe(2500);
|
||||
});
|
||||
|
||||
it('should truncate long lines in text files', async () => {
|
||||
@@ -1001,7 +944,6 @@ describe('fileUtils', () => {
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
);
|
||||
|
||||
expect(result.llmContent).toContain('Short line');
|
||||
@@ -1015,69 +957,6 @@ describe('fileUtils', () => {
|
||||
expect(result.isTruncated).toBe(true);
|
||||
});
|
||||
|
||||
it('should truncate when line count exceeds the limit', async () => {
|
||||
const lines = Array.from({ length: 11 }, (_, i) => `Line ${i + 1}`);
|
||||
actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
|
||||
|
||||
// Read 5 lines, but there are 11 total
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
0,
|
||||
5,
|
||||
);
|
||||
|
||||
expect(result.isTruncated).toBe(true);
|
||||
expect(result.returnDisplay).toBe('Read lines 1-5 of 11 from test.txt');
|
||||
});
|
||||
|
||||
it('should truncate when a line length exceeds the character limit', async () => {
|
||||
const longLine = 'b'.repeat(2500);
|
||||
const lines = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`);
|
||||
lines.push(longLine); // Total 11 lines
|
||||
actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
|
||||
|
||||
// Read all 11 lines, including the long one
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
0,
|
||||
11,
|
||||
);
|
||||
|
||||
expect(result.isTruncated).toBe(true);
|
||||
expect(result.returnDisplay).toBe(
|
||||
'Read all 11 lines from test.txt (some lines were shortened)',
|
||||
);
|
||||
});
|
||||
|
||||
it('should truncate both line count and line length when both exceed limits', async () => {
|
||||
const linesWithLongInMiddle = Array.from(
|
||||
{ length: 20 },
|
||||
(_, i) => `Line ${i + 1}`,
|
||||
);
|
||||
linesWithLongInMiddle[4] = 'c'.repeat(2500);
|
||||
actualNodeFs.writeFileSync(
|
||||
testTextFilePath,
|
||||
linesWithLongInMiddle.join('\n'),
|
||||
);
|
||||
|
||||
// Read 10 lines out of 20, including the long line
|
||||
const result = await processSingleFileContent(
|
||||
testTextFilePath,
|
||||
tempRootDir,
|
||||
new StandardFileSystemService(),
|
||||
0,
|
||||
10,
|
||||
);
|
||||
expect(result.isTruncated).toBe(true);
|
||||
expect(result.returnDisplay).toBe(
|
||||
'Read lines 1-10 of 20 from test.txt (some lines were shortened)',
|
||||
);
|
||||
});
|
||||
|
||||
it('should return an error if the file size exceeds 20MB', async () => {
|
||||
// Create a small test file
|
||||
actualNodeFs.writeFileSync(testTextFilePath, 'test content');
|
||||
|
||||
@@ -392,23 +392,21 @@ export interface ProcessedFileReadResult {
|
||||
errorType?: ToolErrorType; // Structured error type
|
||||
isTruncated?: boolean; // For text files, indicates if content was truncated
|
||||
originalLineCount?: number; // For text files
|
||||
linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and processes a single file, handling text, images, and PDFs.
|
||||
* @param filePath Absolute path to the file.
|
||||
* @param rootDirectory Absolute path to the project root for relative path display.
|
||||
* @param offset Optional offset for text files (0-based line number).
|
||||
* @param limit Optional limit for text files (number of lines to read).
|
||||
* @returns ProcessedFileReadResult object.
|
||||
*/
|
||||
export async function processSingleFileContent(
|
||||
filePath: string,
|
||||
rootDirectory: string,
|
||||
fileSystemService: FileSystemService,
|
||||
offset?: number,
|
||||
limit?: number,
|
||||
// TODO: remove unused vars from other areas
|
||||
_fileSystemService?: FileSystemService,
|
||||
_offset?: number,
|
||||
_limit?: number,
|
||||
): Promise<ProcessedFileReadResult> {
|
||||
try {
|
||||
if (!fs.existsSync(filePath)) {
|
||||
@@ -474,14 +472,11 @@ export async function processSingleFileContent(
|
||||
const lines = content.split('\n');
|
||||
const originalLineCount = lines.length;
|
||||
|
||||
const startLine = offset || 0;
|
||||
const effectiveLimit =
|
||||
limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
|
||||
// Ensure endLine does not exceed originalLineCount
|
||||
const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
|
||||
// Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
|
||||
const actualStartLine = Math.min(startLine, originalLineCount);
|
||||
const selectedLines = lines.slice(actualStartLine, endLine);
|
||||
const isTruncatedInLines =
|
||||
originalLineCount > DEFAULT_MAX_LINES_TEXT_FILE;
|
||||
const selectedLines = isTruncatedInLines
|
||||
? lines.slice(0, DEFAULT_MAX_LINES_TEXT_FILE)
|
||||
: lines;
|
||||
|
||||
let linesWereTruncatedInLength = false;
|
||||
const formattedLines = selectedLines.map((line) => {
|
||||
@@ -494,17 +489,13 @@ export async function processSingleFileContent(
|
||||
return line;
|
||||
});
|
||||
|
||||
const contentRangeTruncated =
|
||||
startLine > 0 || endLine < originalLineCount;
|
||||
const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
|
||||
const isTruncated = isTruncatedInLines || linesWereTruncatedInLength;
|
||||
const llmContent = formattedLines.join('\n');
|
||||
|
||||
// By default, return nothing to streamline the common case of a successful read_file.
|
||||
let returnDisplay = '';
|
||||
if (contentRangeTruncated) {
|
||||
returnDisplay = `Read lines ${
|
||||
actualStartLine + 1
|
||||
}-${endLine} of ${originalLineCount} from ${relativePathForDisplay}`;
|
||||
if (isTruncatedInLines) {
|
||||
returnDisplay = `Read first ${DEFAULT_MAX_LINES_TEXT_FILE} lines of ${originalLineCount} from ${relativePathForDisplay}`;
|
||||
if (linesWereTruncatedInLength) {
|
||||
returnDisplay += ' (some lines were shortened)';
|
||||
}
|
||||
@@ -517,7 +508,6 @@ export async function processSingleFileContent(
|
||||
returnDisplay,
|
||||
isTruncated,
|
||||
originalLineCount,
|
||||
linesShown: [actualStartLine + 1, endLine],
|
||||
};
|
||||
}
|
||||
case 'image':
|
||||
|
||||
Reference in New Issue
Block a user