From 19140f66d625e12915d9c538aef4d35aa2c1e98c Mon Sep 17 00:00:00 2001
From: Christian Gunderman <gundermanc@google.com>
Date: Wed, 11 Feb 2026 15:38:26 -0800
Subject: [PATCH] Use grep over large files.

---
 evals/frugalSearch.eval.ts            | 76 +++++++++++++++++++++++++++
 packages/core/src/prompts/snippets.ts |  2 +
 packages/core/src/tools/ls.test.ts    |  4 +-
 packages/core/src/tools/ls.ts         |  7 ++-
 4 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts
index 11c51e8529..26b196e8c0 100644
--- a/evals/frugalSearch.eval.ts
+++ b/evals/frugalSearch.eval.ts
@@ -141,4 +141,80 @@ describe('Frugal Search', () => {
       ).toBe(true);
     },
   });
+
+  /**
+   * Ensure that the agent uses either grep or ranged reads to fulfill this task.
+   * The prompt deliberately avoids the words "view" and "search", because the model
+   * implicitly understands that such tasks are searches. The fixture is an unexpectedly
+   * large file (a copyright header plus 5000 newline-separated comment lines), so a
+   * frugal approach to viewing it, such as grep or a ranged read, is expected.
+   */
+  evalTest('ALWAYS_PASSES', {
+    name: 'should use grep or ranged read for large files',
+    prompt: 'What year was legacy_processor.ts written?',
+    files: {
+      'src/utils.ts': 'export const add = (a, b) => a + b;',
+      'src/types.ts': 'export type ID = string;',
+      'src/legacy_processor.ts': [
+        '// Copyright 2005 Legacy Systems Inc.',
+        ...Array.from(
+          { length: 5000 },
+          (_, i) =>
+            `// Legacy code block ${i} - strictly preserved for backward compatibility`,
+        ),
+      ].join('\n'),
+      'README.md': '# Project documentation',
+    },
+    assert: async (rig) => {
+      const toolCalls = rig.readToolLogs();
+      const getParams = (call: any) => {
+        let args = call.toolRequest.args;
+        if (typeof args === 'string') {
+          try {
+            args = JSON.parse(args);
+          } catch (e) {
+            // Not valid JSON: fall through and return the raw string unchanged.
+          }
+        }
+        return args;
+      };
+
+      // A read_file call on the large file without a limit is a wasteful full read.
+      const fullReads = toolCalls.filter((call) => {
+        if (call.toolRequest.name !== 'read_file') return false;
+        const args = getParams(call);
+        return (
+          args.file_path === 'src/legacy_processor.ts' &&
+          (args.limit === undefined || args.limit === null)
+        );
+      });
+
+      expect(
+        fullReads.length,
+        'Agent should not attempt to read the entire large file at once',
+      ).toBe(0);
+
+      // Require at least one bounded grep_search or a ranged read of the large file.
+      const validAttempts = toolCalls.filter((call) => {
+        const args = getParams(call);
+        if (
+          call.toolRequest.name === 'grep_search' &&
+          (args.total_max_matches || args.max_matches_per_file)
+        ) {
+          return true;
+        }
+
+        if (
+          call.toolRequest.name === 'read_file' &&
+          args.file_path === 'src/legacy_processor.ts' &&
+          args.limit !== undefined
+        ) {
+          return true;
+        }
+        return false;
+      });
+
+      expect(validAttempts.length).toBeGreaterThan(0);
+    },
+  });
 });
diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts
index 3dcf346de6..613a704a1e 100644
--- a/packages/core/src/prompts/snippets.ts
+++ b/packages/core/src/prompts/snippets.ts
@@ -167,6 +167,8 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
 ## Context Efficiency:
 - Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
 - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} to search large files (> 1kb) or ${READ_FILE_TOOL_NAME} with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
 
 ## Engineering Standards
 - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts
index 4bc57b8d32..63d7693123 100644
--- a/packages/core/src/tools/ls.test.ts
+++ b/packages/core/src/tools/ls.test.ts
@@ -235,8 +235,8 @@ describe('LSTool', () => {
 
       expect(entries[0]).toBe('[DIR] x-dir');
       expect(entries[1]).toBe('[DIR] y-dir');
-      expect(entries[2]).toBe('a-file.txt');
-      expect(entries[3]).toBe('b-file.txt');
+      expect(entries[2]).toBe('a-file.txt (8 bytes)');
+      expect(entries[3]).toBe('b-file.txt (8 bytes)');
     });
 
     it('should handle permission errors gracefully', async () => {
diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts
index 9ca2918b2c..b98dfb9e38 100644
--- a/packages/core/src/tools/ls.ts
+++ b/packages/core/src/tools/ls.ts
@@ -241,7 +241,12 @@ class LSToolInvocation extends BaseToolInvocation<LSToolParams, ToolResult> {
 
       // Create formatted content for LLM
       const directoryContent = entries
-        .map((entry) => `${entry.isDirectory ? '[DIR] ' : ''}${entry.name}`)
+        .map((entry) => {
+          if (entry.isDirectory) {
+            return `[DIR] ${entry.name}`;
+          }
+          return `${entry.name} (${entry.size} bytes)`;
+        })
         .join('\n');
 
       let resultMessage = `Directory listing for ${resolvedDirPath}:\n${directoryContent}`;