From 19140f66d625e12915d9c538aef4d35aa2c1e98c Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 15:38:26 -0800 Subject: [PATCH 01/21] Use grep over large files. --- evals/frugalSearch.eval.ts | 76 +++++++++++++++++++++++++++ packages/core/src/prompts/snippets.ts | 2 + packages/core/src/tools/ls.test.ts | 4 +- packages/core/src/tools/ls.ts | 7 ++- 4 files changed, 86 insertions(+), 3 deletions(-) diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts index 11c51e8529..26b196e8c0 100644 --- a/evals/frugalSearch.eval.ts +++ b/evals/frugalSearch.eval.ts @@ -141,4 +141,80 @@ describe('Frugal Search', () => { ).toBe(true); }, }); + + /** + * Ensure that the agent makes use of either grep or ranged reads in fulfilling this task. + * The task is specifically phrased to not evoke "view" or "search" specifically because + * the model implicitly understands that such tasks are searches. This covers the case of + * an unexpectedly large file benefitting from frugal approaches to viewing, like grep, or + * ranged reads. 
+ */ + evalTest('ALWAYS_PASSES', { + name: 'should use grep or ranged read for large files', + prompt: 'What year was legacy_processor.ts written?', + files: { + 'src/utils.ts': 'export const add = (a, b) => a + b;', + 'src/types.ts': 'export type ID = string;', + 'src/legacy_processor.ts': [ + '// Copyright 2005 Legacy Systems Inc.', + ...Array.from( + { length: 5000 }, + (_, i) => + `// Legacy code block ${i} - strictly preserved for backward compatibility`, + ), + ].join('\\n'), + 'README.md': '# Project documentation', + }, + assert: async (rig) => { + const toolCalls = rig.readToolLogs(); + const getParams = (call: any) => { + let args = call.toolRequest.args; + if (typeof args === 'string') { + try { + args = JSON.parse(args); + } catch (e) { + // Ignore parse errors + } + } + return args; + }; + + // Check for wasteful full file reads + const fullReads = toolCalls.filter((call) => { + if (call.toolRequest.name !== 'read_file') return false; + const args = getParams(call); + return ( + args.file_path === 'src/legacy_processor.ts' && + (args.limit === undefined || args.limit === null) + ); + }); + + expect( + fullReads.length, + 'Agent should not attempt to read the entire large file at once', + ).toBe(0); + + // Check that it actually tried to find it using appropriate tools + const validAttempts = toolCalls.filter((call) => { + const args = getParams(call); + if ( + call.toolRequest.name === 'grep_search' && + (args.total_max_matches || args.max_matches_per_file) + ) { + return true; + } + + if ( + call.toolRequest.name === 'read_file' && + args.file_path === 'src/legacy_processor.ts' && + args.limit !== undefined + ) { + return true; + } + return false; + }); + + expect(validAttempts.length).toBeGreaterThan(0); + }, + }); }); diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 3dcf346de6..613a704a1e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -167,6 +167,8 @@ 
export function renderCoreMandates(options?: CoreMandatesOptions): string { ## Context Efficiency: - Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} to search large files (> 1kb) or ${READ_FILE_TOOL_NAME} with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts index 4bc57b8d32..63d7693123 100644 --- a/packages/core/src/tools/ls.test.ts +++ b/packages/core/src/tools/ls.test.ts @@ -235,8 +235,8 @@ describe('LSTool', () => { expect(entries[0]).toBe('[DIR] x-dir'); expect(entries[1]).toBe('[DIR] y-dir'); - expect(entries[2]).toBe('a-file.txt'); - expect(entries[3]).toBe('b-file.txt'); + expect(entries[2]).toBe('a-file.txt (8 bytes)'); + expect(entries[3]).toBe('b-file.txt (8 bytes)'); }); it('should handle permission errors gracefully', async () => { diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts index 9ca2918b2c..b98dfb9e38 100644 --- a/packages/core/src/tools/ls.ts +++ b/packages/core/src/tools/ls.ts @@ -241,7 +241,12 @@ class LSToolInvocation extends BaseToolInvocation { // Create formatted content for LLM const directoryContent = entries - .map((entry) => `${entry.isDirectory ? 
'[DIR] ' : ''}${entry.name}`) + .map((entry) => { + if (entry.isDirectory) { + return `[DIR] ${entry.name}`; + } + return `${entry.name} (${entry.size} bytes)`; + }) .join('\n'); let resultMessage = `Directory listing for ${resolvedDirPath}:\n${directoryContent}`; From 80017bf986d277f29f21158ff666b3dda64c2a67 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 16:25:24 -0800 Subject: [PATCH 02/21] Firmer language. --- packages/core/src/prompts/snippets.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 613a704a1e..8fd97f333e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -165,9 +165,9 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. -- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} to search large files (> 1kb) or ${READ_FILE_TOOL_NAME} with the desired offset and limit. 
+- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 1kb) or ${READ_FILE_TOOL_NAME} with the desired offset and limit. - If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards From c1a954341c397b89a7d1b0aa31bc688d9f0b1db7 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 17:14:57 -0800 Subject: [PATCH 03/21] Simpler assert. --- evals/frugalSearch.eval.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts index 26b196e8c0..56664d7a01 100644 --- a/evals/frugalSearch.eval.ts +++ b/evals/frugalSearch.eval.ts @@ -197,10 +197,7 @@ describe('Frugal Search', () => { // Check that it actually tried to find it using appropriate tools const validAttempts = toolCalls.filter((call) => { const args = getParams(call); - if ( - call.toolRequest.name === 'grep_search' && - (args.total_max_matches || args.max_matches_per_file) - ) { + if (call.toolRequest.name === 'grep_search') { return true; } From 0b5c652548494ac1e384502cb6394b9dcfdec60c Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 17:26:20 -0800 Subject: [PATCH 04/21] Fix test. --- .../core/__snapshots__/prompts.test.ts.snap | 52 ++++++++++++++----- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index e944565366..a6a1e01aeb 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -520,8 +520,10 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat - **Source Control:** Do not stage or commit changes unless specifically requested by the user. 
## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -651,8 +653,10 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. 
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -747,8 +751,10 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. 
They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1312,8 +1318,10 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills with - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1439,8 +1447,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. 
Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1557,8 +1567,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. 
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1675,8 +1687,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. 
@@ -1789,8 +1803,10 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1903,8 +1919,10 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. 
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2256,8 +2274,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. 
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2370,8 +2390,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. 
@@ -2595,8 +2617,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2709,8 +2733,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. 
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. - For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit. +- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From ac179551a86e5354e91df17f9fccdb9ba2c4d666 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 17:59:34 -0800 Subject: [PATCH 05/21] Revert prompt. --- packages/core/src/prompts/snippets.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 8fd97f333e..9a2d6411ba 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -165,10 +165,9 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase. 
-- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. -- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 1kb) or ${READ_FILE_TOOL_NAME} with the desired offset and limit. -- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly. +- Always minimize wasted context window by aggressively scoping and limiting all of your ${GREP_TOOL_NAME} searches. e.g.: always pass total_max_matches, include, and max_matches_per_file. +- Use names_only=true or max_matches_per_file=1 to find a list of files that contain a pattern. +- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`). ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From 2b6676d7dcb34d73cad97acd79dfd1c17008d140 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 18:09:11 -0800 Subject: [PATCH 06/21] Important word. --- packages/core/src/prompts/snippets.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 9a2d6411ba..41441ca512 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -167,7 +167,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { ## Context Efficiency: - Always minimize wasted context window by aggressively scoping and limiting all of your ${GREP_TOOL_NAME} searches. e.g.: always pass total_max_matches, include, and max_matches_per_file. - Use names_only=true or max_matches_per_file=1 to find a list of files that contain a pattern. 
-- Limit unnecessary context consumption from file reads by using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`). +- Limit unnecessary context consumption from file reads by always using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 1kb). ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From b005b33ca76da3a2a2416cd5426ceb016c0d23d3 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 21:50:45 -0800 Subject: [PATCH 07/21] Fix eval. --- evals/frugalSearch.eval.ts | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts index 56664d7a01..fefe042ca2 100644 --- a/evals/frugalSearch.eval.ts +++ b/evals/frugalSearch.eval.ts @@ -25,7 +25,7 @@ describe('Frugal Search', () => { return args; }; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: 'should use targeted search with limit', prompt: 'find me a sample usage of path.resolve() in the codebase', files: { @@ -128,17 +128,6 @@ describe('Frugal Search', () => { grepParams.map((p) => p.total_max_matches), )}`, ).toBe(true); - - const hasMaxMatchesPerFileLimit = grepParams.some( - (p) => - p.max_matches_per_file !== undefined && p.max_matches_per_file <= 5, - ); - expect( - hasMaxMatchesPerFileLimit, - `Expected agent to use a small max_matches_per_file (<= 5) for a sample usage request. Actual values: ${JSON.stringify( - grepParams.map((p) => p.max_matches_per_file), - )}`, - ).toBe(true); }, }); From 9d1220bbb095249d213442e2afb7ab1b3d20dd2f Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Wed, 11 Feb 2026 23:14:59 -0800 Subject: [PATCH 08/21] Ranged file reads. 
--- evals/frugalReads.eval.ts | 270 ++++++++++++++++++++++++++ packages/core/src/prompts/snippets.ts | 4 + 2 files changed, 274 insertions(+) create mode 100644 evals/frugalReads.eval.ts diff --git a/evals/frugalReads.eval.ts b/evals/frugalReads.eval.ts new file mode 100644 index 0000000000..454ba84cbd --- /dev/null +++ b/evals/frugalReads.eval.ts @@ -0,0 +1,270 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import { READ_FILE_TOOL_NAME, EDIT_TOOL_NAME } from '@google/gemini-cli-core'; + +describe('Frugal reads eval', () => { + /** + * Ensures that the agent is frugal in its use of context by relying + * primarily on ranged reads when the line number is known, and combining + * nearby ranges into a single contiguous read to save tool calls. + */ + evalTest('ALWAYS_PASSES', { + name: 'should use ranged read when nearby lines are targeted', + files: { + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + 'eslint.config.mjs': `export default [ + { + files: ["**/*.ts"], + rules: { + "no-var": "error" + } + } + ];`, + 'linter_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 1000; i++) { + if (i === 500 || i === 510 || i === 520) { + lines.push(`var oldVar${i} = "needs fix";`); + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Fix all linter errors in linter_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. 
Do not run the file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + // Check if the agent read the whole file + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + + const targetFileReads = readCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('linter_mess.ts'); + }); + + expect( + targetFileReads.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + // We expect a single contiguous range covering all errors since they are near each other. + // Some models re-verify or read more than once, so we allow up to 4. + expect( + targetFileReads.length, + 'Agent should have been efficient with ranged reads for near errors', + ).toEqual(1); + + let totalLinesRead = 0; + const readRanges: { offset: number; limit: number }[] = []; + + for (const call of targetFileReads) { + const args = JSON.parse(call.toolRequest.args); + + expect( + args.limit, + 'Agent read the entire file (missing limit) instead of using ranged read', + ).toBeDefined(); + + const limit = args.limit; + const offset = args.offset ?? 0; + totalLinesRead += limit; + readRanges.push({ offset, limit }); + + expect(args.limit, 'Agent read too many lines at once').toBeLessThan( + 1001, + ); + } + + // Ranged read shoud be frugal and just enough to satisfy the task at hand. 
+ expect( + totalLinesRead, + 'Agent read more of the file than expected', + ).toBeLessThan(1000); + + // Check that we read around the error lines + const errorLines = [500, 510, 520]; + for (const line of errorLines) { + const covered = readRanges.some( + (range) => line >= range.offset && line < range.offset + range.limit, + ); + expect(covered, `Agent should have read around line ${line}`).toBe( + true, + ); + } + + const editCalls = logs.filter( + (log) => log.toolRequest?.name === EDIT_TOOL_NAME, + ); + const targetEditCalls = editCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('linter_mess.ts'); + }); + expect( + targetEditCalls.length, + 'Agent should have made replacement calls on the target file', + ).toBeGreaterThanOrEqual(3); + }, + }); + + /** + * Ensures the agent uses multiple ranged reads when the targets are far + * apart to avoid the need to read the whole file. + */ + evalTest('ALWAYS_PASSES', { + name: 'should use ranged read when targets are far apart', + files: { + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + 'eslint.config.mjs': `export default [ + { + files: ["**/*.ts"], + rules: { + "no-var": "error" + } + } + ];`, + 'far_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 1000; i++) { + if (i === 100 || i === 900) { + lines.push(`var oldVar${i} = "needs fix";`); + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Fix all linter errors in far_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. 
Do not run the file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + + const targetFileReads = readCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('far_mess.ts'); + }); + + // The agent should use ranged reads to be frugal with context tokens, + // even if it requires multiple calls for far-apart errors. + expect( + targetFileReads.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + // We allow multiple calls since the errors are far apart. + expect( + targetFileReads.length, + 'Agent should have used separate reads for far apart errors', + ).toBeLessThanOrEqual(4); + + for (const call of targetFileReads) { + const args = JSON.parse(call.toolRequest.args); + expect( + args.limit, + 'Agent should have used ranged read (limit) to save tokens', + ).toBeDefined(); + } + }, + }); + + /** + * Validates that the agent reads the entire file if there are lots of matches + * (e.g.: 10), as it's more efficient than many small ranged reads. + */ + evalTest('ALWAYS_PASSES', { + name: 'should read the entire file when there are many matches', + files: { + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + 'eslint.config.mjs': `export default [ + { + files: ["**/*.ts"], + rules: { + "no-var": "error" + } + } + ];`, + 'many_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 1000; i++) { + if (i % 100 === 0) { + lines.push(`var oldVar${i} = "needs fix";`); + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Fix all linter errors in many_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. 
Do not run the file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + + const targetFileReads = readCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('many_mess.ts'); + }); + + expect( + targetFileReads.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + // In this case, we expect the agent to realize there are many scattered errors + // and just read the whole file to be efficient with tool calls. + const readEntireFile = targetFileReads.some((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.limit === undefined; + }); + + expect( + readEntireFile, + 'Agent should have read the entire file because of the high number of scattered matches', + ).toBe(true); + + // Check that the agent actually fixed the errors + const editCalls = logs.filter( + (log) => log.toolRequest?.name === EDIT_TOOL_NAME, + ); + const targetEditCalls = editCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('many_mess.ts'); + }); + expect( + targetEditCalls.length, + 'Agent should have made replacement calls on the target file', + ).toBeGreaterThanOrEqual(1); + }, + }); +}); diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 41441ca512..b1f770c559 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -168,6 +168,10 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - Always minimize wasted context window by aggressively scoping and limiting all of your ${GREP_TOOL_NAME} searches. e.g.: always pass total_max_matches, include, and max_matches_per_file. - Use names_only=true or max_matches_per_file=1 to find a list of files that contain a pattern. 
- Limit unnecessary context consumption from file reads by always using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 1kb). +- Conserve context when reading files by reading just enough context to definitively answer the question by passing offset and limit to ${READ_FILE_TOOL_NAME} or by searching with ${GREP_TOOL_NAME} and before=50 and after=50 and total_max_matches + - Always read at most one range from the file to avoid chatty "scrolling" or "pagination" style reads which waste tokens by adding extra turns. + - Always read at least 100 lines to avoid degrading the ${WRITE_FILE_TOOL_NAME} reliability. + - If you have multiple ranges that you want to read, always combine them into a single range to avoid wasting tokens on another tool call. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From cb230cc9896b8ed590591af4dee32d9aeaf08103 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Thu, 12 Feb 2026 06:45:05 -0800 Subject: [PATCH 09/21] Prompt tweaks. --- packages/core/src/prompts/snippets.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index b1f770c559..fd1e90df00 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -169,9 +169,12 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - Use names_only=true or max_matches_per_file=1 to find a list of files that contain a pattern. - Limit unnecessary context consumption from file reads by always using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 1kb). 
- Conserve context when reading files by reading just enough context to definitively answer the question by passing offset and limit to ${READ_FILE_TOOL_NAME} or by searching with ${GREP_TOOL_NAME} and before=50 and after=50 and total_max_matches - - Always read at most one range from the file to avoid chatty "scrolling" or "pagination" style reads which waste tokens by adding extra turns. - - Always read at least 100 lines to avoid degrading the ${WRITE_FILE_TOOL_NAME} reliability. + - Always request a large enough range (e.g., 100-500 lines) to ensure you see sufficient details of the code. - If you have multiple ranges that you want to read, always combine them into a single range to avoid wasting tokens on another tool call. + - Avoid "paging" by requesting a generous buffer around your target; it is more token-efficient to + read 500 lines in one turn than 100 lines across two turns. + - If you identify multiple relevant sections in a file, combine them into a single wide range + covering all of them. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From 89e81f220271650ebe7ed96f9279c50f2b862584 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Thu, 12 Feb 2026 08:00:27 -0800 Subject: [PATCH 10/21] Tweaks. --- packages/core/src/prompts/snippets.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index fd1e90df00..d91933809a 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -167,14 +167,13 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { ## Context Efficiency: - Always minimize wasted context window by aggressively scoping and limiting all of your ${GREP_TOOL_NAME} searches. 
e.g.: always pass total_max_matches, include, and max_matches_per_file. - Use names_only=true or max_matches_per_file=1 to find a list of files that contain a pattern. -- Limit unnecessary context consumption from file reads by always using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 1kb). +- Limit unnecessary context consumption from file reads by always using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 5kb). - Conserve context when reading files by reading just enough context to definitively answer the question by passing offset and limit to ${READ_FILE_TOOL_NAME} or by searching with ${GREP_TOOL_NAME} and before=50 and after=50 and total_max_matches - Always request a large enough range (e.g., 100-500 lines) to ensure you see sufficient details of the code. - If you have multiple ranges that you want to read, always combine them into a single range to avoid wasting tokens on another tool call. - Avoid "paging" by requesting a generous buffer around your target; it is more token-efficient to read 500 lines in one turn than 100 lines across two turns. - - If you identify multiple relevant sections in a file, combine them into a single wide range - covering all of them. + - If you identify multiple relevant sections in a file, combine them into a single wide range covering all of them. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From 1ee6900190668ac1a6eb6d2213ab45ee4ed5a521 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Thu, 12 Feb 2026 17:15:34 -0800 Subject: [PATCH 11/21] Agent provided prompt suggestions. 
--- packages/core/src/prompts/snippets.ts | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index d91933809a..8c7ef80ad4 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -164,16 +164,15 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Credential Protection:** Never log, print, or commit secrets, API keys, or sensitive credentials. Rigorously protect \`.env\` files, \`.git\`, and system configuration folders. - **Source Control:** Do not stage or commit changes unless specifically requested by the user. -## Context Efficiency: -- Always minimize wasted context window by aggressively scoping and limiting all of your ${GREP_TOOL_NAME} searches. e.g.: always pass total_max_matches, include, and max_matches_per_file. -- Use names_only=true or max_matches_per_file=1 to find a list of files that contain a pattern. -- Limit unnecessary context consumption from file reads by always using ${GREP_TOOL_NAME} (configured with \`max_matches_per_file\`) to search large files (> 5kb). -- Conserve context when reading files by reading just enough context to definitively answer the question by passing offset and limit to ${READ_FILE_TOOL_NAME} or by searching with ${GREP_TOOL_NAME} and before=50 and after=50 and total_max_matches - - Always request a large enough range (e.g., 100-500 lines) to ensure you see sufficient details of the code. - - If you have multiple ranges that you want to read, always combine them into a single range to avoid wasting tokens on another tool call. - - Avoid "paging" by requesting a generous buffer around your target; it is more token-efficient to - read 500 lines in one turn than 100 lines across two turns. - - If you identify multiple relevant sections in a file, combine them into a single wide range covering all of them. 
+## Context Efficiency & Research Decisiveness: + - **Primary Discovery:** Utilize \`grep_search\` as your primary investigative tool to pinpoint specific lines and anchor points before using \`read_file\`. + - **Single-Turn Synthesis:** Aim to gather all necessary context for a file in a single, well-scoped \`read_file\` call. Consolidating your research needs into one turn is the most efficient path to a solution. + - **Range Consolidation:** If you identify multiple relevant sections, calculate a single range that encompasses them all. Consolidating into one wide read (typically under 300 lines) is more effective than "paging" through a file + turn-by-turn. + - **Strategic Buffering:** When preparing an edit, request a range that provides approximately 10-15 lines of context above and below your target. This ensures the \`replace\` tool has the stable, unique markers required to succeed in one + attempt. + - **Edit Confirmation:** Rely on the "Success" output of your editing tools. Proceed directly to behavioral validation (running tests) after a successful modification, as test results provide the most accurate verification of the file's + state. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From f0f47df01e0a27282075c62d6e98d0d8bed0f3b2 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Fri, 13 Feb 2026 09:05:28 -0800 Subject: [PATCH 12/21] Add line. 
--- packages/core/src/prompts/snippets.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 8c7ef80ad4..bcbff2d429 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -165,7 +165,8 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency & Research Decisiveness: - - **Primary Discovery:** Utilize \`grep_search\` as your primary investigative tool to pinpoint specific lines and anchor points before using \`read_file\`. + - **Primary Discovery:** Utilize \`grep_search\` as your primary investigative tool to pinpoint specific lines and anchor points before using \`read_file\`. Maintaining a lean, high-signal context window is best achieved by always applying precise scoping (\`include\`, \`max_matches_per_file\`, + \`names_only\`) and strictly limiting results (\`total_max_matches\`). - **Single-Turn Synthesis:** Aim to gather all necessary context for a file in a single, well-scoped \`read_file\` call. Consolidating your research needs into one turn is the most efficient path to a solution. - **Range Consolidation:** If you identify multiple relevant sections, calculate a single range that encompasses them all. Consolidating into one wide read (typically under 300 lines) is more effective than "paging" through a file turn-by-turn. From e0f0b7b56e29e63f6f971ed5ee79f97624bf99aa Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Fri, 13 Feb 2026 12:07:11 -0800 Subject: [PATCH 13/21] Push limits. 
--- packages/core/src/prompts/snippets.ts | 2 +- packages/core/src/tools/ripGrep.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index bcbff2d429..af94353b3f 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -166,7 +166,7 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { ## Context Efficiency & Research Decisiveness: - **Primary Discovery:** Utilize \`grep_search\` as your primary investigative tool to pinpoint specific lines and anchor points before using \`read_file\`. Maintaining a lean, high-signal context window is best achieved by always applying precise scoping (\`include\`, \`max_matches_per_file\`, - \`names_only\`) and strictly limiting results (\`total_max_matches\`). + \`names_only\`) and IN PARTICULAR remembering to strictly limit results to just enough to answer the question (\`total_max_matches\`). - **Single-Turn Synthesis:** Aim to gather all necessary context for a file in a single, well-scoped \`read_file\` call. Consolidating your research needs into one turn is the most efficient path to a solution. - **Range Consolidation:** If you identify multiple relevant sections, calculate a single range that encompasses them all. Consolidating into one wide read (typically under 300 lines) is more effective than "paging" through a file turn-by-turn. 
diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index c7855c2808..f372ef98b9 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -633,7 +633,7 @@ export class RipGrepTool extends BaseDeclarativeTool< minimum: 1, }, }, - required: ['pattern'], + required: ['pattern', 'total_max_matches'], type: 'object', }, messageBus, From 1e4cdfd6912f5e22dd262f986cbe56ec38ef1c6b Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Fri, 13 Feb 2026 17:07:59 -0800 Subject: [PATCH 14/21] Make max matches optional. --- packages/core/src/tools/ripGrep.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index f372ef98b9..c7855c2808 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -633,7 +633,7 @@ export class RipGrepTool extends BaseDeclarativeTool< minimum: 1, }, }, - required: ['pattern', 'total_max_matches'], + required: ['pattern'], type: 'object', }, messageBus, From 1a6c3ec9ef0b15035a7a8da6fe53ec36a235636f Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sat, 14 Feb 2026 13:29:03 -0800 Subject: [PATCH 15/21] Add edit correction. 
--- packages/core/src/tools/edit.test.ts | 47 ++++++++++ packages/core/src/tools/edit.ts | 131 +++++++++++++++++++++++++++ 2 files changed, 178 insertions(+) diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index 56dc2cb2c4..fa4bfa282a 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -373,6 +373,53 @@ describe('EditTool', () => { expect(result.occurrences).toBe(1); }); + it('should perform a fuzzy replacement when exact match fails but similarity is high', async () => { + const content = + 'const myConfig = {\n enableFeature: true,\n retries: 3\n};'; + // Typo: missing comma after true + const oldString = + 'const myConfig = {\n enableFeature: true\n retries: 3\n};'; + const newString = + 'const myConfig = {\n enableFeature: false,\n retries: 5\n};'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'config.ts', + instruction: 'update config', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(1); + expect(result.newContent).toBe(newString); + }); + + it('should NOT perform a fuzzy replacement when similarity is below threshold', async () => { + const content = + 'const myConfig = {\n enableFeature: true,\n retries: 3\n};'; + // Completely different string + const oldString = 'function somethingElse() {\n return false;\n}'; + const newString = + 'const myConfig = {\n enableFeature: false,\n retries: 5\n};'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'config.ts', + instruction: 'update config', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(0); + expect(result.newContent).toBe(content); + }); + it('should NOT insert extra newlines when replacing a block preceded by a blank line (regression)', async () => { const content = '\n 
function oldFunc() {\n // some code\n }'; const result = await calculateReplacement(mockConfig, { diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index d7c8973a91..e351102db1 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -44,6 +44,12 @@ import { logEditCorrectionEvent } from '../telemetry/loggers.js'; import { correctPath } from '../utils/pathCorrector.js'; import { EDIT_TOOL_NAME, READ_FILE_TOOL_NAME } from './tool-names.js'; import { debugLogger } from '../utils/debugLogger.js'; +import levenshtein from 'fast-levenshtein'; + +const ENABLE_FUZZY_MATCH_RECOVERY = true; +const FUZZY_MATCH_THRESHOLD = 0.15; // Allow up to 15% weighted difference +const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference + interface ReplacementContext { params: EditToolParams; currentContent: string; @@ -298,6 +304,14 @@ export async function calculateReplacement( return regexResult; } + let fuzzyResult; + if ( + ENABLE_FUZZY_MATCH_RECOVERY && + (fuzzyResult = await calculateFuzzyReplacement(config, context)) + ) { + return fuzzyResult; + } + return { newContent: currentContent, occurrences: 0, @@ -1054,3 +1068,120 @@ A good instruction should concisely answer: }; } } + +function stripWhitespace(str: string): string { + return str.replace(/\s/g, ''); +} + +async function calculateFuzzyReplacement( + config: Config, + context: ReplacementContext, +): Promise { + const { currentContent, params } = context; + const { old_string, new_string } = params; + + // Pre-check: Don't fuzzy match very short strings to avoid false positives + if (old_string.length < 10) { + return null; + } + + const normalizedCode = currentContent.replace(/\r\n/g, '\n'); + const normalizedSearch = old_string.replace(/\r\n/g, '\n'); + const normalizedReplace = new_string.replace(/\r\n/g, '\n'); + + const sourceLines = normalizedCode.match(/.*(?:\n|$)/g)?.slice(0, -1) ?? 
[]; + const searchLines = normalizedSearch + .match(/.*(?:\n|$)/g) + ?.slice(0, -1) + .map((l) => l.trimEnd()); // Trim end of search lines to be more robust + + if (!searchLines || searchLines.length === 0) { + return null; + } + + const N = searchLines.length; + let bestWindowStartIndex = -1; + let minScore = Infinity; + + const searchBlock = searchLines.join('\n'); + + // Sliding window + for (let i = 0; i <= sourceLines.length - N; i++) { + const windowLines = sourceLines.slice(i, i + N); + // Join window lines same way we treat search lines (trim end or just raw join?) + // Let's keep it simple: join the raw window lines for comparison + // But we might want to trim end of window lines too to match our searchLines processing? + // Let's stick to the plan: join the window lines. + // However, sourceLines includes the newline chars. + const windowText = windowLines.map((l) => l.trimEnd()).join('\n'); // Normalized join for comparison + + // Length Heuristic Optimization + const lengthDiff = Math.abs(windowText.length - searchBlock.length); + if (lengthDiff / searchBlock.length > FUZZY_MATCH_THRESHOLD) { + continue; + } + + // Tiered Scoring + const d_raw = levenshtein.get(windowText, searchBlock); + const d_norm = levenshtein.get( + stripWhitespace(windowText), + stripWhitespace(searchBlock), + ); + + const weightedDist = d_norm + (d_raw - d_norm) * WHITESPACE_PENALTY_FACTOR; + const score = weightedDist / searchBlock.length; + + if (score < minScore) { + minScore = score; + bestWindowStartIndex = i; + } + } + + if (bestWindowStartIndex !== -1 && minScore <= FUZZY_MATCH_THRESHOLD) { + const event = new EditStrategyEvent('fuzzy'); + logEditStrategy(config, event); + + // Apply replacement + // We need to be careful to preserve indentation of the first line if possible, + // or just replace the block entirely. The "flexible" strategy tried to preserve indentation. + // Here, we just replace the found block with the new string. 
+ // If the user provided indentation in new_string, it will be used. + // If we want to be smarter, we could detect indentation of the matched window's first line + // and apply it to new_string, but `new_string` is "exact literal text", so we probably shouldn't mess with it too much unless necessary. + // For now, simple replacement of the lines. + + const newLines = normalizedReplace.split('\n'); + // If we want to preserve the indentation of the first line of the match: + const firstLineMatch = sourceLines[bestWindowStartIndex]; + const indentationMatch = firstLineMatch.match(/^([ \t]*)/); + const indentation = indentationMatch ? indentationMatch[1] : ''; + + // If the new string doesn't seem to have indentation relative to the old string, we might want to apply it. + // But typically the user provides the new block with correct relative indentation or full indentation. + // Let's follow the "flexible" strategy's approach: apply the indentation of the start of the match to every line of the replacement. + // EXCEPT if the new string already looks fully indented. + // Let's stick to the flexible replacement logic for indentation application to be consistent. + + const indentedReplaceLines = newLines.map( + (line) => `${indentation}${line}`, + ); + + sourceLines.splice( + bestWindowStartIndex, + N, + indentedReplaceLines.join('\n'), // Use the indented version + ); + + let modifiedCode = sourceLines.join(''); + modifiedCode = restoreTrailingNewline(currentContent, modifiedCode); + + return { + newContent: modifiedCode, + occurrences: 1, + finalOldString: normalizedSearch, + finalNewString: normalizedReplace, + }; + } + + return null; +} From 74bd9342c3b88d5efa3f9950a3ab5a26aaf7dc6f Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sat, 14 Feb 2026 14:14:09 -0800 Subject: [PATCH 16/21] Drop threshold to 5%. 
--- packages/core/src/tools/edit.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index e351102db1..78d8dfe87e 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -47,7 +47,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import levenshtein from 'fast-levenshtein'; const ENABLE_FUZZY_MATCH_RECOVERY = true; -const FUZZY_MATCH_THRESHOLD = 0.15; // Allow up to 15% weighted difference +const FUZZY_MATCH_THRESHOLD = 0.05; // Allow up to 5% weighted difference const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference interface ReplacementContext { From c5d01784ffb023d9fd7f053276dcac9cf0ddf2f1 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sat, 14 Feb 2026 15:35:54 -0800 Subject: [PATCH 17/21] More edits improvements. --- packages/core/src/tools/edit.test.ts | 48 +++++++++++++++ packages/core/src/tools/edit.ts | 92 ++++++++++++++++------------ 2 files changed, 100 insertions(+), 40 deletions(-) diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index fa4bfa282a..3e199ae52b 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -420,6 +420,54 @@ describe('EditTool', () => { expect(result.newContent).toBe(content); }); + it('should perform multiple fuzzy replacements if multiple valid matches are found', async () => { + const content = ` +function doIt() { + console.log("hello"); +} + +function doIt() { + console.log("hello"); +} +`; + // old_string uses single quotes, file uses double. + // This is a fuzzy match (quote difference). 
+ const oldString = ` +function doIt() { + console.log('hello'); +} +`.trim(); + + const newString = ` +function doIt() { + console.log("bye"); +} +`.trim(); + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'test.ts', + instruction: 'update', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(2); + const expectedContent = ` +function doIt() { + console.log("bye"); +} + +function doIt() { + console.log("bye"); +} +`; + expect(result.newContent).toBe(expectedContent); + }); + it('should NOT insert extra newlines when replacing a block preceded by a blank line (regression)', async () => { const content = '\n function oldFunc() {\n // some code\n }'; const result = await calculateReplacement(mockConfig, { diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index 78d8dfe87e..30808c959d 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -47,7 +47,7 @@ import { debugLogger } from '../utils/debugLogger.js'; import levenshtein from 'fast-levenshtein'; const ENABLE_FUZZY_MATCH_RECOVERY = true; -const FUZZY_MATCH_THRESHOLD = 0.05; // Allow up to 5% weighted difference +const FUZZY_MATCH_THRESHOLD = 0.1; // Allow up to 10% weighted difference const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference interface ReplacementContext { @@ -1100,24 +1100,20 @@ async function calculateFuzzyReplacement( } const N = searchLines.length; - let bestWindowStartIndex = -1; - let minScore = Infinity; - + const candidates: Array<{ index: number; score: number }> = []; const searchBlock = searchLines.join('\n'); // Sliding window for (let i = 0; i <= sourceLines.length - N; i++) { const windowLines = sourceLines.slice(i, i + N); - // Join window lines same way we treat search lines (trim end or just raw join?) 
- // Let's keep it simple: join the raw window lines for comparison - // But we might want to trim end of window lines too to match our searchLines processing? - // Let's stick to the plan: join the window lines. - // However, sourceLines includes the newline chars. const windowText = windowLines.map((l) => l.trimEnd()).join('\n'); // Normalized join for comparison // Length Heuristic Optimization const lengthDiff = Math.abs(windowText.length - searchBlock.length); - if (lengthDiff / searchBlock.length > FUZZY_MATCH_THRESHOLD) { + if ( + lengthDiff / searchBlock.length > + FUZZY_MATCH_THRESHOLD / WHITESPACE_PENALTY_FACTOR + ) { continue; } @@ -1131,53 +1127,69 @@ async function calculateFuzzyReplacement( const weightedDist = d_norm + (d_raw - d_norm) * WHITESPACE_PENALTY_FACTOR; const score = weightedDist / searchBlock.length; - if (score < minScore) { - minScore = score; - bestWindowStartIndex = i; + if (score <= FUZZY_MATCH_THRESHOLD) { + candidates.push({ index: i, score }); } } - if (bestWindowStartIndex !== -1 && minScore <= FUZZY_MATCH_THRESHOLD) { + if (candidates.length === 0) { + return null; + } + + // Select best non-overlapping matches + // Sort by score ascending. If scores equal, prefer earlier index (stable sort). + candidates.sort((a, b) => a.score - b.score || a.index - b.index); + + const selectedMatches: Array<{ index: number; score: number }> = []; + for (const candidate of candidates) { + // Check for overlap with already selected matches + // Two windows overlap if their start indices are within N lines of each other + // (Assuming window size N. 
Actually overlap is |i - j| < N) + const overlaps = selectedMatches.some( + (m) => Math.abs(m.index - candidate.index) < N, + ); + if (!overlaps) { + selectedMatches.push(candidate); + } + } + + // If we found matches, apply them + if (selectedMatches.length > 0) { const event = new EditStrategyEvent('fuzzy'); logEditStrategy(config, event); - // Apply replacement - // We need to be careful to preserve indentation of the first line if possible, - // or just replace the block entirely. The "flexible" strategy tried to preserve indentation. - // Here, we just replace the found block with the new string. - // If the user provided indentation in new_string, it will be used. - // If we want to be smarter, we could detect indentation of the matched window's first line - // and apply it to new_string, but `new_string` is "exact literal text", so we probably shouldn't mess with it too much unless necessary. - // For now, simple replacement of the lines. + // Sort matches by index descending to apply replacements from bottom to top + // so that indices remain valid + selectedMatches.sort((a, b) => b.index - a.index); const newLines = normalizedReplace.split('\n'); - // If we want to preserve the indentation of the first line of the match: - const firstLineMatch = sourceLines[bestWindowStartIndex]; - const indentationMatch = firstLineMatch.match(/^([ \t]*)/); - const indentation = indentationMatch ? indentationMatch[1] : ''; - // If the new string doesn't seem to have indentation relative to the old string, we might want to apply it. - // But typically the user provides the new block with correct relative indentation or full indentation. - // Let's follow the "flexible" strategy's approach: apply the indentation of the start of the match to every line of the replacement. - // EXCEPT if the new string already looks fully indented. - // Let's stick to the flexible replacement logic for indentation application to be consistent. 
+ for (const match of selectedMatches) { + // If we want to preserve the indentation of the first line of the match: + const firstLineMatch = sourceLines[match.index]; + const indentationMatch = firstLineMatch.match(/^([ \t]*)/); + const indentation = indentationMatch ? indentationMatch[1] : ''; - const indentedReplaceLines = newLines.map( - (line) => `${indentation}${line}`, - ); + const indentedReplaceLines = newLines.map( + (line) => `${indentation}${line}`, + ); - sourceLines.splice( - bestWindowStartIndex, - N, - indentedReplaceLines.join('\n'), // Use the indented version - ); + let replacementText = indentedReplaceLines.join('\n'); + // If the last line of the match had a newline, preserve it in the replacement + // to avoid merging with the next line or losing a blank line separator. + if (sourceLines[match.index + N - 1].endsWith('\n')) { + replacementText += '\n'; + } + + sourceLines.splice(match.index, N, replacementText); + } let modifiedCode = sourceLines.join(''); modifiedCode = restoreTrailingNewline(currentContent, modifiedCode); return { newContent: modifiedCode, - occurrences: 1, + occurrences: selectedMatches.length, finalOldString: normalizedSearch, finalNewString: normalizedReplace, }; From e85e24fd208e69728b0d301e5a29d146682c6861 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sun, 15 Feb 2026 12:46:38 -0800 Subject: [PATCH 18/21] Provide feedback on fuzzy matches. 
--- packages/core/src/tools/edit.ts | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index 30808c959d..9706d160f9 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -61,6 +61,8 @@ interface ReplacementResult { occurrences: number; finalOldString: string; finalNewString: string; + strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy'; + matchRanges?: Array<{ start: number; end: number }>; } export function applyReplacement( @@ -402,6 +404,8 @@ interface CalculatedEdit { error?: { display: string; raw: string; type: ToolErrorType }; isNewFile: boolean; originalLineEnding: '\r\n' | '\n'; + strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy'; + matchRanges?: Array<{ start: number; end: number }>; } class EditToolInvocation @@ -527,6 +531,8 @@ class EditToolInvocation isNewFile: false, error: undefined, originalLineEnding, + strategy: secondAttemptResult.strategy, + matchRanges: secondAttemptResult.matchRanges, }; } @@ -640,6 +646,8 @@ class EditToolInvocation isNewFile: false, error: undefined, originalLineEnding, + strategy: replacementResult.strategy, + matchRanges: replacementResult.matchRanges, }; } @@ -866,6 +874,10 @@ class EditToolInvocation ? 
`Created new file: ${this.params.file_path} with provided content.` : `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`, ]; + const fuzzyFeedback = getFuzzyMatchFeedback(editData); + if (fuzzyFeedback) { + llmSuccessMessageParts.push(fuzzyFeedback); + } if (this.params.modified_by_user) { llmSuccessMessageParts.push( `User modified the \`new_string\` content to be: ${this.params.new_string}.`, @@ -1073,6 +1085,20 @@ function stripWhitespace(str: string): string { return str.replace(/\s/g, ''); } +function getFuzzyMatchFeedback(editData: CalculatedEdit): string | null { + if ( + editData.strategy === 'fuzzy' && + editData.matchRanges && + editData.matchRanges.length > 0 + ) { + const ranges = editData.matchRanges + .map((r) => (r.start === r.end ? `${r.start}` : `${r.start}-${r.end}`)) + .join(', '); + return `Applied fuzzy match at line${editData.matchRanges.length > 1 ? 's' : ''} ${ranges}.`; + } + return null; +} + async function calculateFuzzyReplacement( config: Config, context: ReplacementContext, @@ -1158,6 +1184,12 @@ async function calculateFuzzyReplacement( const event = new EditStrategyEvent('fuzzy'); logEditStrategy(config, event); + // Calculate match ranges before sorting for replacement + // Indices in selectedMatches are 0-based line indices + const matchRanges = selectedMatches + .map((m) => ({ start: m.index + 1, end: m.index + N })) + .sort((a, b) => a.start - b.start); + // Sort matches by index descending to apply replacements from bottom to top // so that indices remain valid selectedMatches.sort((a, b) => b.index - a.index); @@ -1192,6 +1224,8 @@ async function calculateFuzzyReplacement( occurrences: selectedMatches.length, finalOldString: normalizedSearch, finalNewString: normalizedReplace, + strategy: 'fuzzy', + matchRanges, }; } From 1f0376042982f7e6e464ecc2777df065b6c08eb3 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sun, 15 Feb 2026 21:19:38 -0800 Subject: [PATCH 19/21] Another 
prompt. --- packages/core/src/prompts/snippets.ts | 38 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index af94353b3f..588127c0a4 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -164,16 +164,34 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Credential Protection:** Never log, print, or commit secrets, API keys, or sensitive credentials. Rigorously protect \`.env\` files, \`.git\`, and system configuration folders. - **Source Control:** Do not stage or commit changes unless specifically requested by the user. -## Context Efficiency & Research Decisiveness: - - **Primary Discovery:** Utilize \`grep_search\` as your primary investigative tool to pinpoint specific lines and anchor points before using \`read_file\`. Maintaining a lean, high-signal context window is best achieved by always applying precise scoping (\`include\`, \`max_matches_per_file\`, - \`names_only\`) and IN PARTICULAR remembering to strictly limit results to just enough to answer the question (\`total_max_matches\`). - - **Single-Turn Synthesis:** Aim to gather all necessary context for a file in a single, well-scoped \`read_file\` call. Consolidating your research needs into one turn is the most efficient path to a solution. - - **Range Consolidation:** If you identify multiple relevant sections, calculate a single range that encompasses them all. Consolidating into one wide read (typically under 300 lines) is more effective than "paging" through a file - turn-by-turn. - - **Strategic Buffering:** When preparing an edit, request a range that provides approximately 10-15 lines of context above and below your target. This ensures the \`replace\` tool has the stable, unique markers required to succeed in one - attempt. - - **Edit Confirmation:** Rely on the "Success" output of your editing tools. 
Proceed directly to behavioral validation (running tests) after a successful modification, as test results provide the most accurate verification of the file's - state. +## Context Efficiency: +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to ${GREP_TOOL_NAME}, to enable you to skip using an extra turn reading the file. +- Prefer using tools like ${GREP_TOOL_NAME} to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME}. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. 
+ + + +- **Searching:** utilize search tools like ${GREP_TOOL_NAME} and ${GLOB_TOOL_NAME} with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like ${GREP_TOOL_NAME} with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like ${GREP_TOOL_NAME} and/or ${READ_FILE_TOOL_NAME} called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to avoid additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. From 0aa04bae219be517430fc79211f950fda2cd2b0f Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sun, 15 Feb 2026 22:35:21 -0800 Subject: [PATCH 20/21] Read min. --- packages/core/src/prompts/snippets.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 588127c0a4..5b7dbebb34 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -181,6 +181,7 @@ Use the following guidelines to optimize your search and read patterns. - Prefer using tools like ${GREP_TOOL_NAME} to identify points of interest instead of reading lots of files individually. - If you need to read multiple ranges in a file, do so parallel, in as few turns as possible.
- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME}. +- The \`replace\` tool fails if old_string is ambiguous, causing extra turns. Take care to read enough with ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME} to make the edit unambiguous. - You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. - Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. From 4ad4b2adc95be5736a50bc1bc2f784f8730d1ff3 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Mon, 16 Feb 2026 10:59:52 -0800 Subject: [PATCH 21/21] Emphasize cost of a turn. --- packages/core/src/prompts/snippets.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 5b7dbebb34..e3eef872c1 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -171,7 +171,7 @@ providing the best answer that you can. Consider the following when estimating the cost of your approach: - The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. -- Unnecessary turns are generally more expensive than other types of wasted context.
- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy.