diff --git a/evals/frugalReads.eval.ts b/evals/frugalReads.eval.ts
new file mode 100644
index 0000000000..454ba84cbd
--- /dev/null
+++ b/evals/frugalReads.eval.ts
@@ -0,0 +1,270 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import { evalTest } from './test-helper.js';
+import { READ_FILE_TOOL_NAME, EDIT_TOOL_NAME } from '@google/gemini-cli-core';
+
+describe('Frugal reads eval', () => {
+ /**
+ * Ensures that the agent is frugal in its use of context by relying
+ * primarily on ranged reads when the line number is known, and combining
+ * nearby ranges into a single contiguous read to save tool calls.
+ */
+ evalTest('ALWAYS_PASSES', {
+ name: 'should use ranged read when nearby lines are targeted',
+ files: {
+ 'package.json': JSON.stringify({
+ name: 'test-project',
+ version: '1.0.0',
+ type: 'module',
+ }),
+ 'eslint.config.mjs': `export default [
+ {
+ files: ["**/*.ts"],
+ rules: {
+ "no-var": "error"
+ }
+ }
+ ];`,
+ 'linter_mess.ts': (() => {
+ const lines = [];
+ for (let i = 0; i < 1000; i++) {
+ if (i === 500 || i === 510 || i === 520) {
+ lines.push(`var oldVar${i} = "needs fix";`);
+ } else {
+ lines.push(`const goodVar${i} = "clean";`);
+ }
+ }
+ return lines.join('\n');
+ })(),
+ },
+ prompt:
+ 'Fix all linter errors in linter_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. Do not run the file.',
+ assert: async (rig) => {
+ const logs = rig.readToolLogs();
+
+ // Check if the agent read the whole file
+ const readCalls = logs.filter(
+ (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
+ );
+
+ const targetFileReads = readCalls.filter((call) => {
+ const args = JSON.parse(call.toolRequest.args);
+ return args.file_path.includes('linter_mess.ts');
+ });
+
+ expect(
+ targetFileReads.length,
+ 'Agent should have used read_file to check context',
+ ).toBeGreaterThan(0);
+
+      // We expect a single contiguous range covering all errors since they are near each other.
+      // Some models re-verify or read more than once, so we allow up to 4.
+      expect(
+        targetFileReads.length,
+        'Agent should have been efficient with ranged reads for near errors',
+      ).toBeLessThanOrEqual(4);
+
+ let totalLinesRead = 0;
+ const readRanges: { offset: number; limit: number }[] = [];
+
+ for (const call of targetFileReads) {
+ const args = JSON.parse(call.toolRequest.args);
+
+ expect(
+ args.limit,
+ 'Agent read the entire file (missing limit) instead of using ranged read',
+ ).toBeDefined();
+
+ const limit = args.limit;
+ const offset = args.offset ?? 0;
+ totalLinesRead += limit;
+ readRanges.push({ offset, limit });
+
+ expect(args.limit, 'Agent read too many lines at once').toBeLessThan(
+ 1001,
+ );
+ }
+
+      // Ranged read should be frugal and just enough to satisfy the task at hand.
+ expect(
+ totalLinesRead,
+ 'Agent read more of the file than expected',
+ ).toBeLessThan(1000);
+
+ // Check that we read around the error lines
+ const errorLines = [500, 510, 520];
+ for (const line of errorLines) {
+ const covered = readRanges.some(
+ (range) => line >= range.offset && line < range.offset + range.limit,
+ );
+ expect(covered, `Agent should have read around line ${line}`).toBe(
+ true,
+ );
+ }
+
+ const editCalls = logs.filter(
+ (log) => log.toolRequest?.name === EDIT_TOOL_NAME,
+ );
+ const targetEditCalls = editCalls.filter((call) => {
+ const args = JSON.parse(call.toolRequest.args);
+ return args.file_path.includes('linter_mess.ts');
+ });
+ expect(
+ targetEditCalls.length,
+ 'Agent should have made replacement calls on the target file',
+ ).toBeGreaterThanOrEqual(3);
+ },
+ });
+
+ /**
+ * Ensures the agent uses multiple ranged reads when the targets are far
+ * apart to avoid the need to read the whole file.
+ */
+ evalTest('ALWAYS_PASSES', {
+ name: 'should use ranged read when targets are far apart',
+ files: {
+ 'package.json': JSON.stringify({
+ name: 'test-project',
+ version: '1.0.0',
+ type: 'module',
+ }),
+ 'eslint.config.mjs': `export default [
+ {
+ files: ["**/*.ts"],
+ rules: {
+ "no-var": "error"
+ }
+ }
+ ];`,
+ 'far_mess.ts': (() => {
+ const lines = [];
+ for (let i = 0; i < 1000; i++) {
+ if (i === 100 || i === 900) {
+ lines.push(`var oldVar${i} = "needs fix";`);
+ } else {
+ lines.push(`const goodVar${i} = "clean";`);
+ }
+ }
+ return lines.join('\n');
+ })(),
+ },
+ prompt:
+ 'Fix all linter errors in far_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. Do not run the file.',
+ assert: async (rig) => {
+ const logs = rig.readToolLogs();
+
+ const readCalls = logs.filter(
+ (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
+ );
+
+ const targetFileReads = readCalls.filter((call) => {
+ const args = JSON.parse(call.toolRequest.args);
+ return args.file_path.includes('far_mess.ts');
+ });
+
+ // The agent should use ranged reads to be frugal with context tokens,
+ // even if it requires multiple calls for far-apart errors.
+ expect(
+ targetFileReads.length,
+ 'Agent should have used read_file to check context',
+ ).toBeGreaterThan(0);
+
+ // We allow multiple calls since the errors are far apart.
+ expect(
+ targetFileReads.length,
+ 'Agent should have used separate reads for far apart errors',
+ ).toBeLessThanOrEqual(4);
+
+ for (const call of targetFileReads) {
+ const args = JSON.parse(call.toolRequest.args);
+ expect(
+ args.limit,
+ 'Agent should have used ranged read (limit) to save tokens',
+ ).toBeDefined();
+ }
+ },
+ });
+
+ /**
+ * Validates that the agent reads the entire file if there are lots of matches
+ * (e.g.: 10), as it's more efficient than many small ranged reads.
+ */
+ evalTest('ALWAYS_PASSES', {
+ name: 'should read the entire file when there are many matches',
+ files: {
+ 'package.json': JSON.stringify({
+ name: 'test-project',
+ version: '1.0.0',
+ type: 'module',
+ }),
+ 'eslint.config.mjs': `export default [
+ {
+ files: ["**/*.ts"],
+ rules: {
+ "no-var": "error"
+ }
+ }
+ ];`,
+ 'many_mess.ts': (() => {
+ const lines = [];
+ for (let i = 0; i < 1000; i++) {
+ if (i % 100 === 0) {
+ lines.push(`var oldVar${i} = "needs fix";`);
+ } else {
+ lines.push(`const goodVar${i} = "clean";`);
+ }
+ }
+ return lines.join('\n');
+ })(),
+ },
+ prompt:
+ 'Fix all linter errors in many_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. Do not run the file.',
+ assert: async (rig) => {
+ const logs = rig.readToolLogs();
+
+ const readCalls = logs.filter(
+ (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME,
+ );
+
+ const targetFileReads = readCalls.filter((call) => {
+ const args = JSON.parse(call.toolRequest.args);
+ return args.file_path.includes('many_mess.ts');
+ });
+
+ expect(
+ targetFileReads.length,
+ 'Agent should have used read_file to check context',
+ ).toBeGreaterThan(0);
+
+ // In this case, we expect the agent to realize there are many scattered errors
+ // and just read the whole file to be efficient with tool calls.
+ const readEntireFile = targetFileReads.some((call) => {
+ const args = JSON.parse(call.toolRequest.args);
+ return args.limit === undefined;
+ });
+
+ expect(
+ readEntireFile,
+ 'Agent should have read the entire file because of the high number of scattered matches',
+ ).toBe(true);
+
+ // Check that the agent actually fixed the errors
+ const editCalls = logs.filter(
+ (log) => log.toolRequest?.name === EDIT_TOOL_NAME,
+ );
+ const targetEditCalls = editCalls.filter((call) => {
+ const args = JSON.parse(call.toolRequest.args);
+ return args.file_path.includes('many_mess.ts');
+ });
+ expect(
+ targetEditCalls.length,
+ 'Agent should have made replacement calls on the target file',
+ ).toBeGreaterThanOrEqual(1);
+ },
+ });
+});
diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts
index 11c51e8529..fefe042ca2 100644
--- a/evals/frugalSearch.eval.ts
+++ b/evals/frugalSearch.eval.ts
@@ -25,7 +25,7 @@ describe('Frugal Search', () => {
return args;
};
- evalTest('USUALLY_PASSES', {
+ evalTest('ALWAYS_PASSES', {
name: 'should use targeted search with limit',
prompt: 'find me a sample usage of path.resolve() in the codebase',
files: {
@@ -128,17 +128,79 @@ describe('Frugal Search', () => {
grepParams.map((p) => p.total_max_matches),
)}`,
).toBe(true);
+ },
+ });
+
+ /**
+ * Ensure that the agent makes use of either grep or ranged reads in fulfilling this task.
+ * The task is specifically phrased to not evoke "view" or "search" specifically because
+ * the model implicitly understands that such tasks are searches. This covers the case of
+ * an unexpectedly large file benefitting from frugal approaches to viewing, like grep, or
+ * ranged reads.
+ */
+ evalTest('ALWAYS_PASSES', {
+ name: 'should use grep or ranged read for large files',
+ prompt: 'What year was legacy_processor.ts written?',
+ files: {
+ 'src/utils.ts': 'export const add = (a, b) => a + b;',
+ 'src/types.ts': 'export type ID = string;',
+ 'src/legacy_processor.ts': [
+ '// Copyright 2005 Legacy Systems Inc.',
+ ...Array.from(
+ { length: 5000 },
+ (_, i) =>
+ `// Legacy code block ${i} - strictly preserved for backward compatibility`,
+ ),
+      ].join('\n'),
+ 'README.md': '# Project documentation',
+ },
+ assert: async (rig) => {
+ const toolCalls = rig.readToolLogs();
+ const getParams = (call: any) => {
+ let args = call.toolRequest.args;
+ if (typeof args === 'string') {
+ try {
+ args = JSON.parse(args);
+ } catch (e) {
+ // Ignore parse errors
+ }
+ }
+ return args;
+ };
+
+ // Check for wasteful full file reads
+ const fullReads = toolCalls.filter((call) => {
+ if (call.toolRequest.name !== 'read_file') return false;
+ const args = getParams(call);
+ return (
+        args.file_path.includes('legacy_processor.ts') &&
+ (args.limit === undefined || args.limit === null)
+ );
+ });
- const hasMaxMatchesPerFileLimit = grepParams.some(
- (p) =>
- p.max_matches_per_file !== undefined && p.max_matches_per_file <= 5,
- );
expect(
- hasMaxMatchesPerFileLimit,
- `Expected agent to use a small max_matches_per_file (<= 5) for a sample usage request. Actual values: ${JSON.stringify(
- grepParams.map((p) => p.max_matches_per_file),
- )}`,
- ).toBe(true);
+ fullReads.length,
+ 'Agent should not attempt to read the entire large file at once',
+ ).toBe(0);
+
+ // Check that it actually tried to find it using appropriate tools
+ const validAttempts = toolCalls.filter((call) => {
+ const args = getParams(call);
+ if (call.toolRequest.name === 'grep_search') {
+ return true;
+ }
+
+ if (
+ call.toolRequest.name === 'read_file' &&
+        args.file_path.includes('legacy_processor.ts') &&
+ args.limit !== undefined
+ ) {
+ return true;
+ }
+ return false;
+ });
+
+ expect(validAttempts.length).toBeGreaterThan(0);
},
});
});
diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap
index ed79a3a497..d9af62a5b1 100644
--- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap
+++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap
@@ -520,8 +520,10 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -650,8 +652,10 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -746,8 +750,10 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -1311,8 +1317,10 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills with
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -1437,8 +1445,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -1554,8 +1564,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -1671,8 +1683,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -1784,8 +1798,10 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -1896,8 +1912,10 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -2248,8 +2266,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -2361,8 +2381,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -2585,8 +2607,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -2698,8 +2722,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
+- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
+- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
## Engineering Standards
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts
index 57b6b2dac2..28cd43af3a 100644
--- a/packages/core/src/prompts/snippets.ts
+++ b/packages/core/src/prompts/snippets.ts
@@ -165,8 +165,34 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency:
-- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
-- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+Be strategic in your use of the available tools to minimize unnecessary context usage while still
+providing the best answer that you can.
+
+Consider the following when estimating the cost of your approach:
+
+- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is.
+- Unnecessary turns are generally much more expensive than other types of wasted context. An extra turn late in a session can cost > 1 million tokens vs. reading a full file, which is rarely greater than 10k tokens.
+- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy.
+
+
+Use the following guidelines to optimize your search and read patterns.
+
+- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to ${GREP_TOOL_NAME}, to enable you to skip using an extra turn reading the file.
+- Prefer using tools like ${GREP_TOOL_NAME} to identify points of interest instead of reading lots of files individually.
+- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible.
+- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME}.
+- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME} to make the edit unambiguous.
+- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel.
+- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern.
+
+
+
+- **Searching:** utilize search tools like ${GREP_TOOL_NAME} and ${GLOB_TOOL_NAME} with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters).
+- **Searching and editing:** utilize search tools like ${GREP_TOOL_NAME} with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches.
+- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety.
+- **Large files:** utilize search tools like ${GREP_TOOL_NAME} and/or ${READ_FILE_TOOL_NAME} called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large.
+- **Navigating:** read the minimum required to not require additional turns spent reading the file.
+
## Parallelism
You MUST ALWAYS utilize the generalist subagent to conserve context when doing repetitive tasks and parallelize independent bodies of work, even if you think you don't need to. This is very IMPORTANT to ensure you stay on track on repetitive tasks and/or complete tasks in a timely fashion.
diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts
index 56dc2cb2c4..3e199ae52b 100644
--- a/packages/core/src/tools/edit.test.ts
+++ b/packages/core/src/tools/edit.test.ts
@@ -373,6 +373,101 @@ describe('EditTool', () => {
expect(result.occurrences).toBe(1);
});
+ it('should perform a fuzzy replacement when exact match fails but similarity is high', async () => {
+ const content =
+ 'const myConfig = {\n enableFeature: true,\n retries: 3\n};';
+ // Typo: missing comma after true
+ const oldString =
+ 'const myConfig = {\n enableFeature: true\n retries: 3\n};';
+ const newString =
+ 'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
+
+ const result = await calculateReplacement(mockConfig, {
+ params: {
+ file_path: 'config.ts',
+ instruction: 'update config',
+ old_string: oldString,
+ new_string: newString,
+ },
+ currentContent: content,
+ abortSignal,
+ });
+
+ expect(result.occurrences).toBe(1);
+ expect(result.newContent).toBe(newString);
+ });
+
+ it('should NOT perform a fuzzy replacement when similarity is below threshold', async () => {
+ const content =
+ 'const myConfig = {\n enableFeature: true,\n retries: 3\n};';
+ // Completely different string
+ const oldString = 'function somethingElse() {\n return false;\n}';
+ const newString =
+ 'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
+
+ const result = await calculateReplacement(mockConfig, {
+ params: {
+ file_path: 'config.ts',
+ instruction: 'update config',
+ old_string: oldString,
+ new_string: newString,
+ },
+ currentContent: content,
+ abortSignal,
+ });
+
+ expect(result.occurrences).toBe(0);
+ expect(result.newContent).toBe(content);
+ });
+
+ it('should perform multiple fuzzy replacements if multiple valid matches are found', async () => {
+ const content = `
+function doIt() {
+ console.log("hello");
+}
+
+function doIt() {
+ console.log("hello");
+}
+`;
+ // old_string uses single quotes, file uses double.
+ // This is a fuzzy match (quote difference).
+ const oldString = `
+function doIt() {
+ console.log('hello');
+}
+`.trim();
+
+ const newString = `
+function doIt() {
+ console.log("bye");
+}
+`.trim();
+
+ const result = await calculateReplacement(mockConfig, {
+ params: {
+ file_path: 'test.ts',
+ instruction: 'update',
+ old_string: oldString,
+ new_string: newString,
+ },
+ currentContent: content,
+ abortSignal,
+ });
+
+ expect(result.occurrences).toBe(2);
+ const expectedContent = `
+function doIt() {
+ console.log("bye");
+}
+
+function doIt() {
+ console.log("bye");
+}
+`;
+ expect(result.newContent).toBe(expectedContent);
+ });
+
it('should NOT insert extra newlines when replacing a block preceded by a blank line (regression)', async () => {
const content = '\n function oldFunc() {\n // some code\n }';
const result = await calculateReplacement(mockConfig, {
diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts
index 2e79ebcb6b..29624fe9f0 100644
--- a/packages/core/src/tools/edit.ts
+++ b/packages/core/src/tools/edit.ts
@@ -47,6 +47,11 @@ import { EDIT_TOOL_NAME, READ_FILE_TOOL_NAME } from './tool-names.js';
import { debugLogger } from '../utils/debugLogger.js';
import { EDIT_DEFINITION } from './definitions/coreTools.js';
import { resolveToolDeclaration } from './definitions/resolver.js';
+import levenshtein from 'fast-levenshtein';
+
+const ENABLE_FUZZY_MATCH_RECOVERY = true;
+const FUZZY_MATCH_THRESHOLD = 0.1; // Allow up to 10% weighted difference
+const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference
interface ReplacementContext {
params: EditToolParams;
currentContent: string;
@@ -58,6 +63,8 @@ interface ReplacementResult {
occurrences: number;
finalOldString: string;
finalNewString: string;
+ strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy';
+ matchRanges?: Array<{ start: number; end: number }>;
}
export function applyReplacement(
@@ -301,6 +308,14 @@ export async function calculateReplacement(
return regexResult;
}
+ let fuzzyResult;
+ if (
+ ENABLE_FUZZY_MATCH_RECOVERY &&
+ (fuzzyResult = await calculateFuzzyReplacement(config, context))
+ ) {
+ return fuzzyResult;
+ }
+
return {
newContent: currentContent,
occurrences: 0,
@@ -391,6 +406,8 @@ interface CalculatedEdit {
error?: { display: string; raw: string; type: ToolErrorType };
isNewFile: boolean;
originalLineEnding: '\r\n' | '\n';
+ strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy';
+ matchRanges?: Array<{ start: number; end: number }>;
}
class EditToolInvocation
@@ -516,6 +533,8 @@ class EditToolInvocation
isNewFile: false,
error: undefined,
originalLineEnding,
+ strategy: secondAttemptResult.strategy,
+ matchRanges: secondAttemptResult.matchRanges,
};
}
@@ -629,6 +648,8 @@ class EditToolInvocation
isNewFile: false,
error: undefined,
originalLineEnding,
+ strategy: replacementResult.strategy,
+ matchRanges: replacementResult.matchRanges,
};
}
@@ -855,6 +876,10 @@ class EditToolInvocation
? `Created new file: ${this.params.file_path} with provided content.`
: `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`,
];
+ const fuzzyFeedback = getFuzzyMatchFeedback(editData);
+ if (fuzzyFeedback) {
+ llmSuccessMessageParts.push(fuzzyFeedback);
+ }
if (this.params.modified_by_user) {
llmSuccessMessageParts.push(
`User modified the \`new_string\` content to be: ${this.params.new_string}.`,
@@ -1007,3 +1032,154 @@ export class EditTool
};
}
}
+
+function stripWhitespace(str: string): string {
+ return str.replace(/\s/g, '');
+}
+
+function getFuzzyMatchFeedback(editData: CalculatedEdit): string | null {
+ if (
+ editData.strategy === 'fuzzy' &&
+ editData.matchRanges &&
+ editData.matchRanges.length > 0
+ ) {
+ const ranges = editData.matchRanges
+ .map((r) => (r.start === r.end ? `${r.start}` : `${r.start}-${r.end}`))
+ .join(', ');
+ return `Applied fuzzy match at line${editData.matchRanges.length > 1 ? 's' : ''} ${ranges}.`;
+ }
+ return null;
+}
+
+async function calculateFuzzyReplacement(
+ config: Config,
+ context: ReplacementContext,
+): Promise {
+ const { currentContent, params } = context;
+ const { old_string, new_string } = params;
+
+ // Pre-check: Don't fuzzy match very short strings to avoid false positives
+ if (old_string.length < 10) {
+ return null;
+ }
+
+ const normalizedCode = currentContent.replace(/\r\n/g, '\n');
+ const normalizedSearch = old_string.replace(/\r\n/g, '\n');
+ const normalizedReplace = new_string.replace(/\r\n/g, '\n');
+
+ const sourceLines = normalizedCode.match(/.*(?:\n|$)/g)?.slice(0, -1) ?? [];
+ const searchLines = normalizedSearch
+ .match(/.*(?:\n|$)/g)
+ ?.slice(0, -1)
+ .map((l) => l.trimEnd()); // Trim end of search lines to be more robust
+
+ if (!searchLines || searchLines.length === 0) {
+ return null;
+ }
+
+ const N = searchLines.length;
+ const candidates: Array<{ index: number; score: number }> = [];
+ const searchBlock = searchLines.join('\n');
+
+ // Sliding window
+ for (let i = 0; i <= sourceLines.length - N; i++) {
+ const windowLines = sourceLines.slice(i, i + N);
+ const windowText = windowLines.map((l) => l.trimEnd()).join('\n'); // Normalized join for comparison
+
+ // Length Heuristic Optimization
+ const lengthDiff = Math.abs(windowText.length - searchBlock.length);
+ if (
+ lengthDiff / searchBlock.length >
+ FUZZY_MATCH_THRESHOLD / WHITESPACE_PENALTY_FACTOR
+ ) {
+ continue;
+ }
+
+ // Tiered Scoring
+ const d_raw = levenshtein.get(windowText, searchBlock);
+ const d_norm = levenshtein.get(
+ stripWhitespace(windowText),
+ stripWhitespace(searchBlock),
+ );
+
+ const weightedDist = d_norm + (d_raw - d_norm) * WHITESPACE_PENALTY_FACTOR;
+ const score = weightedDist / searchBlock.length;
+
+ if (score <= FUZZY_MATCH_THRESHOLD) {
+ candidates.push({ index: i, score });
+ }
+ }
+
+ if (candidates.length === 0) {
+ return null;
+ }
+
+ // Select best non-overlapping matches
+ // Sort by score ascending. If scores equal, prefer earlier index (stable sort).
+ candidates.sort((a, b) => a.score - b.score || a.index - b.index);
+
+ const selectedMatches: Array<{ index: number; score: number }> = [];
+ for (const candidate of candidates) {
+ // Check for overlap with already selected matches
+ // Two windows overlap if their start indices are within N lines of each other
+ // (Assuming window size N. Actually overlap is |i - j| < N)
+ const overlaps = selectedMatches.some(
+ (m) => Math.abs(m.index - candidate.index) < N,
+ );
+ if (!overlaps) {
+ selectedMatches.push(candidate);
+ }
+ }
+
+ // If we found matches, apply them
+ if (selectedMatches.length > 0) {
+ const event = new EditStrategyEvent('fuzzy');
+ logEditStrategy(config, event);
+
+ // Calculate match ranges before sorting for replacement
+ // Indices in selectedMatches are 0-based line indices
+ const matchRanges = selectedMatches
+ .map((m) => ({ start: m.index + 1, end: m.index + N }))
+ .sort((a, b) => a.start - b.start);
+
+ // Sort matches by index descending to apply replacements from bottom to top
+ // so that indices remain valid
+ selectedMatches.sort((a, b) => b.index - a.index);
+
+ const newLines = normalizedReplace.split('\n');
+
+ for (const match of selectedMatches) {
+ // If we want to preserve the indentation of the first line of the match:
+ const firstLineMatch = sourceLines[match.index];
+ const indentationMatch = firstLineMatch.match(/^([ \t]*)/);
+ const indentation = indentationMatch ? indentationMatch[1] : '';
+
+ const indentedReplaceLines = newLines.map(
+ (line) => `${indentation}${line}`,
+ );
+
+ let replacementText = indentedReplaceLines.join('\n');
+ // If the last line of the match had a newline, preserve it in the replacement
+ // to avoid merging with the next line or losing a blank line separator.
+ if (sourceLines[match.index + N - 1].endsWith('\n')) {
+ replacementText += '\n';
+ }
+
+ sourceLines.splice(match.index, N, replacementText);
+ }
+
+ let modifiedCode = sourceLines.join('');
+ modifiedCode = restoreTrailingNewline(currentContent, modifiedCode);
+
+ return {
+ newContent: modifiedCode,
+ occurrences: selectedMatches.length,
+ finalOldString: normalizedSearch,
+ finalNewString: normalizedReplace,
+ strategy: 'fuzzy',
+ matchRanges,
+ };
+ }
+
+ return null;
+}
diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts
index 4bc57b8d32..63d7693123 100644
--- a/packages/core/src/tools/ls.test.ts
+++ b/packages/core/src/tools/ls.test.ts
@@ -235,8 +235,8 @@ describe('LSTool', () => {
expect(entries[0]).toBe('[DIR] x-dir');
expect(entries[1]).toBe('[DIR] y-dir');
- expect(entries[2]).toBe('a-file.txt');
- expect(entries[3]).toBe('b-file.txt');
+ expect(entries[2]).toBe('a-file.txt (8 bytes)');
+ expect(entries[3]).toBe('b-file.txt (8 bytes)');
});
it('should handle permission errors gracefully', async () => {
diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts
index 9ca2918b2c..b98dfb9e38 100644
--- a/packages/core/src/tools/ls.ts
+++ b/packages/core/src/tools/ls.ts
@@ -241,7 +241,12 @@ class LSToolInvocation extends BaseToolInvocation {
// Create formatted content for LLM
const directoryContent = entries
- .map((entry) => `${entry.isDirectory ? '[DIR] ' : ''}${entry.name}`)
+ .map((entry) => {
+ if (entry.isDirectory) {
+ return `[DIR] ${entry.name}`;
+ }
+ return `${entry.name} (${entry.size} bytes)`;
+ })
.join('\n');
let resultMessage = `Directory listing for ${resolvedDirPath}:\n${directoryContent}`;