mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-14 08:01:02 -07:00
Merge branch 'gundermanc/ranged-reads3' into gundermanc/sub-agents-context
# Conflicts: # packages/core/src/tools/edit.ts
This commit is contained in:
@@ -520,8 +520,10 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -650,8 +652,10 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -746,8 +750,10 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -1311,8 +1317,10 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills with
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -1437,8 +1445,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -1554,8 +1564,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -1671,8 +1683,10 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -1784,8 +1798,10 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -1896,8 +1912,10 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -2248,8 +2266,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -2361,8 +2381,10 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -2585,8 +2607,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
@@ -2698,8 +2722,10 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results by explicitly setting \`total_max_matches\` or \`max_matches_per_file\`, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
- Limit unnecessary context consumption from file reads by using grep_search (configured with \`max_matches_per_file\`) to search large files (> 1kb) or read_file with the desired offset and limit.
|
||||
- If the file is small, prefer reading the whole thing over "scrolling" through it by reading ranges repeatedly.
|
||||
|
||||
## Engineering Standards
|
||||
- **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
|
||||
|
||||
@@ -165,8 +165,34 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string {
|
||||
- **Source Control:** Do not stage or commit changes unless specifically requested by the user.
|
||||
|
||||
## Context Efficiency:
|
||||
- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase.
|
||||
- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
|
||||
Be strategic in your use of the available tools to minimize unnecessary context usage while still
|
||||
providing the best answer that you can.
|
||||
|
||||
Consider the following when estimating the cost of your approach:
|
||||
<estimating_context_usage>
|
||||
- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is.
|
||||
- Unnecessary turns are generally much more expensive than other types of wasted context. An extra turn late in a session can cost > 1 million tokens vs. reading a full file, which is rarely greater than 10k tokens.
|
||||
- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy.
|
||||
</estimating_context_usage>
|
||||
|
||||
Use the following guidelines to optimize your search and read patterns.
|
||||
<guidelines>
|
||||
- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to ${GREP_TOOL_NAME}, to enable you to skip using an extra turn reading the file.
|
||||
- Prefer using tools like ${GREP_TOOL_NAME} to identify points of interest instead of reading lots of files individually.
|
||||
- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible.
|
||||
- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME}.
|
||||
- ${READ_FILE_TOOL_NAME} fails if old_string is ambiguous, causing extra turns. Take care to read enough with ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME} to make the edit unambiguous.
|
||||
- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel.
|
||||
- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern.
|
||||
</guidelines>
|
||||
|
||||
<examples>
|
||||
- **Searching:** utilize search tools like ${GREP_TOOL_NAME} and ${GLOB_TOOL_NAME} with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters).
|
||||
- **Searching and editing:** utilize search tools like ${GREP_TOOL_NAME} with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches.
|
||||
- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety.
|
||||
- **Large files:** utilize search tools like ${GREP_TOOL_NAME} and/or ${READ_FILE_TOOL_NAME} called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large.
|
||||
- **Navigating:** read the minimum required to not require additional turns spent reading the file.
|
||||
</examples>
|
||||
|
||||
## Parallelism
|
||||
You MUST ALWAYS utilize the generalist subagent to conserve context when doing repetitive tasks and parallelize independent bodies of work, even if you think you don't need to. This is very IMPORTANT to ensure you stay on track on repetitive tasks and/or complete tasks in a timely fashion.
|
||||
|
||||
@@ -373,6 +373,101 @@ describe('EditTool', () => {
|
||||
expect(result.occurrences).toBe(1);
|
||||
});
|
||||
|
||||
it('should perform a fuzzy replacement when exact match fails but similarity is high', async () => {
|
||||
const content =
|
||||
'const myConfig = {\n enableFeature: true,\n retries: 3\n};';
|
||||
// Typo: missing comma after true
|
||||
const oldString =
|
||||
'const myConfig = {\n enableFeature: true\n retries: 3\n};';
|
||||
const newString =
|
||||
'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'config.ts',
|
||||
instruction: 'update config',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(1);
|
||||
expect(result.newContent).toBe(newString);
|
||||
});
|
||||
|
||||
it('should NOT perform a fuzzy replacement when similarity is below threshold', async () => {
|
||||
const content =
|
||||
'const myConfig = {\n enableFeature: true,\n retries: 3\n};';
|
||||
// Completely different string
|
||||
const oldString = 'function somethingElse() {\n return false;\n}';
|
||||
const newString =
|
||||
'const myConfig = {\n enableFeature: false,\n retries: 5\n};';
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'config.ts',
|
||||
instruction: 'update config',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(0);
|
||||
expect(result.newContent).toBe(content);
|
||||
});
|
||||
|
||||
it('should perform multiple fuzzy replacements if multiple valid matches are found', async () => {
|
||||
const content = `
|
||||
function doIt() {
|
||||
console.log("hello");
|
||||
}
|
||||
|
||||
function doIt() {
|
||||
console.log("hello");
|
||||
}
|
||||
`;
|
||||
// old_string uses single quotes, file uses double.
|
||||
// This is a fuzzy match (quote difference).
|
||||
const oldString = `
|
||||
function doIt() {
|
||||
console.log('hello');
|
||||
}
|
||||
`.trim();
|
||||
|
||||
const newString = `
|
||||
function doIt() {
|
||||
console.log("bye");
|
||||
}
|
||||
`.trim();
|
||||
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
params: {
|
||||
file_path: 'test.ts',
|
||||
instruction: 'update',
|
||||
old_string: oldString,
|
||||
new_string: newString,
|
||||
},
|
||||
currentContent: content,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
expect(result.occurrences).toBe(2);
|
||||
const expectedContent = `
|
||||
function doIt() {
|
||||
console.log("bye");
|
||||
}
|
||||
|
||||
function doIt() {
|
||||
console.log("bye");
|
||||
}
|
||||
`;
|
||||
expect(result.newContent).toBe(expectedContent);
|
||||
});
|
||||
|
||||
it('should NOT insert extra newlines when replacing a block preceded by a blank line (regression)', async () => {
|
||||
const content = '\n function oldFunc() {\n // some code\n }';
|
||||
const result = await calculateReplacement(mockConfig, {
|
||||
|
||||
@@ -47,6 +47,11 @@ import { EDIT_TOOL_NAME, READ_FILE_TOOL_NAME } from './tool-names.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import { EDIT_DEFINITION } from './definitions/coreTools.js';
|
||||
import { resolveToolDeclaration } from './definitions/resolver.js';
|
||||
import levenshtein from 'fast-levenshtein';
|
||||
|
||||
const ENABLE_FUZZY_MATCH_RECOVERY = true;
|
||||
const FUZZY_MATCH_THRESHOLD = 0.1; // Allow up to 10% weighted difference
|
||||
const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference
|
||||
interface ReplacementContext {
|
||||
params: EditToolParams;
|
||||
currentContent: string;
|
||||
@@ -58,6 +63,8 @@ interface ReplacementResult {
|
||||
occurrences: number;
|
||||
finalOldString: string;
|
||||
finalNewString: string;
|
||||
strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy';
|
||||
matchRanges?: Array<{ start: number; end: number }>;
|
||||
}
|
||||
|
||||
export function applyReplacement(
|
||||
@@ -301,6 +308,14 @@ export async function calculateReplacement(
|
||||
return regexResult;
|
||||
}
|
||||
|
||||
let fuzzyResult;
|
||||
if (
|
||||
ENABLE_FUZZY_MATCH_RECOVERY &&
|
||||
(fuzzyResult = await calculateFuzzyReplacement(config, context))
|
||||
) {
|
||||
return fuzzyResult;
|
||||
}
|
||||
|
||||
return {
|
||||
newContent: currentContent,
|
||||
occurrences: 0,
|
||||
@@ -391,6 +406,8 @@ interface CalculatedEdit {
|
||||
error?: { display: string; raw: string; type: ToolErrorType };
|
||||
isNewFile: boolean;
|
||||
originalLineEnding: '\r\n' | '\n';
|
||||
strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy';
|
||||
matchRanges?: Array<{ start: number; end: number }>;
|
||||
}
|
||||
|
||||
class EditToolInvocation
|
||||
@@ -516,6 +533,8 @@ class EditToolInvocation
|
||||
isNewFile: false,
|
||||
error: undefined,
|
||||
originalLineEnding,
|
||||
strategy: secondAttemptResult.strategy,
|
||||
matchRanges: secondAttemptResult.matchRanges,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -629,6 +648,8 @@ class EditToolInvocation
|
||||
isNewFile: false,
|
||||
error: undefined,
|
||||
originalLineEnding,
|
||||
strategy: replacementResult.strategy,
|
||||
matchRanges: replacementResult.matchRanges,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -855,6 +876,10 @@ class EditToolInvocation
|
||||
? `Created new file: ${this.params.file_path} with provided content.`
|
||||
: `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`,
|
||||
];
|
||||
const fuzzyFeedback = getFuzzyMatchFeedback(editData);
|
||||
if (fuzzyFeedback) {
|
||||
llmSuccessMessageParts.push(fuzzyFeedback);
|
||||
}
|
||||
if (this.params.modified_by_user) {
|
||||
llmSuccessMessageParts.push(
|
||||
`User modified the \`new_string\` content to be: ${this.params.new_string}.`,
|
||||
@@ -1007,3 +1032,154 @@ export class EditTool
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
function stripWhitespace(str: string): string {
|
||||
return str.replace(/\s/g, '');
|
||||
}
|
||||
|
||||
function getFuzzyMatchFeedback(editData: CalculatedEdit): string | null {
|
||||
if (
|
||||
editData.strategy === 'fuzzy' &&
|
||||
editData.matchRanges &&
|
||||
editData.matchRanges.length > 0
|
||||
) {
|
||||
const ranges = editData.matchRanges
|
||||
.map((r) => (r.start === r.end ? `${r.start}` : `${r.start}-${r.end}`))
|
||||
.join(', ');
|
||||
return `Applied fuzzy match at line${editData.matchRanges.length > 1 ? 's' : ''} ${ranges}.`;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Attempts a line-based fuzzy replacement of `params.old_string` with
 * `params.new_string` inside `context.currentContent`.
 *
 * Strategy: slide a window of N lines (N = number of search lines) over the
 * source, score each window against the search block with a whitespace-
 * weighted Levenshtein distance, keep windows under FUZZY_MATCH_THRESHOLD,
 * then apply the best non-overlapping matches bottom-to-top so line indices
 * stay valid.
 *
 * @param config - Used only to log the chosen edit strategy for telemetry.
 * @param context - Carries the current file content and the edit params.
 * @returns A ReplacementResult describing the modified content and 1-based
 *   match line ranges, or `null` when no acceptable fuzzy match exists.
 *
 * NOTE(review): declared `async` but contains no `await` — presumably to
 * match a shared replacement-strategy signature; confirm against callers.
 */
async function calculateFuzzyReplacement(
  config: Config,
  context: ReplacementContext,
): Promise<ReplacementResult | null> {
  const { currentContent, params } = context;
  const { old_string, new_string } = params;

  // Pre-check: Don't fuzzy match very short strings to avoid false positives
  if (old_string.length < 10) {
    return null;
  }

  // Normalize CRLF to LF so Windows line endings don't inflate distances.
  const normalizedCode = currentContent.replace(/\r\n/g, '\n');
  const normalizedSearch = old_string.replace(/\r\n/g, '\n');
  const normalizedReplace = new_string.replace(/\r\n/g, '\n');

  // Split into lines while KEEPING each line's trailing '\n'; the regex
  // always yields one final empty match, which slice(0, -1) drops.
  const sourceLines = normalizedCode.match(/.*(?:\n|$)/g)?.slice(0, -1) ?? [];
  const searchLines = normalizedSearch
    .match(/.*(?:\n|$)/g)
    ?.slice(0, -1)
    .map((l) => l.trimEnd()); // Trim end of search lines to be more robust

  if (!searchLines || searchLines.length === 0) {
    return null;
  }

  const N = searchLines.length;
  const candidates: Array<{ index: number; score: number }> = [];
  const searchBlock = searchLines.join('\n');

  // Sliding window
  for (let i = 0; i <= sourceLines.length - N; i++) {
    const windowLines = sourceLines.slice(i, i + N);
    const windowText = windowLines.map((l) => l.trimEnd()).join('\n'); // Normalized join for comparison

    // Length Heuristic Optimization: a window whose length differs from the
    // search block by more than the best achievable score could ever allow
    // cannot pass the threshold, so skip the expensive Levenshtein calls.
    const lengthDiff = Math.abs(windowText.length - searchBlock.length);
    if (
      lengthDiff / searchBlock.length >
      FUZZY_MATCH_THRESHOLD / WHITESPACE_PENALTY_FACTOR
    ) {
      continue;
    }

    // Tiered Scoring: d_raw counts all edits, d_norm counts only edits that
    // survive whitespace stripping. The difference (whitespace-only edits)
    // is discounted by WHITESPACE_PENALTY_FACTOR so formatting drift is
    // penalized less than real content changes.
    // (Constant values are defined outside this view — see module top.)
    const d_raw = levenshtein.get(windowText, searchBlock);
    const d_norm = levenshtein.get(
      stripWhitespace(windowText),
      stripWhitespace(searchBlock),
    );

    const weightedDist = d_norm + (d_raw - d_norm) * WHITESPACE_PENALTY_FACTOR;
    const score = weightedDist / searchBlock.length;

    if (score <= FUZZY_MATCH_THRESHOLD) {
      candidates.push({ index: i, score });
    }
  }

  if (candidates.length === 0) {
    return null;
  }

  // Select best non-overlapping matches
  // Sort by score ascending. If scores equal, prefer earlier index (stable sort).
  candidates.sort((a, b) => a.score - b.score || a.index - b.index);

  const selectedMatches: Array<{ index: number; score: number }> = [];
  for (const candidate of candidates) {
    // Check for overlap with already selected matches
    // Two windows overlap if their start indices are within N lines of each other
    // (Assuming window size N. Actually overlap is |i - j| < N)
    const overlaps = selectedMatches.some(
      (m) => Math.abs(m.index - candidate.index) < N,
    );
    if (!overlaps) {
      selectedMatches.push(candidate);
    }
  }

  // If we found matches, apply them
  if (selectedMatches.length > 0) {
    const event = new EditStrategyEvent('fuzzy');
    logEditStrategy(config, event);

    // Calculate match ranges before sorting for replacement
    // Indices in selectedMatches are 0-based line indices
    // (ranges reported to the user are 1-based and sorted top-to-bottom).
    const matchRanges = selectedMatches
      .map((m) => ({ start: m.index + 1, end: m.index + N }))
      .sort((a, b) => a.start - b.start);

    // Sort matches by index descending to apply replacements from bottom to top
    // so that indices remain valid
    selectedMatches.sort((a, b) => b.index - a.index);

    const newLines = normalizedReplace.split('\n');

    for (const match of selectedMatches) {
      // If we want to preserve the indentation of the first line of the match:
      const firstLineMatch = sourceLines[match.index];
      const indentationMatch = firstLineMatch.match(/^([ \t]*)/);
      const indentation = indentationMatch ? indentationMatch[1] : '';

      const indentedReplaceLines = newLines.map(
        (line) => `${indentation}${line}`,
      );

      let replacementText = indentedReplaceLines.join('\n');
      // If the last line of the match had a newline, preserve it in the replacement
      // to avoid merging with the next line or losing a blank line separator.
      if (sourceLines[match.index + N - 1].endsWith('\n')) {
        replacementText += '\n';
      }

      // Splice the N matched lines out and drop the replacement in as a
      // single element; join('') below relies on lines keeping their '\n'.
      sourceLines.splice(match.index, N, replacementText);
    }

    let modifiedCode = sourceLines.join('');
    modifiedCode = restoreTrailingNewline(currentContent, modifiedCode);

    return {
      newContent: modifiedCode,
      occurrences: selectedMatches.length,
      finalOldString: normalizedSearch,
      finalNewString: normalizedReplace,
      strategy: 'fuzzy',
      matchRanges,
    };
  }

  return null;
}
|
||||
|
||||
@@ -235,8 +235,8 @@ describe('LSTool', () => {
|
||||
|
||||
expect(entries[0]).toBe('[DIR] x-dir');
|
||||
expect(entries[1]).toBe('[DIR] y-dir');
|
||||
expect(entries[2]).toBe('a-file.txt');
|
||||
expect(entries[3]).toBe('b-file.txt');
|
||||
expect(entries[2]).toBe('a-file.txt (8 bytes)');
|
||||
expect(entries[3]).toBe('b-file.txt (8 bytes)');
|
||||
});
|
||||
|
||||
it('should handle permission errors gracefully', async () => {
|
||||
|
||||
@@ -241,7 +241,12 @@ class LSToolInvocation extends BaseToolInvocation<LSToolParams, ToolResult> {
|
||||
|
||||
// Create formatted content for LLM
|
||||
const directoryContent = entries
|
||||
.map((entry) => `${entry.isDirectory ? '[DIR] ' : ''}${entry.name}`)
|
||||
.map((entry) => {
|
||||
if (entry.isDirectory) {
|
||||
return `[DIR] ${entry.name}`;
|
||||
}
|
||||
return `${entry.name} (${entry.size} bytes)`;
|
||||
})
|
||||
.join('\n');
|
||||
|
||||
let resultMessage = `Directory listing for ${resolvedDirPath}:\n${directoryContent}`;
|
||||
|
||||
Reference in New Issue
Block a user