From ce84b3cb5f5ece0a9d03c560c793c3c8640cb77c Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Tue, 17 Feb 2026 23:54:08 +0000 Subject: [PATCH] Use ranged reads and limited searches and fuzzy editing improvements (#19240) --- evals/frugalReads.eval.ts | 278 ++++++++++++ evals/frugalSearch.eval.ts | 168 +++---- .../core/__snapshots__/prompts.test.ts.snap | 420 ++++++++++++++++-- packages/core/src/prompts/snippets.ts | 34 +- packages/core/src/tools/edit.test.ts | 176 ++++++++ packages/core/src/tools/edit.ts | 218 ++++++++- packages/core/src/tools/ls.test.ts | 4 +- packages/core/src/tools/ls.ts | 7 +- packages/test-utils/src/test-rig.ts | 14 + 9 files changed, 1174 insertions(+), 145 deletions(-) create mode 100644 evals/frugalReads.eval.ts diff --git a/evals/frugalReads.eval.ts b/evals/frugalReads.eval.ts new file mode 100644 index 0000000000..55a73f85e2 --- /dev/null +++ b/evals/frugalReads.eval.ts @@ -0,0 +1,278 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import { READ_FILE_TOOL_NAME, EDIT_TOOL_NAME } from '@google/gemini-cli-core'; + +describe('Frugal reads eval', () => { + /** + * Ensures that the agent is frugal in its use of context by relying + * primarily on ranged reads when the line number is known, and combining + * nearby ranges into a single contiguous read to save tool calls. 
+ */ + evalTest('USUALLY_PASSES', { + name: 'should use ranged read when nearby lines are targeted', + files: { + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + 'eslint.config.mjs': `export default [ + { + files: ["**/*.ts"], + rules: { + "no-var": "error" + } + } + ];`, + 'linter_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 1000; i++) { + if (i === 500 || i === 510 || i === 520) { + lines.push(`var oldVar${i} = "needs fix";`); + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Fix all linter errors in linter_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. Do not run the file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + // Check if the agent read the whole file + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + + const targetFileReads = readCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('linter_mess.ts'); + }); + + expect( + targetFileReads.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + // We expect 1-3 ranges in a single turn. 
+ expect( + targetFileReads.length, + 'Agent should have used 1-3 ranged reads for near errors', + ).toBeLessThanOrEqual(3); + + const firstPromptId = targetFileReads[0].toolRequest.prompt_id; + expect(firstPromptId, 'Prompt ID should be defined').toBeDefined(); + expect( + targetFileReads.every( + (call) => call.toolRequest.prompt_id === firstPromptId, + ), + 'All reads should have happened in the same turn', + ).toBe(true); + + let totalLinesRead = 0; + const readRanges: { offset: number; limit: number }[] = []; + + for (const call of targetFileReads) { + const args = JSON.parse(call.toolRequest.args); + + expect( + args.limit, + 'Agent read the entire file (missing limit) instead of using ranged read', + ).toBeDefined(); + + const limit = args.limit; + const offset = args.offset ?? 0; + totalLinesRead += limit; + readRanges.push({ offset, limit }); + + expect(args.limit, 'Agent read too many lines at once').toBeLessThan( + 1001, + ); + } + + // Ranged read shoud be frugal and just enough to satisfy the task at hand. + expect( + totalLinesRead, + 'Agent read more of the file than expected', + ).toBeLessThan(1000); + + // Check that we read around the error lines + const errorLines = [500, 510, 520]; + for (const line of errorLines) { + const covered = readRanges.some( + (range) => line >= range.offset && line < range.offset + range.limit, + ); + expect(covered, `Agent should have read around line ${line}`).toBe( + true, + ); + } + + const editCalls = logs.filter( + (log) => log.toolRequest?.name === EDIT_TOOL_NAME, + ); + const targetEditCalls = editCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('linter_mess.ts'); + }); + expect( + targetEditCalls.length, + 'Agent should have made replacement calls on the target file', + ).toBeGreaterThanOrEqual(3); + }, + }); + + /** + * Ensures the agent uses multiple ranged reads when the targets are far + * apart to avoid the need to read the whole file. 
+ */ + evalTest('USUALLY_PASSES', { + name: 'should use ranged read when targets are far apart', + files: { + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + 'eslint.config.mjs': `export default [ + { + files: ["**/*.ts"], + rules: { + "no-var": "error" + } + } + ];`, + 'far_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 1000; i++) { + if (i === 100 || i === 900) { + lines.push(`var oldVar${i} = "needs fix";`); + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Fix all linter errors in far_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. Do not run the file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + + const targetFileReads = readCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('far_mess.ts'); + }); + + // The agent should use ranged reads to be frugal with context tokens, + // even if it requires multiple calls for far-apart errors. + expect( + targetFileReads.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + // We allow multiple calls since the errors are far apart. + expect( + targetFileReads.length, + 'Agent should have used separate reads for far apart errors', + ).toBeLessThanOrEqual(4); + + for (const call of targetFileReads) { + const args = JSON.parse(call.toolRequest.args); + expect( + args.limit, + 'Agent should have used ranged read (limit) to save tokens', + ).toBeDefined(); + } + }, + }); + + /** + * Validates that the agent reads the entire file if there are lots of matches + * (e.g.: 10), as it's more efficient than many small ranged reads. 
+ */ + evalTest('USUALLY_PASSES', { + name: 'should read the entire file when there are many matches', + files: { + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + 'eslint.config.mjs': `export default [ + { + files: ["**/*.ts"], + rules: { + "no-var": "error" + } + } + ];`, + 'many_mess.ts': (() => { + const lines = []; + for (let i = 0; i < 1000; i++) { + if (i % 100 === 0) { + lines.push(`var oldVar${i} = "needs fix";`); + } else { + lines.push(`const goodVar${i} = "clean";`); + } + } + return lines.join('\n'); + })(), + }, + prompt: + 'Fix all linter errors in many_mess.ts manually by editing the file. Run eslint directly (using "npx --yes eslint") to find them. Do not run the file.', + assert: async (rig) => { + const logs = rig.readToolLogs(); + + const readCalls = logs.filter( + (log) => log.toolRequest?.name === READ_FILE_TOOL_NAME, + ); + + const targetFileReads = readCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('many_mess.ts'); + }); + + expect( + targetFileReads.length, + 'Agent should have used read_file to check context', + ).toBeGreaterThan(0); + + // In this case, we expect the agent to realize there are many scattered errors + // and just read the whole file to be efficient with tool calls. 
+ const readEntireFile = targetFileReads.some((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.limit === undefined; + }); + + expect( + readEntireFile, + 'Agent should have read the entire file because of the high number of scattered matches', + ).toBe(true); + + // Check that the agent actually fixed the errors + const editCalls = logs.filter( + (log) => log.toolRequest?.name === EDIT_TOOL_NAME, + ); + const targetEditCalls = editCalls.filter((call) => { + const args = JSON.parse(call.toolRequest.args); + return args.file_path.includes('many_mess.ts'); + }); + expect( + targetEditCalls.length, + 'Agent should have made replacement calls on the target file', + ).toBeGreaterThanOrEqual(1); + }, + }); +}); diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts index 11c51e8529..8805a6a8ed 100644 --- a/evals/frugalSearch.eval.ts +++ b/evals/frugalSearch.eval.ts @@ -9,7 +9,7 @@ import { evalTest } from './test-helper.js'; /** * Evals to verify that the agent uses search tools efficiently (frugally) - * by utilizing limiting parameters like `total_max_matches` and `max_matches_per_file`. + * by utilizing limiting parameters like `limit` and `max_matches_per_file`. * This ensures the agent doesn't flood the context window with unnecessary search results. */ describe('Frugal Search', () => { @@ -25,120 +25,76 @@ describe('Frugal Search', () => { return args; }; + /** + * Ensure that the agent makes use of either grep or ranged reads in fulfilling this task. + * The task is specifically phrased to not evoke "view" or "search" specifically because + * the model implicitly understands that such tasks are searches. This covers the case of + * an unexpectedly large file benefitting from frugal approaches to viewing, like grep, or + * ranged reads. 
+ */ evalTest('USUALLY_PASSES', { - name: 'should use targeted search with limit', - prompt: 'find me a sample usage of path.resolve() in the codebase', + name: 'should use grep or ranged read for large files', + prompt: 'What year was legacy_processor.ts written?', files: { - 'package.json': JSON.stringify({ - name: 'test-project', - version: '1.0.0', - main: 'dist/index.js', - scripts: { - build: 'tsc', - test: 'vitest', - }, - dependencies: { - typescript: '^5.0.0', - '@types/node': '^20.0.0', - vitest: '^1.0.0', - }, - }), - 'src/index.ts': ` - import { App } from './app.ts'; - - const app = new App(); - app.start(); - `, - 'src/app.ts': ` - import * as path from 'path'; - import { UserController } from './controllers/user.ts'; - - export class App { - constructor() { - console.log('App initialized'); - } - - public start(): void { - const userController = new UserController(); - console.log('Static path:', path.resolve(__dirname, '../public')); - } - } - `, - 'src/utils.ts': ` - import * as path from 'path'; - import * as fs from 'fs'; - - export function resolvePath(p: string): string { - return path.resolve(process.cwd(), p); - } - - export function ensureDir(dirPath: string): void { - const absolutePath = path.resolve(dirPath); - if (!fs.existsSync(absolutePath)) { - fs.mkdirSync(absolutePath, { recursive: true }); - } - } - `, - 'src/config.ts': ` - import * as path from 'path'; - - export const config = { - dbPath: path.resolve(process.cwd(), 'data/db.sqlite'), - logLevel: 'info', - }; - `, - 'src/controllers/user.ts': ` - import * as path from 'path'; - - export class UserController { - public getUsers(): any[] { - console.log('Loading users from:', path.resolve('data/users.json')); - return [{ id: 1, name: 'Alice' }]; - } - } - `, - 'tests/app.test.ts': ` - import { describe, it, expect } from 'vitest'; - import * as path from 'path'; - - describe('App', () => { - it('should resolve paths', () => { - const p = path.resolve('test'); - 
expect(p).toBeDefined(); - }); - }); - `, + 'src/utils.ts': 'export const add = (a, b) => a + b;', + 'src/types.ts': 'export type ID = string;', + 'src/legacy_processor.ts': [ + '// Copyright 2005 Legacy Systems Inc.', + ...Array.from( + { length: 5000 }, + (_, i) => + `// Legacy code block ${i} - strictly preserved for backward compatibility`, + ), + ].join('\n'), + 'README.md': '# Project documentation', }, assert: async (rig) => { const toolCalls = rig.readToolLogs(); - const grepCalls = toolCalls.filter( - (call) => call.toolRequest.name === 'grep_search', - ); + const getParams = (call: any) => { + let args = call.toolRequest.args; + if (typeof args === 'string') { + try { + args = JSON.parse(args); + } catch (e) { + // Ignore parse errors + } + } + return args; + }; - expect(grepCalls.length).toBeGreaterThan(0); + // Check for wasteful full file reads + const fullReads = toolCalls.filter((call) => { + if (call.toolRequest.name !== 'read_file') return false; + const args = getParams(call); + return ( + args.file_path === 'src/legacy_processor.ts' && + (args.limit === undefined || args.limit === null) + ); + }); - const grepParams = grepCalls.map(getGrepParams); - - const hasTotalMaxLimit = grepParams.some( - (p) => p.total_max_matches !== undefined && p.total_max_matches <= 100, - ); expect( - hasTotalMaxLimit, - `Expected agent to use a small total_max_matches (<= 100) for a sample usage request. Actual values: ${JSON.stringify( - grepParams.map((p) => p.total_max_matches), - )}`, - ).toBe(true); + fullReads.length, + 'Agent should not attempt to read the entire large file at once', + ).toBe(0); - const hasMaxMatchesPerFileLimit = grepParams.some( - (p) => - p.max_matches_per_file !== undefined && p.max_matches_per_file <= 5, - ); - expect( - hasMaxMatchesPerFileLimit, - `Expected agent to use a small max_matches_per_file (<= 5) for a sample usage request. 
Actual values: ${JSON.stringify( - grepParams.map((p) => p.max_matches_per_file), - )}`, - ).toBe(true); + // Check that it actually tried to find it using appropriate tools + const validAttempts = toolCalls.filter((call) => { + const args = getParams(call); + if (call.toolRequest.name === 'grep_search') { + return true; + } + + if ( + call.toolRequest.name === 'read_file' && + args.file_path === 'src/legacy_processor.ts' && + args.limit !== undefined + ) { + return true; + } + return false; + }); + + expect(validAttempts.length).toBeGreaterThan(0); }, }); }); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index bb78cdb2f9..22d0e6f71a 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -520,8 +520,34 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. 
+- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- read_file fails if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. 
+- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -649,8 +675,34 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. 
+ +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- read_file fails if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. 
+ ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -744,8 +796,34 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. 
+- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- read_file fails if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. 
@@ -1308,8 +1386,34 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills with - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. 
Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- read_file fails if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1433,8 +1537,34 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. 
Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- read_file fails if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. 
+- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1549,8 +1679,34 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. 
+Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern.
+ + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1665,8 +1821,34 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can.
+ +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters).
+- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1777,8 +1959,34 @@ exports[`Core System Prompt (prompts.ts) > should include mandate to distinguish - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context.
+- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety.
+- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1889,8 +2097,34 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns.
+ +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file.
+ ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2000,8 +2234,34 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually.
+- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.
@@ -2351,8 +2611,34 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns.
Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2463,8 +2749,34 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results.
Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous.
+- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2686,8 +2998,34 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context.
+Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern.
+ + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2798,8 +3136,34 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can.
+ +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger the context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to grep_search, to enable you to skip using an extra turn reading the file. +- Prefer using tools like grep_search to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so in parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like read_file and grep_search. +- Edits fail if old_string is ambiguous, causing extra turns. Take care to read enough with read_file and grep_search to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like grep_search and glob with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters).
+- **Searching and editing:** utilize search tools like grep_search with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like grep_search and/or read_file called in parallel with an offset and a limit to reduce the impact on context. Minimize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 54e64a8266..4285c489ab 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -155,6 +155,10 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { .join(', ') + ` or \`${filenames[filenames.length - 1]}\`` : `\`${filenames[0]}\``; + // ⚠️ IMPORTANT: the Context Efficiency changes strike a delicate balance that encourages + // the agent to minimize response sizes while also taking care to avoid extra turns. You + // must run the major benchmarks, such as SWEBench, prior to committing any changes to + // the Context Efficiency section to avoid regressing this behavior. return ` # Core Mandates @@ -163,8 +167,34 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Source Control:** Do not stage or commit changes unless specifically requested by the user.
## Context Efficiency: -- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. -- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. +Be strategic in your use of the available tools to minimize unnecessary context usage while still +providing the best answer that you can. + +Consider the following when estimating the cost of your approach: + +- The agent passes the full history with each subsequent message. The larger context is early in the session, the more expensive each subsequent turn is. +- Unnecessary turns are generally more expensive than other types of wasted context. +- You can reduce context usage by limiting the outputs of tools but take care not to cause more token consumption via additional turns required to recover from a tool failure or compensate for a misapplied optimization strategy. + + +Use the following guidelines to optimize your search and read patterns. + +- Combine turns whenever possible by utilizing parallel searching and reading and by requesting enough context by passing context, before, or after to ${GREP_TOOL_NAME}, to enable you to skip using an extra turn reading the file. +- Prefer using tools like ${GREP_TOOL_NAME} to identify points of interest instead of reading lots of files individually. +- If you need to read multiple ranges in a file, do so parallel, in as few turns as possible. +- It is more important to reduce extra turns, but please also try to minimize unnecessarily large file reads and search results, when doing so doesn't result in extra turns. Do this by always providing conservative limits and scopes to tools like ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME}. +- ${READ_FILE_TOOL_NAME} fails if old_string is ambiguous, causing extra turns. 
Take care to read enough with ${READ_FILE_TOOL_NAME} and ${GREP_TOOL_NAME} to make the edit unambiguous. +- You can compensate for the risk of missing results with scoped or limited searches by doing multiple searches in parallel. +- Your primary goal is still to do your best quality work. Efficiency is an important, but secondary concern. + + + +- **Searching:** utilize search tools like ${GREP_TOOL_NAME} and ${GLOB_TOOL_NAME} with a conservative result count (\`total_max_matches\`) and a narrow scope (\`include\` and \`exclude\` parameters). +- **Searching and editing:** utilize search tools like ${GREP_TOOL_NAME} with a conservative result count and a narrow scope. Use \`context\`, \`before\`, and/or \`after\` to request enough context to avoid the need to read the file before editing matches. +- **Understanding:** minimize turns needed to understand a file. It's most efficient to read small files in their entirety. +- **Large files:** utilize search tools like ${GREP_TOOL_NAME} and/or ${READ_FILE_TOOL_NAME} called in parallel with an offset and a limit to reduce the impact on context. Minmize extra turns, unless unavoidable due to the file being too large. +- **Navigating:** read the minimum required to not require additional turns spent reading the file. + ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. 
diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index 56dc2cb2c4..3b8cbe9645 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -373,6 +373,182 @@ describe('EditTool', () => { expect(result.occurrences).toBe(1); }); + it('should perform a fuzzy replacement when exact match fails but similarity is high', async () => { + const content = + 'const myConfig = {\n enableFeature: true,\n retries: 3\n};'; + // Typo: missing comma after true + const oldString = + 'const myConfig = {\n enableFeature: true\n retries: 3\n};'; + const newString = + 'const myConfig = {\n enableFeature: false,\n retries: 5\n};'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'config.ts', + instruction: 'update config', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(1); + expect(result.newContent).toBe(newString); + }); + + it('should NOT perform a fuzzy replacement when similarity is below threshold', async () => { + const content = + 'const myConfig = {\n enableFeature: true,\n retries: 3\n};'; + // Completely different string + const oldString = 'function somethingElse() {\n return false;\n}'; + const newString = + 'const myConfig = {\n enableFeature: false,\n retries: 5\n};'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'config.ts', + instruction: 'update config', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(0); + expect(result.newContent).toBe(content); + }); + + it('should NOT perform a fuzzy replacement when the complexity (length * size) is too high', async () => { + // 2000 chars + const longString = 'a'.repeat(2000); + + // Create a file with enough lines to trigger the complexity limit + // Complexity = Lines * Length^2 + // Threshold = 
500,000,000 + // 2000^2 = 4,000,000. + // Need > 125 lines. Let's use 200 lines. + const lines = Array(200).fill(longString); + const content = lines.join('\n'); + + // Mismatch at the end (making it a fuzzy match candidate) + const oldString = longString + 'c'; + const newString = 'replacement'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'test.ts', + instruction: 'update', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + // Should return 0 occurrences because fuzzy match is skipped + expect(result.occurrences).toBe(0); + expect(result.newContent).toBe(content); + }); + + it('should perform multiple fuzzy replacements if multiple valid matches are found', async () => { + const content = ` +function doIt() { + console.log("hello"); +} + +function doIt() { + console.log("hello"); +} +`; + // old_string uses single quotes, file uses double. + // This is a fuzzy match (quote difference). + const oldString = ` +function doIt() { + console.log('hello'); +} +`.trim(); + + const newString = ` +function doIt() { + console.log("bye"); +} +`.trim(); + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'test.ts', + instruction: 'update', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(2); + const expectedContent = ` +function doIt() { + console.log("bye"); +} + +function doIt() { + console.log("bye"); +} +`; + expect(result.newContent).toBe(expectedContent); + }); + + it('should correctly rebase indentation in flexible replacement without double-indenting', async () => { + const content = ' if (a) {\n foo();\n }\n'; + // old_string and new_string are unindented. They should be rebased to 4-space. 
+ const oldString = 'if (a) {\n foo();\n}'; + const newString = 'if (a) {\n bar();\n}'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'test.ts', + old_string: oldString, + new_string: newString, + }, + currentContent: content, + abortSignal, + }); + + expect(result.occurrences).toBe(1); + // foo() was at 8 spaces (4 base + 4 indent). + // newString has bar() at 4 spaces (0 base + 4 indent). + // Rebased to 4 base, it should be 4 + 4 = 8 spaces. + const expectedContent = ' if (a) {\n bar();\n }\n'; + expect(result.newContent).toBe(expectedContent); + }); + + it('should correctly rebase indentation in fuzzy replacement without double-indenting', async () => { + const content = + ' const myConfig = {\n enableFeature: true,\n retries: 3\n };'; + // Typo: missing comma. old_string/new_string are unindented. + const fuzzyOld = + 'const myConfig = {\n enableFeature: true\n retries: 3\n};'; + const fuzzyNew = + 'const myConfig = {\n enableFeature: false,\n retries: 5\n};'; + + const result = await calculateReplacement(mockConfig, { + params: { + file_path: 'test.ts', + old_string: fuzzyOld, + new_string: fuzzyNew, + }, + currentContent: content, + abortSignal, + }); + + expect(result.strategy).toBe('fuzzy'); + const expectedContent = + ' const myConfig = {\n enableFeature: false,\n retries: 5\n };'; + expect(result.newContent).toBe(expectedContent); + }); + it('should NOT insert extra newlines when replacing a block preceded by a blank line (regression)', async () => { const content = '\n function oldFunc() {\n // some code\n }'; const result = await calculateReplacement(mockConfig, { diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index 41f895f5cd..8a48161662 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -49,8 +49,13 @@ import { EDIT_DISPLAY_NAME, } from './tool-names.js'; import { debugLogger } from '../utils/debugLogger.js'; +import levenshtein from 
'fast-levenshtein'; import { EDIT_DEFINITION } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; + +const ENABLE_FUZZY_MATCH_RECOVERY = true; +const FUZZY_MATCH_THRESHOLD = 0.1; // Allow up to 10% weighted difference +const WHITESPACE_PENALTY_FACTOR = 0.1; // Whitespace differences cost 10% of a character difference interface ReplacementContext { params: EditToolParams; currentContent: string; @@ -62,6 +67,8 @@ interface ReplacementResult { occurrences: number; finalOldString: string; finalNewString: string; + strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy'; + matchRanges?: Array<{ start: number; end: number }>; } export function applyReplacement( @@ -176,9 +183,7 @@ async function calculateFlexibleReplacement( const firstLineInMatch = window[0]; const indentationMatch = firstLineInMatch.match(/^([ \t]*)/); const indentation = indentationMatch ? indentationMatch[1] : ''; - const newBlockWithIndent = replaceLines.map( - (line: string) => `${indentation}${line}`, - ); + const newBlockWithIndent = applyIndentation(replaceLines, indentation); sourceLines.splice( i, searchLinesStripped.length, @@ -247,9 +252,7 @@ async function calculateRegexReplacement( const indentation = match[1] || ''; const newLines = normalizedReplace.split('\n'); - const newBlockWithIndent = newLines - .map((line) => `${indentation}${line}`) - .join('\n'); + const newBlockWithIndent = applyIndentation(newLines, indentation).join('\n'); // Use replace with the regex to substitute the matched content. // Since the regex doesn't have the 'g' flag, it will only replace the first occurrence. 
@@ -305,6 +308,14 @@ export async function calculateReplacement( return regexResult; } + let fuzzyResult; + if ( + ENABLE_FUZZY_MATCH_RECOVERY && + (fuzzyResult = await calculateFuzzyReplacement(config, context)) + ) { + return fuzzyResult; + } + return { newContent: currentContent, occurrences: 0, @@ -395,6 +406,8 @@ interface CalculatedEdit { error?: { display: string; raw: string; type: ToolErrorType }; isNewFile: boolean; originalLineEnding: '\r\n' | '\n'; + strategy?: 'exact' | 'flexible' | 'regex' | 'fuzzy'; + matchRanges?: Array<{ start: number; end: number }>; } class EditToolInvocation @@ -520,6 +533,8 @@ class EditToolInvocation isNewFile: false, error: undefined, originalLineEnding, + strategy: secondAttemptResult.strategy, + matchRanges: secondAttemptResult.matchRanges, }; } @@ -633,6 +648,8 @@ class EditToolInvocation isNewFile: false, error: undefined, originalLineEnding, + strategy: replacementResult.strategy, + matchRanges: replacementResult.matchRanges, }; } @@ -859,6 +876,10 @@ class EditToolInvocation ? `Created new file: ${this.params.file_path} with provided content.` : `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`, ]; + const fuzzyFeedback = getFuzzyMatchFeedback(editData); + if (fuzzyFeedback) { + llmSuccessMessageParts.push(fuzzyFeedback); + } if (this.params.modified_by_user) { llmSuccessMessageParts.push( `User modified the \`new_string\` content to be: ${this.params.new_string}.`, @@ -1011,3 +1032,188 @@ export class EditTool }; } } + +function stripWhitespace(str: string): string { + return str.replace(/\s/g, ''); +} + +/** + * Applies the target indentation to the lines, while preserving relative indentation. + * It identifies the common indentation of the provided lines and replaces it with the target indentation. 
+ */
+function applyIndentation(
+  lines: string[],
+  targetIndentation: string,
+): string[] {
+  if (lines.length === 0) return [];
+
+  // Use the first line as the reference for indentation, even if it's empty/whitespace.
+  // This is because flexible/fuzzy matching identifies the indentation of the START of the match.
+  const referenceLine = lines[0];
+  const refIndentMatch = referenceLine.match(/^([ \t]*)/);
+  const refIndent = refIndentMatch ? refIndentMatch[1] : '';
+
+  return lines.map((line) => {
+    if (line.trim() === '') {
+      return '';
+    }
+    if (line.startsWith(refIndent)) {
+      return targetIndentation + line.slice(refIndent.length);
+    }
+    return targetIndentation + line.trimStart();
+  });
+}
+
+function getFuzzyMatchFeedback(editData: CalculatedEdit): string | null {
+  if (
+    editData.strategy === 'fuzzy' &&
+    editData.matchRanges &&
+    editData.matchRanges.length > 0
+  ) {
+    const ranges = editData.matchRanges
+      .map((r) => (r.start === r.end ? `${r.start}` : `${r.start}-${r.end}`))
+      .join(', ');
+    return `Applied fuzzy match at line${editData.matchRanges.length > 1 ? 's' : ''} ${ranges}.`;
+  }
+  return null;
+}
+
+async function calculateFuzzyReplacement(
+  config: Config,
+  context: ReplacementContext,
+): Promise<ReplacementResult | null> {
+  const { currentContent, params } = context;
+  const { old_string, new_string } = params;
+
+  // Pre-check: Don't fuzzy match very short strings to avoid false positives
+  if (old_string.length < 10) {
+    return null;
+  }
+
+  const normalizedCode = currentContent.replace(/\r\n/g, '\n');
+  const normalizedSearch = old_string.replace(/\r\n/g, '\n');
+  const normalizedReplace = new_string.replace(/\r\n/g, '\n');
+
+  const sourceLines = normalizedCode.match(/.*(?:\n|$)/g)?.slice(0, -1) ?? [];
+  const searchLines = normalizedSearch
+    .match(/.*(?:\n|$)/g)
+    ?.slice(0, -1)
+    .map((l) => l.trimEnd()); // Trim end of search lines to be more robust
+
+  // Limit the scope of the fuzzy match to reduce impact on responsiveness.
+  // Each comparison takes roughly O(L^2) time.
+  // We perform sourceLines.length comparisons (sliding window).
+  // Total complexity proxy: sourceLines.length * old_string.length^2
+  // Limit to 4e8 for < 1 second.
+  if (sourceLines.length * Math.pow(old_string.length, 2) > 400_000_000) {
+    return null;
+  }
+
+  if (!searchLines || searchLines.length === 0) {
+    return null;
+  }
+
+  const N = searchLines.length;
+  const candidates: Array<{ index: number; score: number }> = [];
+  const searchBlock = searchLines.join('\n');
+
+  // Sliding window
+  for (let i = 0; i <= sourceLines.length - N; i++) {
+    const windowLines = sourceLines.slice(i, i + N);
+    const windowText = windowLines.map((l) => l.trimEnd()).join('\n'); // Normalized join for comparison
+
+    // Length Heuristic Optimization
+    const lengthDiff = Math.abs(windowText.length - searchBlock.length);
+    if (
+      lengthDiff / searchBlock.length >
+      FUZZY_MATCH_THRESHOLD / WHITESPACE_PENALTY_FACTOR
+    ) {
+      continue;
+    }
+
+    // Tiered Scoring
+    const d_raw = levenshtein.get(windowText, searchBlock);
+    const d_norm = levenshtein.get(
+      stripWhitespace(windowText),
+      stripWhitespace(searchBlock),
+    );
+
+    const weightedDist = d_norm + (d_raw - d_norm) * WHITESPACE_PENALTY_FACTOR;
+    const score = weightedDist / searchBlock.length;
+
+    if (score <= FUZZY_MATCH_THRESHOLD) {
+      candidates.push({ index: i, score });
+    }
+  }
+
+  if (candidates.length === 0) {
+    return null;
+  }
+
+  // Select best non-overlapping matches
+  // Sort by score ascending. If scores equal, prefer earlier index (stable sort).
+  candidates.sort((a, b) => a.score - b.score || a.index - b.index);
+
+  const selectedMatches: Array<{ index: number; score: number }> = [];
+  for (const candidate of candidates) {
+    // Check for overlap with already selected matches.
+    // Two windows overlap if their start indices are within N lines of each
+    // other; for equal-size windows, overlap holds exactly when |i - j| < N.
+    const overlaps = selectedMatches.some(
+      (m) => Math.abs(m.index - candidate.index) < N,
+    );
+    if (!overlaps) {
+      selectedMatches.push(candidate);
+    }
+  }
+
+  // If we found matches, apply them
+  if (selectedMatches.length > 0) {
+    const event = new EditStrategyEvent('fuzzy');
+    logEditStrategy(config, event);
+
+    // Calculate match ranges before sorting for replacement
+    // Indices in selectedMatches are 0-based line indices
+    const matchRanges = selectedMatches
+      .map((m) => ({ start: m.index + 1, end: m.index + N }))
+      .sort((a, b) => a.start - b.start);
+
+    // Sort matches by index descending to apply replacements from bottom to top
+    // so that indices remain valid
+    selectedMatches.sort((a, b) => b.index - a.index);
+
+    const newLines = normalizedReplace.split('\n');
+
+    for (const match of selectedMatches) {
+      // Preserve the indentation of the first line of the matched window:
+      const firstLineMatch = sourceLines[match.index];
+      const indentationMatch = firstLineMatch.match(/^([ \t]*)/);
+      const indentation = indentationMatch ? indentationMatch[1] : '';
+
+      const indentedReplaceLines = applyIndentation(newLines, indentation);
+
+      let replacementText = indentedReplaceLines.join('\n');
+      // If the last line of the match had a newline, preserve it in the replacement
+      // to avoid merging with the next line or losing a blank line separator.
+      if (sourceLines[match.index + N - 1].endsWith('\n')) {
+        replacementText += '\n';
+      }
+
+      sourceLines.splice(match.index, N, replacementText);
+    }
+
+    let modifiedCode = sourceLines.join('');
+    modifiedCode = restoreTrailingNewline(currentContent, modifiedCode);
+
+    return {
+      newContent: modifiedCode,
+      occurrences: selectedMatches.length,
+      finalOldString: normalizedSearch,
+      finalNewString: normalizedReplace,
+      strategy: 'fuzzy',
+      matchRanges,
+    };
+  }
+
+  return null;
+}
diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts
index 4bc57b8d32..63d7693123 100644
--- a/packages/core/src/tools/ls.test.ts
+++ b/packages/core/src/tools/ls.test.ts
@@ -235,8 +235,8 @@ describe('LSTool', () => {
     expect(entries[0]).toBe('[DIR] x-dir');
     expect(entries[1]).toBe('[DIR] y-dir');
 
-    expect(entries[2]).toBe('a-file.txt');
-    expect(entries[3]).toBe('b-file.txt');
+    expect(entries[2]).toBe('a-file.txt (8 bytes)');
+    expect(entries[3]).toBe('b-file.txt (8 bytes)');
   });
 
   it('should handle permission errors gracefully', async () => {
diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts
index 9ca2918b2c..b98dfb9e38 100644
--- a/packages/core/src/tools/ls.ts
+++ b/packages/core/src/tools/ls.ts
@@ -241,7 +241,12 @@ class LSToolInvocation extends BaseToolInvocation {
 
     // Create formatted content for LLM
     const directoryContent = entries
-      .map((entry) => `${entry.isDirectory ?
'[DIR] ' : ''}${entry.name}`) + .map((entry) => { + if (entry.isDirectory) { + return `[DIR] ${entry.name}`; + } + return `${entry.name} (${entry.size} bytes)`; + }) .join('\n'); let resultMessage = `Directory listing for ${resolvedDirPath}:\n${directoryContent}`; diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index fdbb316d01..6e32ec7790 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -208,6 +208,7 @@ export interface ParsedLog { stdout?: string; stderr?: string; error?: string; + prompt_id?: string; }; scopeMetrics?: { metrics: { @@ -1051,6 +1052,7 @@ export class TestRig { args: string; success: boolean; duration_ms: number; + prompt_id?: string; }; }[] = []; @@ -1079,6 +1081,13 @@ export class TestRig { args = argsMatch[1]; } + // Look for prompt_id in the context + let promptId = undefined; + const promptIdMatch = context.match(/prompt_id:\s*'([^']+)'/); + if (promptIdMatch) { + promptId = promptIdMatch[1]; + } + // Also try to find function_name to double-check // Updated regex to handle tool names with hyphens and underscores const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); @@ -1091,6 +1100,7 @@ export class TestRig { args: args, success: success, duration_ms: duration, + prompt_id: promptId, }, }); } @@ -1138,6 +1148,7 @@ export class TestRig { args: obj.attributes.function_args || '{}', success: obj.attributes.success !== false, duration_ms: obj.attributes.duration_ms || 0, + prompt_id: obj.attributes.prompt_id, }, }); } @@ -1152,6 +1163,7 @@ export class TestRig { args: obj.attributes.function_args, success: obj.attributes.success, duration_ms: obj.attributes.duration_ms, + prompt_id: obj.attributes.prompt_id, }, }); } @@ -1242,6 +1254,7 @@ export class TestRig { args: string; success: boolean; duration_ms: number; + prompt_id?: string; }; }[] = []; @@ -1258,6 +1271,7 @@ export class TestRig { args: logData.attributes.function_args ?? 
'{}', success: logData.attributes.success ?? false, duration_ms: logData.attributes.duration_ms ?? 0, + prompt_id: logData.attributes.prompt_id, }, }); }