diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts index 05d4ab355a..0a9e63f23c 100644 --- a/evals/subagents.eval.ts +++ b/evals/subagents.eval.ts @@ -53,8 +53,7 @@ describe('subagent eval test cases', () => { evalTest('ALWAYS_PASSES', { name: 'should fix linter errors in multiple projects', - prompt: - 'Fix all linter errors by delegating to the codebase investigator in parallel. Do not run any shell commands to verify.', + prompt: 'Fix all linter errors.', files: { 'project-a/eslint.config.js': ` module.exports = [ diff --git a/packages/core/src/agents/codebase-investigator.ts b/packages/core/src/agents/codebase-investigator.ts index c4458a14d4..0125c2fce4 100644 --- a/packages/core/src/agents/codebase-investigator.ts +++ b/packages/core/src/agents/codebase-investigator.ts @@ -10,6 +10,8 @@ import { GREP_TOOL_NAME, LS_TOOL_NAME, READ_FILE_TOOL_NAME, + SHELL_TOOL_NAME, + WEB_FETCH_TOOL_NAME, } from '../tools/tool-names.js'; import { DEFAULT_THINKING_MODE, @@ -66,8 +68,8 @@ export const CodebaseInvestigatorAgent = ( name: 'codebase_investigator', kind: 'local', displayName: 'Codebase Investigator Agent', - description: `The specialized tool for codebase analysis, architectural mapping, and understanding system-wide dependencies. - Invoke this tool for tasks like vague requests, bug root-cause analysis, system refactoring, comprehensive feature implementation or to answer questions about the codebase that require investigation. + description: `The specialized tool for codebase analysis, architectural mapping, understanding system-wide dependencies, and VERIFYING fixes. + Invoke this tool for tasks like vague requests, bug root-cause analysis, system refactoring, comprehensive feature implementation or to answer questions about the codebase that require investigation or final verification. It returns a structured report with key file paths, symbols, and actionable architectural insights.`, inputConfig: { inputSchema: { @@ -114,12 +116,14 @@ export const CodebaseInvestigatorAgent = ( }, toolConfig: { - // Grant access only to read-only tools. + // Grant access to investigation tools. tools: [ LS_TOOL_NAME, READ_FILE_TOOL_NAME, GLOB_TOOL_NAME, GREP_TOOL_NAME, + SHELL_TOOL_NAME, + WEB_FETCH_TOOL_NAME, ], }, @@ -144,7 +148,8 @@ You operate in a non-interactive loop and must reason based on the information p 1. **DEEP ANALYSIS, NOT JUST FILE FINDING:** Your goal is to understand the *why* behind the code. Don't just list files; explain their purpose and the role of their key components. Your final report should empower another agent to make a correct and complete fix. 2. **SYSTEMATIC & CURIOUS EXPLORATION:** Start with high-value clues (like tracebacks or ticket numbers) and broaden your search as needed. Think like a senior engineer doing a code review. An initial file contains clues (imports, function calls, puzzling logic). **If you find something you don't understand, you MUST prioritize investigating it until it is clear.** Treat confusion as a signal to dig deeper. 3. **HOLISTIC & PRECISE:** Your goal is to find the complete and minimal set of locations that need to be understood or changed. Do not stop until you are confident you have considered the side effects of a potential fix (e.g., type errors, breaking changes to callers, opportunities for code reuse). -4. **Web Search:** You are allowed to use the \`web_fetch\` tool to research libraries, language features, or concepts you don't understand (e.g., "what does gettext.translation do with localedir=None?"). +4. **Tool Usage:** You are allowed to use the \`run_shell_command\` tool to run linters, tests, or other diagnostic commands to gather information or verify that issues are resolved. Do NOT use it to perform implementation changes. +5. **Web Search:** You are allowed to use the \`web_fetch\` tool to research libraries, language features, or concepts you don't understand (e.g., "what does gettext.translation do with localedir=None?"). --- ## Scratchpad Management diff --git a/packages/core/src/agents/generalist-agent.ts b/packages/core/src/agents/generalist-agent.ts index 4f9040a7b0..1b7fc961fb 100644 --- a/packages/core/src/agents/generalist-agent.ts +++ b/packages/core/src/agents/generalist-agent.ts @@ -23,8 +23,9 @@ export const GeneralistAgent = ( kind: 'local', name: 'generalist', displayName: 'Generalist Agent', - description: - "A general-purpose AI agent with access to all tools. Use it for complex tasks that don't fit into other specialized agents.", + description: `A general-purpose AI agent with access to all tools. + - ALWAYS use it to break up and parallelize independent pieces of a larger task, when possible. + `, experimental: true, inputConfig: { inputSchema: { diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 8d46fd6a1a..b301a97afe 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -497,15 +497,15 @@ function workflowStepPlan(options: PrimaryWorkflowsOptions): string { return `2. **Plan:** An approved plan is available for this task. Use this file as a guide for your implementation. You MUST read this file before proceeding. If you discover new requirements or need to change the approach, confirm with the user and update this plan file to reflect the updated design decisions or discovered requirements.`; } if (options.enableCodebaseInvestigator && options.enableWriteTodosTool) { - return `2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. If 'codebase_investigator' was used, do not ignore the output of the agent, you must use it as the foundation of your plan. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`; + return `2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. If 'codebase_investigator' was used, do not ignore the output of the agent, you must use it as the foundation of your plan. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. When these subtasks are independent, leverage the 'generalist' agent to execute them in parallel, increasing efficiency. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`; } if (options.enableCodebaseInvestigator) { - return `2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. If 'codebase_investigator' was used, do not ignore the output of the agent, you must use it as the foundation of your plan. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`; + return `2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. If 'codebase_investigator' was used, do not ignore the output of the agent, you must use it as the foundation of your plan. For tasks that can be broken down into independent sub-tasks, leverage the 'generalist' agent to parallelize their execution. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`; } if (options.enableWriteTodosTool) { - return `2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`; + return `2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. When these subtasks are independent, leverage the 'generalist' agent to execute them in parallel, increasing efficiency. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`; } - return "2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution."; + return "2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If the user's request implies a change but does not explicitly state it, **YOU MUST ASK** for confirmation before modifying code. For tasks that can be broken down into independent sub-tasks, leverage the 'generalist' agent to parallelize their execution. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution."; } function workflowVerifyStandardsSuffix(interactive: boolean): string { diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index bd062373df..f876b5f4f2 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -534,11 +534,13 @@ function workflowStepStrategy(options: PrimaryWorkflowsOptions): string { if (options.enableWriteTodosTool) { return `2. **Strategy:** Formulate a grounded plan based on your research.${ options.interactive ? ' Share a concise summary of your strategy.' : '' - } For complex tasks, break them down into smaller, manageable subtasks and use the ${formatToolName(WRITE_TODOS_TOOL_NAME)} tool to track your progress.`; + } For complex tasks, break them down into smaller, manageable subtasks and use the ${formatToolName( + WRITE_TODOS_TOOL_NAME, + )} tool to track your progress. When these subtasks are independent, leverage the 'generalist' agent to execute them in parallel, increasing efficiency.`; } return `2. **Strategy:** Formulate a grounded plan based on your research.${ options.interactive ? ' Share a concise summary of your strategy.' : '' - }`; + } For tasks that can be broken down into independent sub-tasks, leverage the 'generalist' agent to parallelize their execution.`; } function workflowVerifyStandardsSuffix(interactive: boolean): string {