diff --git a/docs/core/policy-engine.md b/docs/core/policy-engine.md index 519bd7102d..ca2a6ea543 100644 --- a/docs/core/policy-engine.md +++ b/docs/core/policy-engine.md @@ -239,6 +239,8 @@ The Gemini CLI ships with a set of default policies to provide a safe out-of-the-box experience. - **Read-only tools** (like `read_file`, `glob`) are generally **allowed**. +- **Agent delegation** (like `delegate_to_agent`) is **allowed** (sub-agent + actions are checked individually). - **Write tools** (like `write_file`, `run_shell_command`) default to **`ask_user`**. - In **`yolo`** mode, a high-priority rule allows all tools. diff --git a/packages/core/src/agents/delegate-to-agent-tool.test.ts b/packages/core/src/agents/delegate-to-agent-tool.test.ts new file mode 100644 index 0000000000..ebf753db30 --- /dev/null +++ b/packages/core/src/agents/delegate-to-agent-tool.test.ts @@ -0,0 +1,178 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { DelegateToAgentTool } from './delegate-to-agent-tool.js'; +import { AgentRegistry } from './registry.js'; +import type { Config } from '../config/config.js'; +import type { AgentDefinition } from './types.js'; +import { SubagentInvocation } from './invocation.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { MessageBusType } from '../confirmation-bus/types.js'; +import { DELEGATE_TO_AGENT_TOOL_NAME } from '../tools/tool-names.js'; + +vi.mock('./invocation.js', () => ({ + SubagentInvocation: vi.fn().mockImplementation(() => ({ + execute: vi + .fn() + .mockResolvedValue({ content: [{ type: 'text', text: 'Success' }] }), + })), +})); + +describe('DelegateToAgentTool', () => { + let registry: AgentRegistry; + let config: Config; + let tool: DelegateToAgentTool; + let messageBus: MessageBus; + + const mockAgentDef: AgentDefinition = { + name: 'test_agent', + description: 'A test agent', + 
promptConfig: {}, + modelConfig: { model: 'test-model', temp: 0, top_p: 0 }, + inputConfig: { + inputs: { + arg1: { type: 'string', description: 'Argument 1', required: true }, + arg2: { type: 'number', description: 'Argument 2', required: false }, + }, + }, + runConfig: { max_turns: 1, max_time_minutes: 1 }, + toolConfig: { tools: [] }, + }; + + beforeEach(() => { + config = { + getDebugMode: () => false, + modelConfigService: { + registerRuntimeModelConfig: vi.fn(), + }, + } as unknown as Config; + + registry = new AgentRegistry(config); + // Manually register the mock agent (bypassing protected method for testing) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (registry as any).agents.set(mockAgentDef.name, mockAgentDef); + + messageBus = { + publish: vi.fn(), + subscribe: vi.fn(), + unsubscribe: vi.fn(), + } as unknown as MessageBus; + + tool = new DelegateToAgentTool(registry, config, messageBus); + }); + + it('should use dynamic description from registry', () => { + // registry has mockAgentDef registered in beforeEach + expect(tool.description).toContain( + 'Delegates a task to a specialized sub-agent', + ); + expect(tool.description).toContain( + `- **${mockAgentDef.name}**: ${mockAgentDef.description}`, + ); + }); + + it('should validate agent_name exists in registry', async () => { + // build() validates the params against the schema generated from the registry. + // Each agent's schema pins agent_name to a literal, so an unregistered name matches no option.
+ expect(() => + tool.build({ + agent_name: 'non_existent_agent', + }), + ).toThrow(); + }); + + it('should validate correct arguments', async () => { + const invocation = tool.build({ + agent_name: 'test_agent', + arg1: 'valid', + }); + + const result = await invocation.execute(new AbortController().signal); + expect(result).toEqual({ content: [{ type: 'text', text: 'Success' }] }); + expect(SubagentInvocation).toHaveBeenCalledWith( + { arg1: 'valid' }, + mockAgentDef, + config, + messageBus, + ); + }); + + it('should throw error for missing required argument', async () => { + // Missing arg1 should fail Zod validation + expect(() => + tool.build({ + agent_name: 'test_agent', + arg2: 123, + }), + ).toThrow(); + }); + + it('should throw error for invalid argument type', async () => { + // arg1 should be string, passing number + expect(() => + tool.build({ + agent_name: 'test_agent', + arg1: 123, + }), + ).toThrow(); + }); + + it('should allow optional arguments to be omitted', async () => { + const invocation = tool.build({ + agent_name: 'test_agent', + arg1: 'valid', + // arg2 is optional + }); + + await expect( + invocation.execute(new AbortController().signal), + ).resolves.toBeDefined(); + }); + + it('should throw error if an agent has an input named "agent_name"', () => { + const invalidAgentDef: AgentDefinition = { + ...mockAgentDef, + name: 'invalid_agent', + inputConfig: { + inputs: { + agent_name: { + type: 'string', + description: 'Conflict', + required: true, + }, + }, + }, + }; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (registry as any).agents.set(invalidAgentDef.name, invalidAgentDef); + + expect(() => new DelegateToAgentTool(registry, config)).toThrow( + "Agent 'invalid_agent' cannot have an input parameter named 'agent_name' as it is a reserved parameter for delegation.", + ); + }); + + it('should use correct tool name "delegate_to_agent" when requesting confirmation', async () => { + const invocation = tool.build({ + 
agent_name: 'test_agent', + arg1: 'valid', + }); + + // Trigger confirmation check + const p = invocation.shouldConfirmExecute(new AbortController().signal); + void p; + + expect(messageBus.publish).toHaveBeenCalledWith( + expect.objectContaining({ + type: MessageBusType.TOOL_CONFIRMATION_REQUEST, + toolCall: expect.objectContaining({ + name: DELEGATE_TO_AGENT_TOOL_NAME, + }), + }), + ); + }); +}); diff --git a/packages/core/src/agents/delegate-to-agent-tool.ts b/packages/core/src/agents/delegate-to-agent-tool.ts new file mode 100644 index 0000000000..9993507e08 --- /dev/null +++ b/packages/core/src/agents/delegate-to-agent-tool.ts @@ -0,0 +1,182 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { z } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { + BaseDeclarativeTool, + Kind, + type ToolInvocation, + type ToolResult, + BaseToolInvocation, +} from '../tools/tools.js'; +import type { AnsiOutput } from '../utils/terminalSerializer.js'; +import { DELEGATE_TO_AGENT_TOOL_NAME } from '../tools/tool-names.js'; +import type { AgentRegistry } from './registry.js'; +import type { Config } from '../config/config.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { SubagentInvocation } from './invocation.js'; +import type { AgentInputs } from './types.js'; + +type DelegateParams = { agent_name: string } & Record; + +export class DelegateToAgentTool extends BaseDeclarativeTool< + DelegateParams, + ToolResult +> { + constructor( + private readonly registry: AgentRegistry, + private readonly config: Config, + messageBus?: MessageBus, + ) { + const definitions = registry.getAllDefinitions(); + + let schema: z.ZodTypeAny; + + if (definitions.length === 0) { + // Fallback if no agents are registered (mostly for testing/safety) + schema = z.object({ + agent_name: z.string().describe('No agents are currently available.'), + }); + } else { + const agentSchemas = 
definitions.map((def) => { + const inputShape: Record = { + agent_name: z.literal(def.name).describe(def.description), + }; + + for (const [key, inputDef] of Object.entries(def.inputConfig.inputs)) { + if (key === 'agent_name') { + throw new Error( + `Agent '${def.name}' cannot have an input parameter named 'agent_name' as it is a reserved parameter for delegation.`, + ); + } + + let validator: z.ZodTypeAny; + + // Map input types to Zod + switch (inputDef.type) { + case 'string': + validator = z.string(); + break; + case 'number': + validator = z.number(); + break; + case 'boolean': + validator = z.boolean(); + break; + case 'integer': + validator = z.number().int(); + break; + case 'string[]': + validator = z.array(z.string()); + break; + case 'number[]': + validator = z.array(z.number()); + break; + default: { + // This provides compile-time exhaustiveness checking. + const _exhaustiveCheck: never = inputDef.type; + void _exhaustiveCheck; + throw new Error(`Unhandled agent input type: '${inputDef.type}'`); + } + } + + if (!inputDef.required) { + validator = validator.optional(); + } + + inputShape[key] = validator.describe(inputDef.description); + } + + // Cast required because Zod can't infer the discriminator from dynamic keys + return z.object( + inputShape, + ) as z.ZodDiscriminatedUnionOption<'agent_name'>; + }); + + // Create the discriminated union + // z.discriminatedUnion requires at least 2 options, so we handle the single agent case + if (agentSchemas.length === 1) { + schema = agentSchemas[0]; + } else { + schema = z.discriminatedUnion( + 'agent_name', + agentSchemas as [ + z.ZodDiscriminatedUnionOption<'agent_name'>, + z.ZodDiscriminatedUnionOption<'agent_name'>, + ...Array>, + ], + ); + } + } + + super( + DELEGATE_TO_AGENT_TOOL_NAME, + 'Delegate to Agent', + registry.getToolDescription(), + Kind.Think, + zodToJsonSchema(schema), + /* isOutputMarkdown */ true, + /* canUpdateOutput */ true, + messageBus, + ); + } + + protected createInvocation( + 
params: DelegateParams, + ): ToolInvocation { + return new DelegateInvocation( + params, + this.registry, + this.config, + this.messageBus, + ); + } +} + +class DelegateInvocation extends BaseToolInvocation< + DelegateParams, + ToolResult +> { + constructor( + params: DelegateParams, + private readonly registry: AgentRegistry, + private readonly config: Config, + messageBus?: MessageBus, + ) { + super(params, messageBus, DELEGATE_TO_AGENT_TOOL_NAME); + } + + getDescription(): string { + return `Delegating to agent '${this.params.agent_name}'`; + } + + async execute( + signal: AbortSignal, + updateOutput?: (output: string | AnsiOutput) => void, + ): Promise { + const definition = this.registry.getDefinition(this.params.agent_name); + if (!definition) { + throw new Error( + `Agent '${this.params.agent_name}' exists in the tool definition but could not be found in the registry.`, + ); + } + + // Extract arguments (everything except agent_name) + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const { agent_name, ...agentArgs } = this.params; + + // Instantiate the Subagent Loop + const subagentInvocation = new SubagentInvocation( + agentArgs as AgentInputs, + definition, + this.config, + this.messageBus, + ); + + return subagentInvocation.execute(signal, updateOutput); + } +} diff --git a/packages/core/src/agents/registry.test.ts b/packages/core/src/agents/registry.test.ts index 4df0b206a9..8060815ac3 100644 --- a/packages/core/src/agents/registry.test.ts +++ b/packages/core/src/agents/registry.test.ts @@ -237,4 +237,33 @@ describe('AgentRegistry', () => { ); }); }); + describe('getToolDescription', () => { + it('should return default message when no agents are registered', () => { + expect(registry.getToolDescription()).toContain( + 'No agents are currently available', + ); + }); + + it('should return formatted list of agents when agents are available', () => { + registry.testRegisterAgent(MOCK_AGENT_V1); + registry.testRegisterAgent({ + 
...MOCK_AGENT_V2, + name: 'AnotherAgent', + description: 'Another agent description', + }); + + const description = registry.getToolDescription(); + + expect(description).toContain( + 'Delegates a task to a specialized sub-agent', + ); + expect(description).toContain('Available agents:'); + expect(description).toContain( + `- **${MOCK_AGENT_V1.name}**: ${MOCK_AGENT_V1.description}`, + ); + expect(description).toContain( + `- **AnotherAgent**: Another agent description`, + ); + }); + }); }); diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts index c7d7c65fae..927b14c8cc 100644 --- a/packages/core/src/agents/registry.ts +++ b/packages/core/src/agents/registry.ts @@ -150,4 +150,50 @@ export class AgentRegistry { getAllDefinitions(): AgentDefinition[] { return Array.from(this.agents.values()); } + + /** + * Returns a list of all registered agent names. + */ + getAllAgentNames(): string[] { + return Array.from(this.agents.keys()); + } + + /** + * Generates a description for the delegate_to_agent tool. + * Unlike getDirectoryContext() which is for system prompts, + * this is formatted for tool descriptions. + */ + getToolDescription(): string { + if (this.agents.size === 0) { + return 'Delegates a task to a specialized sub-agent. No agents are currently available.'; + } + + const agentDescriptions = Array.from(this.agents.entries()) + .map(([name, def]) => `- **${name}**: ${def.description}`) + .join('\n'); + + return `Delegates a task to a specialized sub-agent. + +Available agents: +${agentDescriptions}`; + } + + /** + * Generates a markdown "Phone Book" listing each available agent's name and description. + * This MUST be injected into the System Prompt of the parent agent.
+ */ + getDirectoryContext(): string { + if (this.agents.size === 0) { + return 'No sub-agents are currently available.'; + } + + let context = '## Available Sub-Agents\n'; + context += + 'Use `delegate_to_agent` for complex tasks requiring specialized analysis.\n\n'; + + for (const [name, def] of this.agents.entries()) { + context += `- **${name}**: ${def.description}\n`; + } + return context; + } } diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 554cee04bf..ea2f8b40ac 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -138,8 +138,12 @@ vi.mock('../agents/registry.js', () => { return { AgentRegistry: AgentRegistryMock }; }); -vi.mock('../agents/subagent-tool-wrapper.js', () => ({ - SubagentToolWrapper: vi.fn(), +vi.mock('../agents/delegate-to-agent-tool.js', () => ({ + DelegateToAgentTool: vi.fn(), +})); + +vi.mock('../resources/resource-registry.js', () => ({ + ResourceRegistry: vi.fn(), })); const mockCoreEvents = vi.hoisted(() => ({ @@ -842,11 +846,11 @@ describe('Server Config (config.ts)', () => { mockAgentDefinition, ); - const SubagentToolWrapperMock = ( - (await vi.importMock('../agents/subagent-tool-wrapper.js')) as { - SubagentToolWrapper: Mock; + const DelegateToAgentToolMock = ( + (await vi.importMock('../agents/delegate-to-agent-tool.js')) as { + DelegateToAgentTool: Mock; } - ).SubagentToolWrapper; + ).DelegateToAgentTool; await config.initialize(); @@ -856,16 +860,16 @@ describe('Server Config (config.ts)', () => { } ).ToolRegistry.prototype.registerTool; - expect(SubagentToolWrapperMock).toHaveBeenCalledTimes(1); - expect(SubagentToolWrapperMock).toHaveBeenCalledWith( - mockAgentDefinition, + expect(DelegateToAgentToolMock).toHaveBeenCalledTimes(1); + expect(DelegateToAgentToolMock).toHaveBeenCalledWith( + expect.anything(), // AgentRegistry config, undefined, ); const calls = registerToolMock.mock.calls; const registeredWrappers = calls.filter( 
- (call) => call[0] instanceof SubagentToolWrapperMock, + (call) => call[0] instanceof DelegateToAgentToolMock, ); expect(registeredWrappers).toHaveLength(1); }); @@ -877,15 +881,15 @@ describe('Server Config (config.ts)', () => { }; const config = new Config(params); - const SubagentToolWrapperMock = ( - (await vi.importMock('../agents/subagent-tool-wrapper.js')) as { - SubagentToolWrapper: Mock; + const DelegateToAgentToolMock = ( + (await vi.importMock('../agents/delegate-to-agent-tool.js')) as { + DelegateToAgentTool: Mock; } - ).SubagentToolWrapper; + ).DelegateToAgentTool; await config.initialize(); - expect(SubagentToolWrapperMock).not.toHaveBeenCalled(); + expect(DelegateToAgentToolMock).not.toHaveBeenCalled(); }); it('should not set default codebase investigator model in config (defaults in registry)', () => { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 7b033b5037..4c0c26647e 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -84,7 +84,8 @@ import { getCodeAssistServer } from '../code_assist/codeAssist.js'; import type { Experiments } from '../code_assist/experiments/experiments.js'; import { AgentRegistry } from '../agents/registry.js'; import { setGlobalProxy } from '../utils/fetch.js'; -import { SubagentToolWrapper } from '../agents/subagent-tool-wrapper.js'; +import { DelegateToAgentTool } from '../agents/delegate-to-agent-tool.js'; +import { DELEGATE_TO_AGENT_TOOL_NAME } from '../tools/tool-names.js'; import { getExperiments } from '../code_assist/experiments/experiments.js'; import { ExperimentFlags } from '../code_assist/experiments/flagNames.js'; import { debugLogger } from '../utils/debugLogger.js'; @@ -1571,26 +1572,24 @@ export class Config { } // Register Subagents as Tools - if (this.getCodebaseInvestigatorSettings().enabled) { - const definition = this.agentRegistry.getDefinition( - 'codebase_investigator', - ); - if (definition) { - // We must respect the 
main allowed/exclude lists for agents too. - const allowedTools = this.getAllowedTools(); + // Register DelegateToAgentTool if agents are enabled + if ( + this.isAgentsEnabled() || + this.getCodebaseInvestigatorSettings().enabled + ) { + // Check if the delegate tool itself is allowed (if allowedTools is set) + const allowedTools = this.getAllowedTools(); + const isAllowed = + !allowedTools || allowedTools.includes(DELEGATE_TO_AGENT_TOOL_NAME); - const isAllowed = - !allowedTools || allowedTools.includes(definition.name); - - if (isAllowed) { - const messageBusEnabled = this.getEnableMessageBusIntegration(); - const wrapper = new SubagentToolWrapper( - definition, - this, - messageBusEnabled ? this.getMessageBus() : undefined, - ); - registry.registerTool(wrapper); - } + if (isAllowed) { + const messageBusEnabled = this.getEnableMessageBusIntegration(); + const delegateTool = new DelegateToAgentTool( + this.agentRegistry, + this, + messageBusEnabled ? this.getMessageBus() : undefined, + ); + registry.registerTool(delegateTool); } } diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 8469813d7c..48d5c00655 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -15,6 +15,8 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
+Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -117,6 +119,8 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -214,6 +218,8 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -326,6 +332,8 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -423,6 +431,8 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -520,6 +530,8 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -617,6 +629,8 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -714,6 +728,8 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
+Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -811,6 +827,8 @@ exports[`Core System Prompt (prompts.ts) > should return the interactive avoidan - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks @@ -909,6 +927,8 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. - **Do not call tools in silence:** You must provide to the user very short and concise natural explanation (one sentence) before calling tools. 
+Mock Agent Directory + # Primary Workflows ## Software Engineering Tasks diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index bb12eaaf31..d5fb0a28f3 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -69,6 +69,9 @@ describe('Core System Prompt (prompts.ts)', () => { getModel: vi.fn().mockReturnValue('auto'), getPreviewFeatures: vi.fn().mockReturnValue(false), isInFallbackMode: vi.fn().mockReturnValue(false), + getAgentRegistry: vi.fn().mockReturnValue({ + getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'), + }), } as unknown as Config; vi.mocked(getEffectiveModel).mockReturnValue(DEFAULT_GEMINI_MODEL); }); @@ -162,22 +165,23 @@ describe('Core System Prompt (prompts.ts)', () => { getModel: vi.fn().mockReturnValue('auto'), getPreviewFeatures: vi.fn().mockReturnValue(false), isInFallbackMode: vi.fn().mockReturnValue(false), + getAgentRegistry: vi.fn().mockReturnValue({ + getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'), + }), } as unknown as Config; const prompt = getCoreSystemPrompt(testConfig); if (expectCodebaseInvestigator) { expect(prompt).toContain( - `your **first and primary tool** must be '${CodebaseInvestigatorAgent.name}'`, - ); - expect(prompt).toContain( - `do not ignore the output of '${CodebaseInvestigatorAgent.name}'`, + `your **first and primary action** must be to delegate to the '${CodebaseInvestigatorAgent.name}' agent`, ); + expect(prompt).toContain(`do not ignore the output of the agent`); expect(prompt).not.toContain( "Use 'search_file_content' and 'glob' search tools extensively", ); } else { expect(prompt).not.toContain( - `your **first and primary tool** must be '${CodebaseInvestigatorAgent.name}'`, + `your **first and primary action** must be to delegate to the '${CodebaseInvestigatorAgent.name}' agent`, ); expect(prompt).toContain( "Use 'search_file_content' and 'glob' search tools extensively", diff --git 
a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index ef778410bc..4a6c74b876 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -16,6 +16,7 @@ import { SHELL_TOOL_NAME, WRITE_FILE_TOOL_NAME, WRITE_TODOS_TOOL_NAME, + DELEGATE_TO_AGENT_TOOL_NAME, } from '../tools/tool-names.js'; import process from 'node:process'; import { isGitRepository } from '../utils/gitUtils.js'; @@ -152,7 +153,9 @@ export function getCoreSystemPrompt( ? ` - **Continue the work** You are not to interact with the user. Do your best to complete the task at hand, using your best judgement and avoid asking user for any additional information.` : '' - }`, + } + +${config.getAgentRegistry().getDirectoryContext()}`, primaryWorkflows_prefix: ` # Primary Workflows @@ -167,16 +170,16 @@ Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary tool** must be '${CodebaseInvestigatorAgent.name}'. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly. -2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If '${CodebaseInvestigatorAgent.name}' was used, do not ignore the output of '${CodebaseInvestigatorAgent.name}', you must use it as the foundation of your plan. 
Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`, +1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary action** must be to delegate to the '${CodebaseInvestigatorAgent.name}' agent using the '${DELEGATE_TO_AGENT_TOOL_NAME}' tool. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly. +2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If '${CodebaseInvestigatorAgent.name}' was used, do not ignore the output of the agent, you must use it as the foundation of your plan. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`, primaryWorkflows_prefix_ci_todo: ` # Primary Workflows ## Software Engineering Tasks When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: -1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. 
When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary tool** must be '${CodebaseInvestigatorAgent.name}'. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly. -2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. If '${CodebaseInvestigatorAgent.name}' was used, do not ignore the output of '${CodebaseInvestigatorAgent.name}', you must use it as the foundation of your plan. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`, +1. **Understand & Strategize:** Think about the user's request and the relevant codebase context. When the task involves **complex refactoring, codebase exploration or system-wide analysis**, your **first and primary action** must be to delegate to the '${CodebaseInvestigatorAgent.name}' agent using the '${DELEGATE_TO_AGENT_TOOL_NAME}' tool. Use it to build a comprehensive understanding of the code, its structure, and dependencies. For **simple, targeted searches** (like finding a specific function name, file path, or variable declaration), you should use '${GREP_TOOL_NAME}' or '${GLOB_TOOL_NAME}' directly. +2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. 
If '${CodebaseInvestigatorAgent.name}' was used, do not ignore the output of the agent, you must use it as the foundation of your plan. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`, primaryWorkflows_todo: ` # Primary Workflows diff --git a/packages/core/src/policy/policies/agent.toml b/packages/core/src/policy/policies/agent.toml new file mode 100644 index 0000000000..58876246bf --- /dev/null +++ b/packages/core/src/policy/policies/agent.toml @@ -0,0 +1,31 @@ +# Priority system for policy rules: +# - Higher priority numbers win over lower priority numbers +# - When multiple rules match, the highest priority rule is applied +# - Rules are evaluated in order of priority (highest first) +# +# Priority bands (tiers): +# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100) +# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100) +# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100) +# +# This ensures Admin > User > Default hierarchy is always preserved, +# while allowing user-specified priorities to work within each tier. 
+# +# Settings-based and dynamic rules (all in user tier 2.x): +# 2.95: Tools that the user has selected as "Always Allow" in the interactive UI +# 2.9: MCP servers excluded list (security: persistent server blocks) +# 2.4: Command line flag --exclude-tools (explicit temporary blocks) +# 2.3: Command line flag --allowed-tools (explicit temporary allows) +# 2.2: MCP servers with trust=true (persistent trusted servers) +# 2.1: MCP servers allowed list (persistent general server allows) +# +# TOML policy priorities (before transformation): +# 10: Write tools default to ASK_USER (becomes 1.010 in default tier) +# 15: Auto-edit tool override (becomes 1.015 in default tier) +# 50: Read-only tools and agent delegation (becomes 1.050 in default tier) +# 999: YOLO mode allow-all (becomes 1.999 in default tier) + +[[rule]] +toolName = "delegate_to_agent" +decision = "allow" +priority = 50 diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index ec8ebb6d80..b76b429000 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -21,3 +21,4 @@ export const READ_FILE_TOOL_NAME = 'read_file'; export const LS_TOOL_NAME = 'list_directory'; export const MEMORY_TOOL_NAME = 'save_memory'; export const EDIT_TOOL_NAMES = new Set([EDIT_TOOL_NAME, WRITE_FILE_TOOL_NAME]); +export const DELEGATE_TO_AGENT_TOOL_NAME = 'delegate_to_agent';