From a15978593a3d39e9b3b88d353fc88c53a87acdb8 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Fri, 16 Jan 2026 16:51:10 +0000 Subject: [PATCH] Steer outer agent to use expert subagents when present (#16763) --- evals/README.md | 7 +++ evals/subagents.eval.ts | 64 ++++++++++++++++++++++ evals/test-helper.ts | 33 ++++++++++- packages/core/src/agents/local-executor.ts | 6 +- packages/core/src/agents/registry.ts | 16 +++++- packages/core/src/index.ts | 1 + 6 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 evals/subagents.eval.ts diff --git a/evals/README.md b/evals/README.md index 891a9549f5..962f54886c 100644 --- a/evals/README.md +++ b/evals/README.md @@ -88,6 +88,13 @@ describe('my_feature', () => { ## Running Evaluations +First, build the bundled Gemini CLI. You must do this after every code change. + +```bash +npm run build +npm run bundle +``` + ### Always Passing Evals To run the evaluations that are expected to always pass (CI safe): diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts new file mode 100644 index 0000000000..d0c77d4fe7 --- /dev/null +++ b/evals/subagents.eval.ts @@ -0,0 +1,64 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe } from 'vitest'; +import { evalTest } from './test-helper.js'; + +const AGENT_DEFINITION = `--- +name: docs-agent +description: An agent with expertise in updating documentation. +tools: + - read_file + - write_file +--- + +You are the docs agent. Update the documentation. +`; + +const INDEX_TS = 'export const add = (a: number, b: number) => a + b;'; + +describe('subagent eval test cases', () => { + /** + * Checks whether the outer agent reliably utilizes an expert subagent to + * accomplish a task when one is available. + * + * Note that the test is intentionally crafted to avoid the word "document" + * or "docs". 
We want to see the outer agent make the connection even when + * the prompt indirectly implies need of expertise. + * + * This tests the system prompt's subagent specific clauses. + */ + evalTest('ALWAYS_PASSES', { + name: 'should delegate to user provided agent with relevant expertise', + params: { + settings: { + experimental: { + enableAgents: true, + }, + }, + }, + prompt: 'Please update README.md with a description of this library.', + files: { + '.gemini/agents/test-agent.md': AGENT_DEFINITION, + 'index.ts': INDEX_TS, + 'README.md': 'TODO: update the README.', + }, + assert: async (rig, _result) => { + await rig.expectToolCallSuccess( + ['delegate_to_agent'], + undefined, + (args) => { + try { + const parsed = JSON.parse(args); + return parsed.agent_name === 'docs-agent'; + } catch { + return false; + } + }, + ); + }, + }); +}); diff --git a/evals/test-helper.ts b/evals/test-helper.ts index 9801d2307b..7fc9589986 100644 --- a/evals/test-helper.ts +++ b/evals/test-helper.ts @@ -6,7 +6,10 @@ import { it } from 'vitest'; import fs from 'node:fs'; +import path from 'node:path'; +import { execSync } from 'node:child_process'; import { TestRig } from '@google/gemini-cli-test-utils'; +import { createUnauthorizedToolError } from '@google/gemini-cli-core'; export * from '@google/gemini-cli-test-utils'; @@ -32,8 +35,33 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { const fn = async () => { const rig = new TestRig(); try { - await rig.setup(evalCase.name, evalCase.params); + rig.setup(evalCase.name, evalCase.params); + + if (evalCase.files) { + for (const [filePath, content] of Object.entries(evalCase.files)) { + const fullPath = path.join(rig.testDir!, filePath); + fs.mkdirSync(path.dirname(fullPath), { recursive: true }); + fs.writeFileSync(fullPath, content); + } + + const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const }; + execSync('git init', execOptions); + execSync('git config user.email "test@example.com"', execOptions); + 
execSync('git config user.name "Test User"', execOptions); + execSync('git add .', execOptions); + execSync('git commit --allow-empty -m "Initial commit"', execOptions); + } + const result = await rig.run({ args: evalCase.prompt }); + + const unauthorizedErrorPrefix = + createUnauthorizedToolError('').split("'")[0]; + if (result.includes(unauthorizedErrorPrefix)) { + throw new Error( + 'Test failed due to unauthorized tool call in output: ' + result, + ); + } + await evalCase.assert(rig, result); } finally { await logToFile( @@ -44,7 +72,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { } }; - if (policy === 'USUALLY_PASSES' && !process.env.RUN_EVALS) { + if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) { it.skip(evalCase.name, fn); } else { it(evalCase.name, fn); @@ -55,6 +83,7 @@ export interface EvalCase { name: string; params?: Record<string, any>; prompt: string; + files?: Record<string, string>; assert: (rig: TestRig, result: string) => Promise<void>; } diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index fa5b4701c6..8859b72385 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -68,6 +68,10 @@ type AgentTurnResult = finalResult: string | null; }; +export function createUnauthorizedToolError(toolName: string): string { + return `Unauthorized tool call: '${toolName}' is not available to this agent.`; +} + /** * Executes an agent loop based on an {@link AgentDefinition}. 
* @@ -883,7 +887,7 @@ export class LocalAgentExecutor { // Handle standard tools if (!allowedToolNames.has(functionCall.name as string)) { - const error = `Unauthorized tool call: '${functionCall.name}' is not available to this agent.`; + const error = createUnauthorizedToolError(functionCall.name as string); debugLogger.warn(`[LocalAgentExecutor] Blocked call: ${error}`); diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts index 4e042ab711..dd7a7d04fd 100644 --- a/packages/core/src/agents/registry.ts +++ b/packages/core/src/agents/registry.ts @@ -26,6 +26,7 @@ import { type ModelConfig, ModelConfigService, } from '../services/modelConfigService.js'; +import { DELEGATE_TO_AGENT_TOOL_NAME } from '../tools/tool-names.js'; /** * Returns the model config alias for a given agent definition. @@ -434,8 +435,19 @@ export class AgentRegistry { } let context = '## Available Sub-Agents\n'; - context += - 'Use `delegate_to_agent` for complex tasks requiring specialized analysis.\n\n'; + context += `Sub-agents are specialized expert agents that you can use to assist you in + the completion of all or part of a task. + + ALWAYS use \`${DELEGATE_TO_AGENT_TOOL_NAME}\` to delegate to a subagent if one + exists that has expertise relevant to your task. + + For example: + - Prompt: 'Fix test', Description: 'An agent with expertise in fixing tests.' -> should use the sub-agent. + - Prompt: 'Update the license header', Description: 'An agent with expertise in licensing and copyright.' -> should use the sub-agent. + - Prompt: 'Diagram the architecture of the codebase', Description: 'Agent with architecture experience'. -> should use the sub-agent. + - Prompt: 'Implement a fix for [bug]' -> Should decompose the project into subtasks, which may utilize available agents like 'plan', 'validate', and 'fix-tests'. 
+ + The following are the available sub-agents:\n\n`; for (const [name, def] of this.agents) { context += `- **${name}**: ${def.description}\n`; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a42ea862f2..506e602ebf 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -127,6 +127,7 @@ export * from './prompts/mcp-prompts.js'; // Export agent definitions export * from './agents/types.js'; export * from './agents/agentLoader.js'; +export * from './agents/local-executor.js'; // Export specific tool logic export * from './tools/read-file.js';