From a15978593a3d39e9b3b88d353fc88c53a87acdb8 Mon Sep 17 00:00:00 2001
From: Christian Gunderman <gundermanc@gmail.com>
Date: Fri, 16 Jan 2026 16:51:10 +0000
Subject: [PATCH] Steer outer agent to use expert subagents when present
 (#16763)

---
 evals/README.md                            |  7 +++
 evals/subagents.eval.ts                    | 64 ++++++++++++++++++++++
 evals/test-helper.ts                       | 33 ++++++++++-
 packages/core/src/agents/local-executor.ts |  6 +-
 packages/core/src/agents/registry.ts       | 16 +++++-
 packages/core/src/index.ts                 |  1 +
 6 files changed, 122 insertions(+), 5 deletions(-)
 create mode 100644 evals/subagents.eval.ts

diff --git a/evals/README.md b/evals/README.md
index 891a9549f5..962f54886c 100644
--- a/evals/README.md
+++ b/evals/README.md
@@ -88,6 +88,13 @@ describe('my_feature', () => {
 
 ## Running Evaluations
 
+First, build and bundle the Gemini CLI. Repeat this after every code change.
+
+```bash
+npm run build
+npm run bundle
+```
+
 ### Always Passing Evals
 
 To run the evaluations that are expected to always pass (CI safe):
diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts
new file mode 100644
index 0000000000..d0c77d4fe7
--- /dev/null
+++ b/evals/subagents.eval.ts
@@ -0,0 +1,64 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe } from 'vitest';
+import { evalTest } from './test-helper.js';
+
+// Fixture contents: a custom docs sub-agent definition and a tiny library.
+const AGENT_DEFINITION = `---
+name: docs-agent
+description: An agent with expertise in updating documentation.
+tools:
+  - read_file
+  - write_file
+---
+
+You are the docs agent. Update the documentation.
+`;
+const INDEX_TS = 'export const add = (a: number, b: number) => a + b;';
+
+describe('subagent eval test cases', () => {
+  /**
+   * Verifies that the outer agent hands work to an expert subagent whenever
+   * one with relevant expertise is available.
+   *
+   * The prompt deliberately avoids the words "document" and "docs": the outer
+   * agent has to make the connection to the docs agent from the task itself,
+   * which only indirectly implies the need for that expertise.
+   *
+   * This exercises the subagent-specific clauses of the system prompt.
+   */
+  evalTest('ALWAYS_PASSES', {
+    name: 'should delegate to user provided agent with relevant expertise',
+    params: { settings: { experimental: { enableAgents: true } } },
+    prompt: 'Please update README.md with a description of this library.',
+    files: {
+      '.gemini/agents/test-agent.md': AGENT_DEFINITION,
+      'index.ts': INDEX_TS,
+      'README.md': 'TODO: update the README.',
+    },
+    assert: async (rig) => {
+      // Must equal the `name` declared in AGENT_DEFINITION's frontmatter.
+      const expectedAgent = 'docs-agent';
+
+      await rig.expectToolCallSuccess(
+        ['delegate_to_agent'],
+        undefined,
+        // Accept only calls whose JSON arguments name the expected agent.
+        (rawArgs) => {
+          let agentName: unknown;
+          try {
+            agentName = JSON.parse(rawArgs).agent_name;
+          } catch {
+            // Arguments that cannot be inspected never count as a match.
+            return false;
+          }
+          return agentName === expectedAgent;
+        },
+      );
+    },
+  });
+});
diff --git a/evals/test-helper.ts b/evals/test-helper.ts
index 9801d2307b..7fc9589986 100644
--- a/evals/test-helper.ts
+++ b/evals/test-helper.ts
@@ -6,7 +6,10 @@
 
 import { it } from 'vitest';
 import fs from 'node:fs';
+import path from 'node:path';
+import { execSync } from 'node:child_process';
 import { TestRig } from '@google/gemini-cli-test-utils';
+import { createUnauthorizedToolError } from '@google/gemini-cli-core';
 
 export * from '@google/gemini-cli-test-utils';
 
@@ -32,8 +35,33 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
   const fn = async () => {
     const rig = new TestRig();
     try {
-      await rig.setup(evalCase.name, evalCase.params);
+      rig.setup(evalCase.name, evalCase.params);
+
+      if (evalCase.files) {
+        for (const [filePath, content] of Object.entries(evalCase.files)) {
+          const fullPath = path.join(rig.testDir!, filePath);
+          fs.mkdirSync(path.dirname(fullPath), { recursive: true });
+          fs.writeFileSync(fullPath, content);
+        }
+
+        const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const };
+        execSync('git init', execOptions);
+        execSync('git config user.email "test@example.com"', execOptions);
+        execSync('git config user.name "Test User"', execOptions);
+        execSync('git add .', execOptions);
+        execSync('git commit --allow-empty -m "Initial commit"', execOptions);
+      }
+
       const result = await rig.run({ args: evalCase.prompt });
+
+      const unauthorizedErrorPrefix =
+        createUnauthorizedToolError('').split("'")[0];
+      if (result.includes(unauthorizedErrorPrefix)) {
+        throw new Error(
+          'Test failed due to unauthorized tool call in output: ' + result,
+        );
+      }
+
       await evalCase.assert(rig, result);
     } finally {
       await logToFile(
@@ -44,7 +72,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
     }
   };
 
-  if (policy === 'USUALLY_PASSES' && !process.env.RUN_EVALS) {
+  if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
     it.skip(evalCase.name, fn);
   } else {
     it(evalCase.name, fn);
@@ -55,6 +83,7 @@ export interface EvalCase {
   name: string;
   params?: Record<string, any>;
   prompt: string;
+  files?: Record<string, string>;
   assert: (rig: TestRig, result: string) => Promise<void>;
 }
 
diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts
index fa5b4701c6..8859b72385 100644
--- a/packages/core/src/agents/local-executor.ts
+++ b/packages/core/src/agents/local-executor.ts
@@ -68,6 +68,10 @@ type AgentTurnResult =
       finalResult: string | null;
     };
 
+/** Blocked-tool-call message; evals match on the text before the first `'`. */
+export function createUnauthorizedToolError(toolName: string): string {
+  return `Unauthorized tool call: '${toolName}' is not available to this agent.`;
+}
 /**
  * Executes an agent loop based on an {@link AgentDefinition}.
  *
@@ -883,7 +887,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
 
       // Handle standard tools
       if (!allowedToolNames.has(functionCall.name as string)) {
-        const error = `Unauthorized tool call: '${functionCall.name}' is not available to this agent.`;
+        const error = createUnauthorizedToolError(functionCall.name as string);
 
         debugLogger.warn(`[LocalAgentExecutor] Blocked call: ${error}`);
 
diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts
index 4e042ab711..dd7a7d04fd 100644
--- a/packages/core/src/agents/registry.ts
+++ b/packages/core/src/agents/registry.ts
@@ -26,6 +26,7 @@ import {
   type ModelConfig,
   ModelConfigService,
 } from '../services/modelConfigService.js';
+import { DELEGATE_TO_AGENT_TOOL_NAME } from '../tools/tool-names.js';
 
 /**
  * Returns the model config alias for a given agent definition.
@@ -434,8 +435,19 @@ export class AgentRegistry {
     }
 
     let context = '## Available Sub-Agents\n';
-    context +=
-      'Use `delegate_to_agent` for complex tasks requiring specialized analysis.\n\n';
+    context += `Sub-agents are specialized expert agents that you can use to assist you in
+      the completion of all or part of a task.
+
+      ALWAYS use \`${DELEGATE_TO_AGENT_TOOL_NAME}\` to delegate to a subagent if one
+      exists that has expertise relevant to your task.
+
+      For example:
+      - Prompt: 'Fix test', Description: 'An agent with expertise in fixing tests.' -> should use the sub-agent.
+      - Prompt: 'Update the license header', Description: 'An agent with expertise in licensing and copyright.' -> should use the sub-agent.
+      - Prompt: 'Diagram the architecture of the codebase', Description: 'Agent with architecture experience'. -> should use the sub-agent.
+      - Prompt: 'Implement a fix for [bug]' -> Should decompose the project into subtasks, which may utilize available agents like 'plan', 'validate', and 'fix-tests'.
+
+      The following are the available sub-agents:\n\n`;
 
     for (const [name, def] of this.agents) {
       context += `- **${name}**: ${def.description}\n`;
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index a42ea862f2..506e602ebf 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -127,6 +127,7 @@ export * from './prompts/mcp-prompts.js';
 // Export agent definitions
 export * from './agents/types.js';
 export * from './agents/agentLoader.js';
+export * from './agents/local-executor.js';
 
 // Export specific tool logic
 export * from './tools/read-file.js';