mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
Steer outer agent to use expert subagents when present (#16763)
This commit is contained in:
committed by
GitHub
parent
4bb817de22
commit
a15978593a
@@ -88,6 +88,13 @@ describe('my_feature', () => {
|
||||
|
||||
## Running Evaluations
|
||||
|
||||
First, build the bundled Gemini CLI. You must do this after every code change.
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
npm run bundle
|
||||
```
|
||||
|
||||
### Always Passing Evals
|
||||
|
||||
To run the evaluations that are expected to always pass (CI safe):
|
||||
|
||||
64
evals/subagents.eval.ts
Normal file
64
evals/subagents.eval.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
|
||||
/**
 * Markdown definition (YAML front matter followed by the agent's prompt) for
 * the user-provided agent that the eval writes into the test workspace.
 *
 * The description advertises documentation expertise so that the outer agent
 * has a relevant expert available to delegate to.
 */
const AGENT_DEFINITION = `---
name: docs-agent
description: An agent with expertise in updating documentation.
tools:
- read_file
- write_file
---

You are the docs agent. Update the documentation.
`;

/** Minimal library source placed in the workspace so there is something to describe. */
const INDEX_TS = 'export const add = (a: number, b: number) => a + b;';
|
||||
|
||||
describe('subagent eval test cases', () => {
  /**
   * Checks whether the outer agent reliably utilizes an expert subagent to
   * accomplish a task when one is available.
   *
   * Note that the test is intentionally crafted to avoid the word "document"
   * or "docs". We want to see the outer agent make the connection even when
   * the prompt only indirectly implies the need for that expertise.
   *
   * This tests the system prompt's subagent specific clauses.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should delegate to user provided agent with relevant expertise',
    params: {
      settings: {
        experimental: {
          // Sub-agents are gated behind an experimental setting.
          enableAgents: true,
        },
      },
    },
    prompt: 'Please update README.md with a description of this library.',
    files: {
      '.gemini/agents/test-agent.md': AGENT_DEFINITION,
      'index.ts': INDEX_TS,
      'README.md': 'TODO: update the README.',
    },
    assert: async (rig, _result) => {
      await rig.expectToolCallSuccess(
        ['delegate_to_agent'],
        undefined,
        // Accept only delegations that target the docs agent.
        // NOTE(review): assumes `args` is the tool call's JSON-encoded
        // argument string — confirm against TestRig.
        (args) => {
          try {
            const parsed: unknown = JSON.parse(args);
            return (
              typeof parsed === 'object' &&
              parsed !== null &&
              'agent_name' in parsed &&
              parsed.agent_name === 'docs-agent'
            );
          } catch {
            // Malformed arguments cannot match the expected delegation.
            return false;
          }
        },
      );
    },
  });
});
|
||||
@@ -6,7 +6,10 @@
|
||||
|
||||
import { it } from 'vitest';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { TestRig } from '@google/gemini-cli-test-utils';
|
||||
import { createUnauthorizedToolError } from '@google/gemini-cli-core';
|
||||
|
||||
export * from '@google/gemini-cli-test-utils';
|
||||
|
||||
@@ -32,8 +35,33 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
const fn = async () => {
|
||||
const rig = new TestRig();
|
||||
try {
|
||||
await rig.setup(evalCase.name, evalCase.params);
|
||||
rig.setup(evalCase.name, evalCase.params);
|
||||
|
||||
if (evalCase.files) {
|
||||
for (const [filePath, content] of Object.entries(evalCase.files)) {
|
||||
const fullPath = path.join(rig.testDir!, filePath);
|
||||
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
|
||||
fs.writeFileSync(fullPath, content);
|
||||
}
|
||||
|
||||
const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const };
|
||||
execSync('git init', execOptions);
|
||||
execSync('git config user.email "test@example.com"', execOptions);
|
||||
execSync('git config user.name "Test User"', execOptions);
|
||||
execSync('git add .', execOptions);
|
||||
execSync('git commit --allow-empty -m "Initial commit"', execOptions);
|
||||
}
|
||||
|
||||
const result = await rig.run({ args: evalCase.prompt });
|
||||
|
||||
const unauthorizedErrorPrefix =
|
||||
createUnauthorizedToolError('').split("'")[0];
|
||||
if (result.includes(unauthorizedErrorPrefix)) {
|
||||
throw new Error(
|
||||
'Test failed due to unauthorized tool call in output: ' + result,
|
||||
);
|
||||
}
|
||||
|
||||
await evalCase.assert(rig, result);
|
||||
} finally {
|
||||
await logToFile(
|
||||
@@ -44,7 +72,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
}
|
||||
};
|
||||
|
||||
if (policy === 'USUALLY_PASSES' && !process.env.RUN_EVALS) {
|
||||
if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
|
||||
it.skip(evalCase.name, fn);
|
||||
} else {
|
||||
it(evalCase.name, fn);
|
||||
@@ -55,6 +83,7 @@ export interface EvalCase {
|
||||
name: string;
|
||||
params?: Record<string, any>;
|
||||
prompt: string;
|
||||
files?: Record<string, string>;
|
||||
assert: (rig: TestRig, result: string) => Promise<void>;
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,10 @@ type AgentTurnResult =
|
||||
finalResult: string | null;
|
||||
};
|
||||
|
||||
/**
 * Builds the message reported when an agent calls a tool that is not in its
 * allowed set.
 *
 * The text preceding the first single quote acts as a stable prefix: the eval
 * test helper derives it with `split("'")[0]` to detect unauthorized calls in
 * CLI output, so keep the wording in sync with that check.
 *
 * @param toolName - Name of the tool the agent attempted to call.
 * @returns The formatted error message.
 */
export function createUnauthorizedToolError(toolName: string): string {
  return `Unauthorized tool call: '${toolName}' is not available to this agent.`;
}
|
||||
|
||||
/**
|
||||
* Executes an agent loop based on an {@link AgentDefinition}.
|
||||
*
|
||||
@@ -883,7 +887,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
|
||||
// Handle standard tools
|
||||
if (!allowedToolNames.has(functionCall.name as string)) {
|
||||
const error = `Unauthorized tool call: '${functionCall.name}' is not available to this agent.`;
|
||||
const error = createUnauthorizedToolError(functionCall.name as string);
|
||||
|
||||
debugLogger.warn(`[LocalAgentExecutor] Blocked call: ${error}`);
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ import {
|
||||
type ModelConfig,
|
||||
ModelConfigService,
|
||||
} from '../services/modelConfigService.js';
|
||||
import { DELEGATE_TO_AGENT_TOOL_NAME } from '../tools/tool-names.js';
|
||||
|
||||
/**
|
||||
* Returns the model config alias for a given agent definition.
|
||||
@@ -434,8 +435,19 @@ export class AgentRegistry {
|
||||
}
|
||||
|
||||
let context = '## Available Sub-Agents\n';
|
||||
context +=
|
||||
'Use `delegate_to_agent` for complex tasks requiring specialized analysis.\n\n';
|
||||
context += `Sub-agents are specialized expert agents that you can use to assist you in
|
||||
the completion of all or part of a task.
|
||||
|
||||
ALWAYS use \`${DELEGATE_TO_AGENT_TOOL_NAME}\` to delegate to a subagent if one
|
||||
exists that has expertise relevant to your task.
|
||||
|
||||
For example:
|
||||
- Prompt: 'Fix test', Description: 'An agent with expertise in fixing tests.' -> should use the sub-agent.
|
||||
- Prompt: 'Update the license header', Description: 'An agent with expertise in licensing and copyright.' -> should use the sub-agent.
|
||||
- Prompt: 'Diagram the architecture of the codebase', Description: 'Agent with architecture experience'. -> should use the sub-agent.
|
||||
- Prompt: 'Implement a fix for [bug]' -> Should decompose the project into subtasks, which may utilize available agents like 'plan', 'validate', and 'fix-tests'.
|
||||
|
||||
The following are the available sub-agents:\n\n`;
|
||||
|
||||
for (const [name, def] of this.agents) {
|
||||
context += `- **${name}**: ${def.description}\n`;
|
||||
|
||||
@@ -127,6 +127,7 @@ export * from './prompts/mcp-prompts.js';
|
||||
// Export agent definitions
|
||||
export * from './agents/types.js';
|
||||
export * from './agents/agentLoader.js';
|
||||
export * from './agents/local-executor.js';
|
||||
|
||||
// Export specific tool logic
|
||||
export * from './tools/read-file.js';
|
||||
|
||||
Reference in New Issue
Block a user