feat(evals): centralize test agents into test-utils for reuse (#23616)

Co-authored-by: Samee Zahid <sameez@google.com>
This commit is contained in:
Samee Zahid
2026-03-24 12:50:48 -07:00
committed by GitHub
parent bf80e27dbc
commit 84f40768a1
3 changed files with 91 additions and 33 deletions
+17 -32
View File
@@ -9,27 +9,7 @@ import path from 'node:path';
import { describe, expect } from 'vitest'; import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js'; import { evalTest, TEST_AGENTS } from './test-helper.js';
const DOCS_AGENT_DEFINITION = `---
name: docs-agent
description: An agent with expertise in updating documentation.
tools:
- read_file
- write_file
---
You are the docs agent. Update documentation clearly and accurately.
`;
const TEST_AGENT_DEFINITION = `---
name: test-agent
description: An agent with expertise in writing and updating tests.
tools:
- read_file
- write_file
---
You are the test agent. Add or update tests.
`;
const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n'; const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n';
@@ -62,12 +42,12 @@ describe('subagent eval test cases', () => {
}, },
prompt: 'Please update README.md with a description of this library.', prompt: 'Please update README.md with a description of this library.',
files: { files: {
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION, ...TEST_AGENTS.DOCS_AGENT.asFile(),
'index.ts': INDEX_TS, 'index.ts': INDEX_TS,
'README.md': 'TODO: update the README.\n', 'README.md': 'TODO: update the README.\n',
}, },
assert: async (rig, _result) => { assert: async (rig, _result) => {
await rig.expectToolCallSuccess(['docs-agent']); await rig.expectToolCallSuccess([TEST_AGENTS.DOCS_AGENT.name]);
}, },
}); });
@@ -92,7 +72,7 @@ describe('subagent eval test cases', () => {
prompt: prompt:
'Rename the exported function in index.ts from add to sum and update the file directly.', 'Rename the exported function in index.ts from add to sum and update the file directly.',
files: { files: {
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION, ...TEST_AGENTS.DOCS_AGENT.asFile(),
'index.ts': INDEX_TS, 'index.ts': INDEX_TS,
}, },
assert: async (rig, _result) => { assert: async (rig, _result) => {
@@ -102,9 +82,11 @@ describe('subagent eval test cases', () => {
}>; }>;
expect(updatedIndex).toContain('export const sum ='); expect(updatedIndex).toContain('export const sum =');
expect(toolLogs.some((l) => l.toolRequest.name === 'docs-agent')).toBe( expect(
false, toolLogs.some(
); (l) => l.toolRequest.name === TEST_AGENTS.DOCS_AGENT.name,
),
).toBe(false);
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
false, false,
); );
@@ -133,7 +115,7 @@ describe('subagent eval test cases', () => {
}, },
prompt: 'Please add a small test file that verifies add(1, 2) returns 3.', prompt: 'Please add a small test file that verifies add(1, 2) returns 3.',
files: { files: {
'.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION, ...TEST_AGENTS.TESTING_AGENT.asFile(),
'index.ts': INDEX_TS, 'index.ts': INDEX_TS,
'package.json': JSON.stringify( 'package.json': JSON.stringify(
{ {
@@ -150,7 +132,7 @@ describe('subagent eval test cases', () => {
toolRequest: { name: string }; toolRequest: { name: string };
}>; }>;
await rig.expectToolCallSuccess(['test-agent']); await rig.expectToolCallSuccess([TEST_AGENTS.TESTING_AGENT.name]);
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
false, false,
); );
@@ -178,8 +160,8 @@ describe('subagent eval test cases', () => {
prompt: prompt:
'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.', 'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.',
files: { files: {
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION, ...TEST_AGENTS.DOCS_AGENT.asFile(),
'.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION, ...TEST_AGENTS.TESTING_AGENT.asFile(),
'index.ts': INDEX_TS, 'index.ts': INDEX_TS,
'README.md': 'TODO: update the README.\n', 'README.md': 'TODO: update the README.\n',
'package.json': JSON.stringify( 'package.json': JSON.stringify(
@@ -198,7 +180,10 @@ describe('subagent eval test cases', () => {
}>; }>;
const readme = readProjectFile(rig, 'README.md'); const readme = readProjectFile(rig, 'README.md');
await rig.expectToolCallSuccess(['docs-agent', 'test-agent']); await rig.expectToolCallSuccess([
TEST_AGENTS.DOCS_AGENT.name,
TEST_AGENTS.TESTING_AGENT.name,
]);
expect(readme).not.toContain('TODO: update the README.'); expect(readme).not.toContain('TODO: update the README.');
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
false, false,
@@ -0,0 +1,72 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Describes one agent fixture shared by evaluations and tests.
 *
 * In this file, instances are built by `createAgent`, which derives both
 * `definition` and `path` from the agent's name.
 */
export interface TestAgent {
/** Agent name; `createAgent` writes it into the `name:` field of the definition and uses it as the file stem of `path`. */
readonly name: string;
/**
 * Full text of the agent file: a `---`-delimited header block
 * (name, description, tools) followed by the agent's body text.
 */
readonly definition: string;
/** Relative path where the agent file belongs in a test project; `createAgent` uses `.gemini/agents/<name>.md`. */
readonly path: string;
/**
 * Returns a single-entry record mapping `path` to `definition`, so the agent
 * can be spread into a `files` object for evalTest (e.g. `...agent.asFile()`).
 */
readonly asFile: () => Record<string, string>;
}
/**
 * Builds a TestAgent from its parts.
 *
 * The definition text is a `---`-delimited header carrying the name, the
 * description and one ` - <tool>` line per tool, followed by the body and a
 * trailing newline. The path is always `.gemini/agents/<name>.md`.
 *
 * @param options - Name, description, tool names and body text of the agent.
 * @returns The agent's name, generated definition, path, and an `asFile`
 *   helper that yields a fresh `{ [path]: definition }` record on every call.
 */
function createAgent(options: {
  name: string;
  description: string;
  tools: string[];
  body: string;
}): TestAgent {
  const { name, description, tools, body } = options;

  // One list item per tool; an empty tool list leaves a blank line after `tools:`.
  const toolLines = tools.map((tool) => ` - ${tool}`).join('\n');

  // The final empty entry produces the trailing newline after the body.
  const definition = [
    '---',
    `name: ${name}`,
    `description: ${description}`,
    'tools:',
    toolLines,
    '---',
    body,
    '',
  ].join('\n');

  const path = `.gemini/agents/${name}.md`;
  const asFile = (): Record<string, string> => ({ [path]: definition });

  return { name, definition, path, asFile };
}
/**
 * Predefined agent fixtures for use in evaluations and tests.
 *
 * Each entry is built by `createAgent`, so it exposes `name`, `definition`,
 * `path` and `asFile()`; e.g. spread `asFile()` into a test's `files` object
 * and assert against `name`.
 */
export const TEST_AGENTS = {
/**
 * Documentation agent: named `docs-agent`, declaring the `read_file` and
 * `write_file` tools.
 */
DOCS_AGENT: createAgent({
name: 'docs-agent',
description: 'An agent with expertise in updating documentation.',
tools: ['read_file', 'write_file'],
body: 'You are the docs agent. Update documentation clearly and accurately.',
}),
/**
 * Test-writing agent: named `testing-agent`, declaring the `read_file` and
 * `write_file` tools.
 *
 * NOTE(review): the name is `testing-agent` while the body text says
 * "the test agent" — confirm the wording mismatch is intended.
 */
TESTING_AGENT: createAgent({
name: 'testing-agent',
description: 'An agent with expertise in writing and updating tests.',
tools: ['read_file', 'write_file'],
body: 'You are the test agent. Add or update tests.',
}),
} as const;
+2 -1
View File
@@ -5,6 +5,7 @@
*/ */
export * from './file-system-test-helpers.js'; export * from './file-system-test-helpers.js';
export * from './test-rig.js'; export * from './fixtures/agents.js';
export * from './mock-utils.js'; export * from './mock-utils.js';
export * from './test-mcp-server.js'; export * from './test-mcp-server.js';
export * from './test-rig.js';