feat(evals): centralize test agents into test-utils for reuse (#23616)

Co-authored-by: Samee Zahid <sameez@google.com>
This commit is contained in:
Samee Zahid
2026-03-24 12:50:48 -07:00
committed by GitHub
parent c223624a85
commit d2d6c9abe0
3 changed files with 91 additions and 33 deletions
+17 -32
View File
@@ -9,27 +9,7 @@ import path from 'node:path';
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
const DOCS_AGENT_DEFINITION = `---
name: docs-agent
description: An agent with expertise in updating documentation.
tools:
- read_file
- write_file
---
You are the docs agent. Update documentation clearly and accurately.
`;
const TEST_AGENT_DEFINITION = `---
name: test-agent
description: An agent with expertise in writing and updating tests.
tools:
- read_file
- write_file
---
You are the test agent. Add or update tests.
`;
import { evalTest, TEST_AGENTS } from './test-helper.js';
const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n';
@@ -62,12 +42,12 @@ describe('subagent eval test cases', () => {
},
prompt: 'Please update README.md with a description of this library.',
files: {
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
...TEST_AGENTS.DOCS_AGENT.asFile(),
'index.ts': INDEX_TS,
'README.md': 'TODO: update the README.\n',
},
assert: async (rig, _result) => {
await rig.expectToolCallSuccess(['docs-agent']);
await rig.expectToolCallSuccess([TEST_AGENTS.DOCS_AGENT.name]);
},
});
@@ -92,7 +72,7 @@ describe('subagent eval test cases', () => {
prompt:
'Rename the exported function in index.ts from add to sum and update the file directly.',
files: {
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
...TEST_AGENTS.DOCS_AGENT.asFile(),
'index.ts': INDEX_TS,
},
assert: async (rig, _result) => {
@@ -102,9 +82,11 @@ describe('subagent eval test cases', () => {
}>;
expect(updatedIndex).toContain('export const sum =');
expect(toolLogs.some((l) => l.toolRequest.name === 'docs-agent')).toBe(
false,
);
expect(
toolLogs.some(
(l) => l.toolRequest.name === TEST_AGENTS.DOCS_AGENT.name,
),
).toBe(false);
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
false,
);
@@ -133,7 +115,7 @@ describe('subagent eval test cases', () => {
},
prompt: 'Please add a small test file that verifies add(1, 2) returns 3.',
files: {
'.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION,
...TEST_AGENTS.TESTING_AGENT.asFile(),
'index.ts': INDEX_TS,
'package.json': JSON.stringify(
{
@@ -150,7 +132,7 @@ describe('subagent eval test cases', () => {
toolRequest: { name: string };
}>;
await rig.expectToolCallSuccess(['test-agent']);
await rig.expectToolCallSuccess([TEST_AGENTS.TESTING_AGENT.name]);
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
false,
);
@@ -178,8 +160,8 @@ describe('subagent eval test cases', () => {
prompt:
'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.',
files: {
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
'.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION,
...TEST_AGENTS.DOCS_AGENT.asFile(),
...TEST_AGENTS.TESTING_AGENT.asFile(),
'index.ts': INDEX_TS,
'README.md': 'TODO: update the README.\n',
'package.json': JSON.stringify(
@@ -198,7 +180,10 @@ describe('subagent eval test cases', () => {
}>;
const readme = readProjectFile(rig, 'README.md');
await rig.expectToolCallSuccess(['docs-agent', 'test-agent']);
await rig.expectToolCallSuccess([
TEST_AGENTS.DOCS_AGENT.name,
TEST_AGENTS.TESTING_AGENT.name,
]);
expect(readme).not.toContain('TODO: update the README.');
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
false,