mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
feat(evals): centralize test agents into test-utils for reuse (#23616)
Co-authored-by: Samee Zahid <sameez@google.com>
This commit is contained in:
+17
-32
@@ -9,27 +9,7 @@ import path from 'node:path';
|
|||||||
|
|
||||||
import { describe, expect } from 'vitest';
|
import { describe, expect } from 'vitest';
|
||||||
|
|
||||||
import { evalTest } from './test-helper.js';
|
import { evalTest, TEST_AGENTS } from './test-helper.js';
|
||||||
|
|
||||||
const DOCS_AGENT_DEFINITION = `---
|
|
||||||
name: docs-agent
|
|
||||||
description: An agent with expertise in updating documentation.
|
|
||||||
tools:
|
|
||||||
- read_file
|
|
||||||
- write_file
|
|
||||||
---
|
|
||||||
You are the docs agent. Update documentation clearly and accurately.
|
|
||||||
`;
|
|
||||||
|
|
||||||
const TEST_AGENT_DEFINITION = `---
|
|
||||||
name: test-agent
|
|
||||||
description: An agent with expertise in writing and updating tests.
|
|
||||||
tools:
|
|
||||||
- read_file
|
|
||||||
- write_file
|
|
||||||
---
|
|
||||||
You are the test agent. Add or update tests.
|
|
||||||
`;
|
|
||||||
|
|
||||||
const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n';
|
const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n';
|
||||||
|
|
||||||
@@ -62,12 +42,12 @@ describe('subagent eval test cases', () => {
|
|||||||
},
|
},
|
||||||
prompt: 'Please update README.md with a description of this library.',
|
prompt: 'Please update README.md with a description of this library.',
|
||||||
files: {
|
files: {
|
||||||
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
|
...TEST_AGENTS.DOCS_AGENT.asFile(),
|
||||||
'index.ts': INDEX_TS,
|
'index.ts': INDEX_TS,
|
||||||
'README.md': 'TODO: update the README.\n',
|
'README.md': 'TODO: update the README.\n',
|
||||||
},
|
},
|
||||||
assert: async (rig, _result) => {
|
assert: async (rig, _result) => {
|
||||||
await rig.expectToolCallSuccess(['docs-agent']);
|
await rig.expectToolCallSuccess([TEST_AGENTS.DOCS_AGENT.name]);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -92,7 +72,7 @@ describe('subagent eval test cases', () => {
|
|||||||
prompt:
|
prompt:
|
||||||
'Rename the exported function in index.ts from add to sum and update the file directly.',
|
'Rename the exported function in index.ts from add to sum and update the file directly.',
|
||||||
files: {
|
files: {
|
||||||
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
|
...TEST_AGENTS.DOCS_AGENT.asFile(),
|
||||||
'index.ts': INDEX_TS,
|
'index.ts': INDEX_TS,
|
||||||
},
|
},
|
||||||
assert: async (rig, _result) => {
|
assert: async (rig, _result) => {
|
||||||
@@ -102,9 +82,11 @@ describe('subagent eval test cases', () => {
|
|||||||
}>;
|
}>;
|
||||||
|
|
||||||
expect(updatedIndex).toContain('export const sum =');
|
expect(updatedIndex).toContain('export const sum =');
|
||||||
expect(toolLogs.some((l) => l.toolRequest.name === 'docs-agent')).toBe(
|
expect(
|
||||||
false,
|
toolLogs.some(
|
||||||
);
|
(l) => l.toolRequest.name === TEST_AGENTS.DOCS_AGENT.name,
|
||||||
|
),
|
||||||
|
).toBe(false);
|
||||||
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
|
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
|
||||||
false,
|
false,
|
||||||
);
|
);
|
||||||
@@ -133,7 +115,7 @@ describe('subagent eval test cases', () => {
|
|||||||
},
|
},
|
||||||
prompt: 'Please add a small test file that verifies add(1, 2) returns 3.',
|
prompt: 'Please add a small test file that verifies add(1, 2) returns 3.',
|
||||||
files: {
|
files: {
|
||||||
'.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION,
|
...TEST_AGENTS.TESTING_AGENT.asFile(),
|
||||||
'index.ts': INDEX_TS,
|
'index.ts': INDEX_TS,
|
||||||
'package.json': JSON.stringify(
|
'package.json': JSON.stringify(
|
||||||
{
|
{
|
||||||
@@ -150,7 +132,7 @@ describe('subagent eval test cases', () => {
|
|||||||
toolRequest: { name: string };
|
toolRequest: { name: string };
|
||||||
}>;
|
}>;
|
||||||
|
|
||||||
await rig.expectToolCallSuccess(['test-agent']);
|
await rig.expectToolCallSuccess([TEST_AGENTS.TESTING_AGENT.name]);
|
||||||
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
|
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
|
||||||
false,
|
false,
|
||||||
);
|
);
|
||||||
@@ -178,8 +160,8 @@ describe('subagent eval test cases', () => {
|
|||||||
prompt:
|
prompt:
|
||||||
'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.',
|
'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.',
|
||||||
files: {
|
files: {
|
||||||
'.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
|
...TEST_AGENTS.DOCS_AGENT.asFile(),
|
||||||
'.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION,
|
...TEST_AGENTS.TESTING_AGENT.asFile(),
|
||||||
'index.ts': INDEX_TS,
|
'index.ts': INDEX_TS,
|
||||||
'README.md': 'TODO: update the README.\n',
|
'README.md': 'TODO: update the README.\n',
|
||||||
'package.json': JSON.stringify(
|
'package.json': JSON.stringify(
|
||||||
@@ -198,7 +180,10 @@ describe('subagent eval test cases', () => {
|
|||||||
}>;
|
}>;
|
||||||
const readme = readProjectFile(rig, 'README.md');
|
const readme = readProjectFile(rig, 'README.md');
|
||||||
|
|
||||||
await rig.expectToolCallSuccess(['docs-agent', 'test-agent']);
|
await rig.expectToolCallSuccess([
|
||||||
|
TEST_AGENTS.DOCS_AGENT.name,
|
||||||
|
TEST_AGENTS.TESTING_AGENT.name,
|
||||||
|
]);
|
||||||
expect(readme).not.toContain('TODO: update the README.');
|
expect(readme).not.toContain('TODO: update the README.');
|
||||||
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
|
expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
|
||||||
false,
|
false,
|
||||||
|
|||||||
@@ -0,0 +1,72 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Describes a test agent used in evaluations and tests.
 *
 * All members are read-only.
 */
export interface TestAgent {
  /** The unique name of the agent. */
  readonly name: string;
  /** The full YAML/Markdown definition of the agent. */
  readonly definition: string;
  /** The standard path where this agent should be saved in a test project. */
  readonly path: string;
  /**
   * Returns a record that can be spread directly into the `files` object
   * passed to evalTest.
   */
  readonly asFile: () => Record<string, string>;
}
|
||||||
|
|
||||||
|
/**
 * Builds a TestAgent with consistent formatting and pathing.
 *
 * The definition is a front-matter header (name, description and tools list)
 * delimited by `---` lines, followed by the body and a trailing newline. The
 * agent path is always `.gemini/agents/<name>.md`.
 *
 * Values are interpolated verbatim and are not YAML-escaped, so callers must
 * pass strings that are already safe to place in the header.
 *
 * @param options.name - Agent name; also used as the file name stem.
 * @param options.description - Text for the `description` header field.
 * @param options.tools - Tool names, rendered one list item per line. An empty
 *   list is rendered as `tools: []`.
 * @param options.body - Text placed after the closing `---` line.
 * @returns The agent's name, definition, path, and an `asFile` helper that
 *   returns a new single-entry record mapping the path to the definition.
 */
function createAgent(options: {
  name: string;
  description: string;
  tools: readonly string[];
  body: string;
}): TestAgent {
  // A bare `tools:` key followed by a blank line parses as null rather than a
  // list, so an empty tool set is written as an explicit empty sequence.
  const toolsSection =
    options.tools.length > 0
      ? `tools:\n${options.tools.map((t) => ` - ${t}`).join('\n')}`
      : 'tools: []';

  const definition = `---
name: ${options.name}
description: ${options.description}
${toolsSection}
---
${options.body}
`;

  const path = `.gemini/agents/${options.name}.md`;

  return {
    name: options.name,
    definition,
    path,
    asFile: () => ({ [path]: definition }),
  };
}
|
||||||
|
|
||||||
|
/**
 * Predefined test agents for use in evaluations and tests.
 *
 * Each entry is built with createAgent, so it carries the agent's name, its
 * definition text, its path, and the `asFile()` helper.
 */
export const TEST_AGENTS = {
  /**
   * An agent with expertise in updating documentation.
   * Registered under the name `docs-agent`.
   */
  DOCS_AGENT: createAgent({
    name: 'docs-agent',
    description: 'An agent with expertise in updating documentation.',
    tools: ['read_file', 'write_file'],
    body: 'You are the docs agent. Update documentation clearly and accurately.',
  }),

  /**
   * An agent with expertise in writing and updating tests.
   * Registered under the name `testing-agent`.
   */
  TESTING_AGENT: createAgent({
    name: 'testing-agent',
    description: 'An agent with expertise in writing and updating tests.',
    tools: ['read_file', 'write_file'],
    body: 'You are the test agent. Add or update tests.',
  }),
} as const;
|
||||||
@@ -5,6 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
export * from './file-system-test-helpers.js';
|
export * from './file-system-test-helpers.js';
|
||||||
export * from './test-rig.js';
|
export * from './fixtures/agents.js';
|
||||||
export * from './mock-utils.js';
|
export * from './mock-utils.js';
|
||||||
export * from './test-mcp-server.js';
|
export * from './test-mcp-server.js';
|
||||||
|
export * from './test-rig.js';
|
||||||
|
|||||||
Reference in New Issue
Block a user