Fix the subagents eval.

This commit is contained in:
Christian Gunderman
2026-02-02 23:51:06 -08:00
parent 5be38c3a27
commit 849b3b255c
4 changed files with 37 additions and 2 deletions

View File

@@ -31,7 +31,7 @@ describe('subagent eval test cases', () => {
*
* This tests the system prompt's subagent-specific clauses.
*/
evalTest('USUALLY_PASSES', {
evalTest('ALWAYS_PASSES', {
name: 'should delegate to user provided agent with relevant expertise',
params: {
settings: {
@@ -46,6 +46,9 @@ describe('subagent eval test cases', () => {
'index.ts': INDEX_TS,
'README.md': 'TODO: update the README.',
},
acknowledgedAgents: {
'docs-agent': AGENT_DEFINITION,
},
assert: async (rig, _result) => {
await rig.expectToolCallSuccess(['docs-agent']);
},

View File

@@ -112,6 +112,10 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
execSync('git commit --allow-empty -m "Initial commit"', execOptions);
}
if (evalCase.acknowledgedAgents) {
await rig.acknowledgeAgents(evalCase.acknowledgedAgents);
}
const result = await rig.run({
args: evalCase.prompt,
approvalMode: evalCase.approvalMode ?? 'yolo',
@@ -172,5 +176,6 @@ export interface EvalCase {
timeout?: number;
files?: Record<string, string>;
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
acknowledgedAgents?: Record<string, string>;
assert: (rig: TestRig, result: string) => Promise<void>;
}

View File

@@ -135,6 +135,7 @@ export * from './prompts/mcp-prompts.js';
export * from './agents/types.js';
export * from './agents/agentLoader.js';
export * from './agents/local-executor.js';
export * from './agents/acknowledgedAgents.js';
// Export specific tool logic
export * from './tools/read-file.js';

View File

@@ -11,8 +11,13 @@ import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { env } from 'node:process';
import { setTimeout as sleep } from 'node:timers/promises';
import { DEFAULT_GEMINI_MODEL, GEMINI_DIR } from '@google/gemini-cli-core';
import {
DEFAULT_GEMINI_MODEL,
GEMINI_DIR,
AcknowledgedAgentsService,
} from '@google/gemini-cli-core';
import fs from 'node:fs';
import crypto from 'node:crypto';
import * as pty from '@lydell/node-pty';
import stripAnsi from 'strip-ansi';
import * as os from 'node:os';
@@ -356,6 +361,27 @@ export class TestRig {
);
}
/**
 * Records an acknowledgement for each given agent definition against this
 * rig's test project, so the agents are already acknowledged before a run.
 *
 * For every entry, the SHA-256 hex digest of the definition content is passed
 * to `AcknowledgedAgentsService.acknowledge` together with the resolved
 * project root and the agent name.
 *
 * `GEMINI_CLI_HOME` is pointed at this rig's home directory for the duration
 * of the call and restored to its exact previous state afterwards, even if an
 * acknowledgement fails.
 *
 * @param agents Map of agent name to the agent definition content.
 * @throws Error if the rig's test or home directory has not been initialized.
 */
async acknowledgeAgents(agents: Record<string, string>): Promise<void> {
  const { testDir, homeDir } = this;
  if (!testDir || !homeDir) {
    // Fail loudly: assigning undefined to process.env would store the literal
    // string "undefined" rather than leaving the variable unset.
    throw new Error(
      'Cannot acknowledge agents: TestRig test/home directories are not initialized.',
    );
  }

  // Use realpath to ensure the path matches what the CLI sees (e.g. /var vs /private/var on macOS)
  const projectRoot = fs.realpathSync(testDir);

  const originalHome = process.env['GEMINI_CLI_HOME'];
  process.env['GEMINI_CLI_HOME'] = homeDir;
  try {
    // Constructed while the override is active.
    // NOTE(review): presumably the service resolves its storage location from
    // GEMINI_CLI_HOME — confirm against AcknowledgedAgentsService.
    const service = new AcknowledgedAgentsService();
    // Acknowledge one at a time, preserving the original sequential order.
    for (const [name, content] of Object.entries(agents)) {
      const hash = crypto.createHash('sha256').update(content).digest('hex');
      await service.acknowledge(projectRoot, name, hash);
    }
  } finally {
    // Compare against undefined (not truthiness) so that a pre-existing
    // empty-string value is restored instead of being deleted.
    if (originalHome === undefined) {
      delete process.env['GEMINI_CLI_HOME'];
    } else {
      process.env['GEMINI_CLI_HOME'] = originalHome;
    }
  }
}
createFile(fileName: string, content: string) {
const filePath = join(this.testDir!, fileName);
writeFileSync(filePath, content);