Fix the subagents eval.

This commit is contained in:
Christian Gunderman
2026-02-02 23:51:06 -08:00
parent 5be38c3a27
commit 849b3b255c
4 changed files with 37 additions and 2 deletions
+4 -1
View File
@@ -31,7 +31,7 @@ describe('subagent eval test cases', () => {
*
* This tests the system prompt's subagent specific clauses.
*/
- evalTest('USUALLY_PASSES', {
+ evalTest('ALWAYS_PASSES', {
name: 'should delegate to user provided agent with relevant expertise',
params: {
settings: {
@@ -46,6 +46,9 @@ describe('subagent eval test cases', () => {
'index.ts': INDEX_TS,
'README.md': 'TODO: update the README.',
},
+ acknowledgedAgents: {
+ 'docs-agent': AGENT_DEFINITION,
+ },
assert: async (rig, _result) => {
await rig.expectToolCallSuccess(['docs-agent']);
},
+5
View File
@@ -112,6 +112,10 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
execSync('git commit --allow-empty -m "Initial commit"', execOptions);
}
+ if (evalCase.acknowledgedAgents) {
+ await rig.acknowledgeAgents(evalCase.acknowledgedAgents);
+ }
const result = await rig.run({
args: evalCase.prompt,
approvalMode: evalCase.approvalMode ?? 'yolo',
@@ -172,5 +176,6 @@ export interface EvalCase {
timeout?: number;
files?: Record<string, string>;
approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';
+ acknowledgedAgents?: Record<string, string>;
assert: (rig: TestRig, result: string) => Promise<void>;
}