test(evals): add comprehensive subagent delegation evaluations (#24132)

This commit is contained in:
Abhi
2026-03-29 19:13:50 -04:00
committed by GitHub
parent da8c841ef4
commit d9d2ce36f2
3 changed files with 202 additions and 19 deletions
+5
View File
@@ -61,6 +61,10 @@ export async function internalEvalTest(evalCase: EvalCase) {
try {
rig.setup(evalCase.name, evalCase.params);
if (evalCase.setup) {
await evalCase.setup(rig);
}
if (evalCase.files) {
await setupTestFiles(rig, evalCase.files);
}
@@ -371,6 +375,7 @@ export interface EvalCase {
prompt: string;
timeout?: number;
files?: Record<string, string>;
setup?: (rig: TestRig) => Promise<void> | void;
/** Conversation history to pre-load via --resume. Each entry is a message object with type, content, etc. */
messages?: Record<string, unknown>[];
/** Session ID for the resumed session. Auto-generated if not provided. */