fix(evals): update eval tests for invoke_agent telemetry and project-scoped memory (#25502)

This commit is contained in:
Sandy Tao
2026-04-15 16:14:12 -07:00
committed by GitHub
parent e827cfdf83
commit 00b7781c3c
3 changed files with 88 additions and 37 deletions
+14 -3
View File
@@ -26,11 +26,22 @@ describe('generalist_agent', () => {
prompt:
'Please use the generalist agent to create a file called "generalist_test_file.txt" containing exactly the following text: success',
assert: async (rig) => {
-// 1) Verify the generalist agent was invoked
-const foundToolCall = await rig.waitForToolCall('generalist');
+// 1) Verify the generalist agent was invoked via invoke_agent
+const foundToolCall = await rig.waitForToolCall(
+'invoke_agent',
+undefined,
+(args) => {
+try {
+const parsed = JSON.parse(args);
+return parsed.agent_name === 'generalist';
+} catch {
+return false;
+}
+},
+);
expect(
foundToolCall,
-'Expected to find a tool call for generalist agent',
+'Expected to find an invoke_agent tool call for generalist agent',
).toBeTruthy();
// 2) Verify the file was created as expected