fix(evals): update eval tests for invoke_agent telemetry and project-scoped memory (#25502)

2026-05-12 12:54:07 -07:00 · 2026-04-15 16:14:12 -07:00
parent e827cfdf83
commit 00b7781c3c
3 changed files with 88 additions and 37 deletions
@@ -26,11 +26,22 @@ describe('generalist_agent', () => {
    prompt:
      'Please use the generalist agent to create a file called "generalist_test_file.txt" containing exactly the following text: success',
    assert: async (rig) => {
-      // 1) Verify the generalist agent was invoked
-      const foundToolCall = await rig.waitForToolCall('generalist');
+      // 1) Verify the generalist agent was invoked via invoke_agent
+      const foundToolCall = await rig.waitForToolCall(
+        'invoke_agent',
+        undefined,
+        (args) => { // matcher passed to waitForToolCall: true only when the call's args name the generalist agent
+          try {
+            const parsed = JSON.parse(args); // NOTE(review): presumably args is the serialized tool-call arguments string — confirm against rig
+            return parsed.agent_name === 'generalist';
+          } catch { // any error while parsing or reading agent_name is treated as a non-match, not a test failure
+            return false;
+          }
+        },
+      );
      expect(
        foundToolCall,
-        'Expected to find a tool call for generalist agent',
+        'Expected to find an invoke_agent tool call for generalist agent',
      ).toBeTruthy();

      // 2) Verify the file was created as expected