diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts
index 0a3b76cea2..a069b77ac2 100644
--- a/evals/hierarchical_memory.eval.ts
+++ b/evals/hierarchical_memory.eval.ts
@@ -42,11 +42,12 @@ When asked for my favorite fruit, always say "Cherry".
What is my favorite fruit? Tell me just the name of the fruit.`,
- assert: async (_rig, result) => {
- assertModelHasOutput(result);
- expect(result).toMatch(/Cherry/i);
- expect(result).not.toMatch(/Apple/i);
- expect(result).not.toMatch(/Banana/i);
+ assert: async (rig) => { // callback now declares only `rig`; the former `result` parameter is dropped
+ const stdout = rig._lastRunStdout!; // NOTE(review): `!` assumes the rig already holds stdout from a run — confirm
+ assertModelHasOutput(stdout); // helper defined outside this hunk; presumably rejects empty output — verify
+ expect(stdout).toMatch(/Cherry/i); // case-insensitive; the prompt instructs the model to always say "Cherry"
+ expect(stdout).not.toMatch(/Apple/i); // NOTE(review): presumably an answer from a competing memory source — confirm
+ expect(stdout).not.toMatch(/Banana/i); // NOTE(review): same assumption as the Apple check above
},
});
@@ -80,11 +81,12 @@ Provide the answer as an XML block like this:
Instruction ...
Instruction ...
`,
- assert: async (_rig, result) => {
- assertModelHasOutput(result);
- expect(result).toMatch(/.*Instruction A/i);
- expect(result).toMatch(/.*Instruction B/i);
- expect(result).toMatch(/.*Instruction C/i);
+ assert: async (rig) => { // callback now declares only `rig`; the former `result` parameter is dropped
+ const stdout = rig._lastRunStdout!; // NOTE(review): `!` assumes the rig already holds stdout from a run — confirm
+ assertModelHasOutput(stdout); // helper defined outside this hunk; presumably rejects empty output — verify
+ expect(stdout).toMatch(/.*Instruction A/i); // NOTE(review): pattern is unanchored, so the leading `.*` looks redundant
+ expect(stdout).toMatch(/.*Instruction B/i); // all three instructions must appear; relative order is not checked
+ expect(stdout).toMatch(/.*Instruction C/i);
},
});
@@ -108,10 +110,11 @@ Set the theme to "Dark".
What theme should I use? Tell me just the name of the theme.`,
- assert: async (_rig, result) => {
- assertModelHasOutput(result);
- expect(result).toMatch(/Dark/i);
- expect(result).not.toMatch(/Light/i);
+ assert: async (rig) => { // callback now declares only `rig`; the former `result` parameter is dropped
+ const stdout = rig._lastRunStdout!; // NOTE(review): `!` assumes the rig already holds stdout from a run — confirm
+ assertModelHasOutput(stdout); // helper defined outside this hunk; presumably rejects empty output — verify
+ expect(stdout).toMatch(/Dark/i); // case-insensitive; the prompt sets the theme to "Dark"
+ expect(stdout).not.toMatch(/Light/i); // NOTE(review): presumably the theme from an overridden memory source — confirm
},
});
});