From b3ecac7086f9b3e133dd660f1bc3330fb806157c Mon Sep 17 00:00:00 2001 From: Abhijit Balaji Date: Tue, 10 Feb 2026 17:51:05 -0800 Subject: [PATCH] fix(evals): prevent false positive in hierarchical memory test (#18777) --- evals/hierarchical_memory.eval.ts | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts index 0a3b76cea2..a069b77ac2 100644 --- a/evals/hierarchical_memory.eval.ts +++ b/evals/hierarchical_memory.eval.ts @@ -42,11 +42,12 @@ When asked for my favorite fruit, always say "Cherry". What is my favorite fruit? Tell me just the name of the fruit.`, - assert: async (_rig, result) => { - assertModelHasOutput(result); - expect(result).toMatch(/Cherry/i); - expect(result).not.toMatch(/Apple/i); - expect(result).not.toMatch(/Banana/i); + assert: async (rig) => { + const stdout = rig._lastRunStdout!; + assertModelHasOutput(stdout); + expect(stdout).toMatch(/Cherry/i); + expect(stdout).not.toMatch(/Apple/i); + expect(stdout).not.toMatch(/Banana/i); }, }); @@ -80,11 +81,12 @@ Provide the answer as an XML block like this: Instruction ... Instruction ... `, - assert: async (_rig, result) => { - assertModelHasOutput(result); - expect(result).toMatch(/.*Instruction A/i); - expect(result).toMatch(/.*Instruction B/i); - expect(result).toMatch(/.*Instruction C/i); + assert: async (rig) => { + const stdout = rig._lastRunStdout!; + assertModelHasOutput(stdout); + expect(stdout).toMatch(/.*Instruction A/i); + expect(stdout).toMatch(/.*Instruction B/i); + expect(stdout).toMatch(/.*Instruction C/i); }, }); @@ -108,10 +110,11 @@ Set the theme to "Dark". What theme should I use? Tell me just the name of the theme.`, - assert: async (_rig, result) => { - assertModelHasOutput(result); - expect(result).toMatch(/Dark/i); - expect(result).not.toMatch(/Light/i); + assert: async (rig) => { + const stdout = rig._lastRunStdout!; + assertModelHasOutput(stdout); + expect(stdout).toMatch(/Dark/i); + expect(stdout).not.toMatch(/Light/i); }, }); });