From fe8d93c75a2354d2cad5a41bf67d61989d3f94e8 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Fri, 13 Mar 2026 21:32:00 +0000 Subject: [PATCH] Promote stable tests. (#22253) --- evals/answer-vs-act.eval.ts | 2 +- evals/hierarchical_memory.eval.ts | 2 +- evals/save_memory.eval.ts | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evals/answer-vs-act.eval.ts b/evals/answer-vs-act.eval.ts index 4e30b828d0..ff87d12564 100644 --- a/evals/answer-vs-act.eval.ts +++ b/evals/answer-vs-act.eval.ts @@ -111,7 +111,7 @@ describe('Answer vs. ask eval', () => { * Ensures that when the user asks a question about style, the agent does NOT * automatically modify the file. */ - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: 'should not edit files when asked about style', prompt: 'Is app.ts following good style?', files: FILES, diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts index ff7483416b..dd4f8fbbd1 100644 --- a/evals/hierarchical_memory.eval.ts +++ b/evals/hierarchical_memory.eval.ts @@ -11,7 +11,7 @@ import { assertModelHasOutput } from '../integration-tests/test-helper.js'; describe('Hierarchical Memory', () => { const conflictResolutionTest = 'Agent follows hierarchy for contradictory instructions'; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: conflictResolutionTest, params: { settings: { diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index e4fe9bc687..901cbf3c17 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -14,7 +14,7 @@ import { describe('save_memory', () => { const TEST_PREFIX = 'Save memory test: '; const rememberingFavoriteColor = "Agent remembers user's favorite color"; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: rememberingFavoriteColor, params: { settings: { tools: { core: ['save_memory'] } }, @@ -79,7 +79,7 @@ describe('save_memory', () => { const ignoringTemporaryInformation = 'Agent ignores temporary conversation details'; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: ignoringTemporaryInformation, params: { settings: { tools: { core: ['save_memory'] } }, @@ -104,7 +104,7 @@ describe('save_memory', () => { }); const rememberingPetName = "Agent remembers user's pet's name"; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: rememberingPetName, params: { settings: { tools: { core: ['save_memory'] } },