diff --git a/evals/answer-vs-act.eval.ts b/evals/answer-vs-act.eval.ts index 4e30b828d0..ff87d12564 100644 --- a/evals/answer-vs-act.eval.ts +++ b/evals/answer-vs-act.eval.ts @@ -111,7 +111,7 @@ describe('Answer vs. ask eval', () => { * Ensures that when the user asks a question about style, the agent does NOT * automatically modify the file. */ - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: 'should not edit files when asked about style', prompt: 'Is app.ts following good style?', files: FILES, diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts index ff7483416b..dd4f8fbbd1 100644 --- a/evals/hierarchical_memory.eval.ts +++ b/evals/hierarchical_memory.eval.ts @@ -11,7 +11,7 @@ import { assertModelHasOutput } from '../integration-tests/test-helper.js'; describe('Hierarchical Memory', () => { const conflictResolutionTest = 'Agent follows hierarchy for contradictory instructions'; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: conflictResolutionTest, params: { settings: { diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index e4fe9bc687..901cbf3c17 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -14,7 +14,7 @@ import { describe('save_memory', () => { const TEST_PREFIX = 'Save memory test: '; const rememberingFavoriteColor = "Agent remembers user's favorite color"; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: rememberingFavoriteColor, params: { settings: { tools: { core: ['save_memory'] } }, @@ -79,7 +79,7 @@ describe('save_memory', () => { const ignoringTemporaryInformation = 'Agent ignores temporary conversation details'; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: ignoringTemporaryInformation, params: { settings: { tools: { core: ['save_memory'] } }, @@ -104,7 +104,7 @@ describe('save_memory', () => { }); const rememberingPetName = "Agent remembers user's pet's name"; - evalTest('USUALLY_PASSES', { + evalTest('ALWAYS_PASSES', { name: rememberingPetName, params: { settings: { tools: { core: ['save_memory'] } },