diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts index a069b77ac2..71f9cc3e43 100644 --- a/evals/hierarchical_memory.eval.ts +++ b/evals/hierarchical_memory.eval.ts @@ -6,17 +6,12 @@ import { describe, expect } from 'vitest'; import { evalTest } from './test-helper.js'; -import { - assertModelHasOutput, - checkModelOutputContent, -} from '../integration-tests/test-helper.js'; +import { assertModelHasOutput } from '../integration-tests/test-helper.js'; describe('Hierarchical Memory', () => { - const TEST_PREFIX = 'Hierarchical memory test: '; - const conflictResolutionTest = 'Agent follows hierarchy for contradictory instructions'; - evalTest('ALWAYS_PASSES', { + evalTest('USUALLY_PASSES', { name: conflictResolutionTest, params: { settings: { @@ -52,7 +47,7 @@ What is my favorite fruit? Tell me just the name of the fruit.`, }); const provenanceAwarenessTest = 'Agent is aware of memory provenance'; - evalTest('ALWAYS_PASSES', { + evalTest('USUALLY_PASSES', { name: provenanceAwarenessTest, params: { settings: { @@ -91,7 +86,7 @@ Provide the answer as an XML block like this: }); const extensionVsGlobalTest = 'Extension memory wins over Global memory'; - evalTest('ALWAYS_PASSES', { + evalTest('USUALLY_PASSES', { name: extensionVsGlobalTest, params: { settings: {