test(evals): mark all save_memory evals as USUALLY_PASSES due to unreliability (#18786)

This commit is contained in:
Jerop Kipruto
2026-02-10 21:16:52 -05:00
committed by GitHub
parent b3ecac7086
commit 9c11ff2d58
+11 -11
View File
@@ -14,7 +14,7 @@ import {
describe('save_memory', () => { describe('save_memory', () => {
const TEST_PREFIX = 'Save memory test: '; const TEST_PREFIX = 'Save memory test: ';
const rememberingFavoriteColor = "Agent remembers user's favorite color"; const rememberingFavoriteColor = "Agent remembers user's favorite color";
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingFavoriteColor, name: rememberingFavoriteColor,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -36,7 +36,7 @@ describe('save_memory', () => {
}, },
}); });
const rememberingCommandRestrictions = 'Agent remembers command restrictions'; const rememberingCommandRestrictions = 'Agent remembers command restrictions';
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingCommandRestrictions, name: rememberingCommandRestrictions,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -57,7 +57,7 @@ describe('save_memory', () => {
}); });
const rememberingWorkflow = 'Agent remembers workflow preferences'; const rememberingWorkflow = 'Agent remembers workflow preferences';
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingWorkflow, name: rememberingWorkflow,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -79,7 +79,7 @@ describe('save_memory', () => {
const ignoringTemporaryInformation = const ignoringTemporaryInformation =
'Agent ignores temporary conversation details'; 'Agent ignores temporary conversation details';
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: ignoringTemporaryInformation, name: ignoringTemporaryInformation,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -104,7 +104,7 @@ describe('save_memory', () => {
}); });
const rememberingPetName = "Agent remembers user's pet's name"; const rememberingPetName = "Agent remembers user's pet's name";
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingPetName, name: rememberingPetName,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -125,7 +125,7 @@ describe('save_memory', () => {
}); });
const rememberingCommandAlias = 'Agent remembers custom command aliases'; const rememberingCommandAlias = 'Agent remembers custom command aliases';
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingCommandAlias, name: rememberingCommandAlias,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -147,7 +147,7 @@ describe('save_memory', () => {
const ignoringDbSchemaLocation = const ignoringDbSchemaLocation =
"Agent ignores workspace's database schema location"; "Agent ignores workspace's database schema location";
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: ignoringDbSchemaLocation, name: ignoringDbSchemaLocation,
params: { params: {
settings: { settings: {
@@ -178,7 +178,7 @@ describe('save_memory', () => {
const rememberingCodingStyle = const rememberingCodingStyle =
"Agent remembers user's coding style preference"; "Agent remembers user's coding style preference";
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingCodingStyle, name: rememberingCodingStyle,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
@@ -200,7 +200,7 @@ describe('save_memory', () => {
const ignoringBuildArtifactLocation = const ignoringBuildArtifactLocation =
'Agent ignores workspace build artifact location'; 'Agent ignores workspace build artifact location';
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: ignoringBuildArtifactLocation, name: ignoringBuildArtifactLocation,
params: { params: {
settings: { settings: {
@@ -230,7 +230,7 @@ describe('save_memory', () => {
}); });
const ignoringMainEntryPoint = "Agent ignores workspace's main entry point"; const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: ignoringMainEntryPoint, name: ignoringMainEntryPoint,
params: { params: {
settings: { settings: {
@@ -260,7 +260,7 @@ describe('save_memory', () => {
}); });
const rememberingBirthday = "Agent remembers user's birthday"; const rememberingBirthday = "Agent remembers user's birthday";
evalTest('ALWAYS_PASSES', { evalTest('USUALLY_PASSES', {
name: rememberingBirthday, name: rememberingBirthday,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },