mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-29 06:25:16 -07:00
test(evals): mark all save_memory evals as USUALLY_PASSES due to unreliability (#18786)
This commit is contained in:
+11
-11
@@ -14,7 +14,7 @@ import {
|
|||||||
describe('save_memory', () => {
|
describe('save_memory', () => {
|
||||||
const TEST_PREFIX = 'Save memory test: ';
|
const TEST_PREFIX = 'Save memory test: ';
|
||||||
const rememberingFavoriteColor = "Agent remembers user's favorite color";
|
const rememberingFavoriteColor = "Agent remembers user's favorite color";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingFavoriteColor,
|
name: rememberingFavoriteColor,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -36,7 +36,7 @@ describe('save_memory', () => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
const rememberingCommandRestrictions = 'Agent remembers command restrictions';
|
const rememberingCommandRestrictions = 'Agent remembers command restrictions';
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingCommandRestrictions,
|
name: rememberingCommandRestrictions,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -57,7 +57,7 @@ describe('save_memory', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const rememberingWorkflow = 'Agent remembers workflow preferences';
|
const rememberingWorkflow = 'Agent remembers workflow preferences';
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingWorkflow,
|
name: rememberingWorkflow,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -79,7 +79,7 @@ describe('save_memory', () => {
|
|||||||
|
|
||||||
const ignoringTemporaryInformation =
|
const ignoringTemporaryInformation =
|
||||||
'Agent ignores temporary conversation details';
|
'Agent ignores temporary conversation details';
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: ignoringTemporaryInformation,
|
name: ignoringTemporaryInformation,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -104,7 +104,7 @@ describe('save_memory', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const rememberingPetName = "Agent remembers user's pet's name";
|
const rememberingPetName = "Agent remembers user's pet's name";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingPetName,
|
name: rememberingPetName,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -125,7 +125,7 @@ describe('save_memory', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const rememberingCommandAlias = 'Agent remembers custom command aliases';
|
const rememberingCommandAlias = 'Agent remembers custom command aliases';
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingCommandAlias,
|
name: rememberingCommandAlias,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -147,7 +147,7 @@ describe('save_memory', () => {
|
|||||||
|
|
||||||
const ignoringDbSchemaLocation =
|
const ignoringDbSchemaLocation =
|
||||||
"Agent ignores workspace's database schema location";
|
"Agent ignores workspace's database schema location";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: ignoringDbSchemaLocation,
|
name: ignoringDbSchemaLocation,
|
||||||
params: {
|
params: {
|
||||||
settings: {
|
settings: {
|
||||||
@@ -178,7 +178,7 @@ describe('save_memory', () => {
|
|||||||
|
|
||||||
const rememberingCodingStyle =
|
const rememberingCodingStyle =
|
||||||
"Agent remembers user's coding style preference";
|
"Agent remembers user's coding style preference";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingCodingStyle,
|
name: rememberingCodingStyle,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
@@ -200,7 +200,7 @@ describe('save_memory', () => {
|
|||||||
|
|
||||||
const ignoringBuildArtifactLocation =
|
const ignoringBuildArtifactLocation =
|
||||||
'Agent ignores workspace build artifact location';
|
'Agent ignores workspace build artifact location';
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: ignoringBuildArtifactLocation,
|
name: ignoringBuildArtifactLocation,
|
||||||
params: {
|
params: {
|
||||||
settings: {
|
settings: {
|
||||||
@@ -230,7 +230,7 @@ describe('save_memory', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
|
const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: ignoringMainEntryPoint,
|
name: ignoringMainEntryPoint,
|
||||||
params: {
|
params: {
|
||||||
settings: {
|
settings: {
|
||||||
@@ -260,7 +260,7 @@ describe('save_memory', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const rememberingBirthday = "Agent remembers user's birthday";
|
const rememberingBirthday = "Agent remembers user's birthday";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('USUALLY_PASSES', {
|
||||||
name: rememberingBirthday,
|
name: rememberingBirthday,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
|
|||||||
Reference in New Issue
Block a user