Files
gemini-cli/evals/save_memory.eval.ts

240 lines
7.6 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
import {
assertModelHasOutput,
checkModelOutputContent,
} from '../integration-tests/test-helper.js';
describe('save_memory', () => {
const TEST_PREFIX = 'Save memory test: ';
const rememberingFavoriteColor = "Agent remembers user's favorite color";
evalTest('ALWAYS_PASSES', {
name: rememberingFavoriteColor,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `remember that my favorite color is blue.
what is my favorite color? tell me that and surround it with $ symbol`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: 'blue',
testName: `${TEST_PREFIX}${rememberingFavoriteColor}`,
});
},
});
const rememberingCommandRestrictions = 'Agent remembers command restrictions';
evalTest('ALWAYS_PASSES', {
name: rememberingCommandRestrictions,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `I don't want you to ever run npm commands.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/not run npm commands|remember|ok/i],
testName: `${TEST_PREFIX}${rememberingCommandRestrictions}`,
});
},
});
const rememberingWorkflow = 'Agent remembers workflow preferences';
evalTest('ALWAYS_PASSES', {
name: rememberingWorkflow,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `I want you to always lint after building.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/always|ok|remember|will do/i],
testName: `${TEST_PREFIX}${rememberingWorkflow}`,
});
},
});
const ignoringTemporaryInformation =
'Agent ignores temporary conversation details';
evalTest('ALWAYS_PASSES', {
name: ignoringTemporaryInformation,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `I'm going to get a coffee.`,
assert: async (rig, result) => {
await rig.waitForTelemetryReady();
const wasToolCalled = rig
.readToolLogs()
.some((log) => log.toolRequest.name === 'save_memory');
expect(
wasToolCalled,
'save_memory should not be called for temporary information',
).toBe(false);
assertModelHasOutput(result);
checkModelOutputContent(result, {
testName: `${TEST_PREFIX}${ignoringTemporaryInformation}`,
forbiddenContent: [/remember|will do/i],
});
},
});
const rememberingPetName = "Agent remembers user's pet's name";
evalTest('ALWAYS_PASSES', {
name: rememberingPetName,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `My dog's name is Buddy. What is my dog's name?`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/Buddy/i],
testName: `${TEST_PREFIX}${rememberingPetName}`,
});
},
});
const rememberingCommandAlias = 'Agent remembers custom command aliases';
evalTest('ALWAYS_PASSES', {
name: rememberingCommandAlias,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `When I say 'start server', you should run 'npm run dev'.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/npm run dev|start server|ok|remember|will do/i],
testName: `${TEST_PREFIX}${rememberingCommandAlias}`,
});
},
});
const rememberingDbSchemaLocation =
"Agent remembers project's database schema location";
evalTest('ALWAYS_PASSES', {
name: rememberingDbSchemaLocation,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `The database schema for this project is located in \`db/schema.sql\`.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/database schema|ok|remember|will do/i],
testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`,
});
},
});
const rememberingCodingStyle =
"Agent remembers user's coding style preference";
evalTest('ALWAYS_PASSES', {
name: rememberingCodingStyle,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `I prefer to use tabs instead of spaces for indentation.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/tabs instead of spaces|ok|remember|will do/i],
testName: `${TEST_PREFIX}${rememberingCodingStyle}`,
});
},
});
const rememberingTestCommand =
'Agent remembers specific project test command';
evalTest('ALWAYS_PASSES', {
name: rememberingTestCommand,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `The command to run all backend tests is \`npm run test:backend\`.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [
/command to run all backend tests|ok|remember|will do/i,
],
testName: `${TEST_PREFIX}${rememberingTestCommand}`,
});
},
});
const rememberingMainEntryPoint =
"Agent remembers project's main entry point";
evalTest('ALWAYS_PASSES', {
name: rememberingMainEntryPoint,
params: {
settings: { tools: { core: ['save_memory'] } },
},
prompt: `The main entry point for this project is \`src/index.js\`.`,
assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
true,
);
assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [
/main entry point for this project|ok|remember|will do/i,
],
testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`,
});
},
});
});