/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
import {
  assertModelHasOutput,
  checkModelOutputContent,
} from '../integration-tests/test-helper.js';

/**
 * Evals for the `save_memory` tool.
 *
 * Nine of the evals give the agent a durable fact or preference and expect a
 * `save_memory` tool call plus a matching reply; one eval gives the agent a
 * throwaway remark and expects that `save_memory` is NOT called.
 */
describe('save_memory', () => {
  const TEST_PREFIX = 'Save memory test: ';
  const SAVE_MEMORY_TOOL = 'save_memory';

  /**
   * Builds the per-eval params that restrict the core tools to `save_memory`.
   * A fresh object is returned on every call so no eval shares (or can
   * mutate) another eval's settings.
   *
   * @returns {{ settings: { tools: { core: string[] } } }} Eval params.
   */
  const saveMemoryParams = () => ({
    settings: { tools: { core: [SAVE_MEMORY_TOOL] } },
  });

  /**
   * Registers an 'ALWAYS_PASSES' eval asserting that the agent calls
   * `save_memory` for the given prompt and that its output matches
   * `expectedContent`.
   *
   * @param {object} options
   * @param {string} options.name - Eval name; also appended to TEST_PREFIX to
   *   form the `testName` handed to `checkModelOutputContent`.
   * @param {string} options.prompt - Prompt sent to the agent.
   * @param {string | RegExp[]} options.expectedContent - Forwarded unchanged
   *   to `checkModelOutputContent`.
   */
  const evalRemembers = ({ name, prompt, expectedContent }) => {
    evalTest('ALWAYS_PASSES', {
      name,
      params: saveMemoryParams(),
      prompt,
      assert: async (rig, result) => {
        const wasToolCalled = await rig.waitForToolCall(SAVE_MEMORY_TOOL);
        expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
          true,
        );

        assertModelHasOutput(result);
        checkModelOutputContent(result, {
          expectedContent,
          testName: `${TEST_PREFIX}${name}`,
        });
      },
    });
  };

  evalRemembers({
    name: "Agent remembers user's favorite color",
    prompt: `remember that my favorite color is blue. what is my favorite color? tell me that and surround it with $ symbol`,
    expectedContent: 'blue',
  });

  evalRemembers({
    name: 'Agent remembers command restrictions',
    prompt: `I don't want you to ever run npm commands.`,
    expectedContent: [/not run npm commands|remember|ok/i],
  });

  evalRemembers({
    name: 'Agent remembers workflow preferences',
    prompt: `I want you to always lint after building.`,
    expectedContent: [/always|ok|remember|will do/i],
  });

  // Negative case: a passing remark must not be persisted, so this eval checks
  // the recorded tool logs instead of waiting for a tool call.
  const ignoringTemporaryInformation =
    'Agent ignores temporary conversation details';
  evalTest('ALWAYS_PASSES', {
    name: ignoringTemporaryInformation,
    params: saveMemoryParams(),
    prompt: `I'm going to get a coffee.`,
    assert: async (rig, result) => {
      // Telemetry is awaited before the tool logs are read.
      await rig.waitForTelemetryReady();
      const wasToolCalled = rig
        .readToolLogs()
        .some((log) => log.toolRequest.name === SAVE_MEMORY_TOOL);
      expect(
        wasToolCalled,
        'save_memory should not be called for temporary information',
      ).toBe(false);

      assertModelHasOutput(result);
      checkModelOutputContent(result, {
        testName: `${TEST_PREFIX}${ignoringTemporaryInformation}`,
        forbiddenContent: [/remember|will do/i],
      });
    },
  });

  evalRemembers({
    name: "Agent remembers user's pet's name",
    // The prompt carries a line break between its two sentences.
    prompt: `My dog's name is Buddy. \nWhat is my dog's name?`,
    expectedContent: [/Buddy/i],
  });

  evalRemembers({
    name: 'Agent remembers custom command aliases',
    prompt: `When I say 'start server', you should run 'npm run dev'.`,
    expectedContent: [/npm run dev|start server|ok|remember|will do/i],
  });

  evalRemembers({
    name: "Agent remembers project's database schema location",
    prompt: `The database schema for this project is located in \`db/schema.sql\`.`,
    expectedContent: [/database schema|ok|remember|will do/i],
  });

  evalRemembers({
    name: "Agent remembers user's coding style preference",
    prompt: `I prefer to use tabs instead of spaces for indentation.`,
    expectedContent: [/tabs instead of spaces|ok|remember|will do/i],
  });

  evalRemembers({
    name: 'Agent remembers specific project test command',
    prompt: `The command to run all backend tests is \`npm run test:backend\`.`,
    expectedContent: [
      /command to run all backend tests|ok|remember|will do/i,
    ],
  });

  evalRemembers({
    name: "Agent remembers project's main entry point",
    prompt: `The main entry point for this project is \`src/index.js\`.`,
    expectedContent: [
      /main entry point for this project|ok|remember|will do/i,
    ],
  });
});