/** * @license * Copyright 2025 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import { describe, expect } from 'vitest'; import { evalTest } from './test-helper.js'; import { assertModelHasOutput, checkModelOutputContent, } from '../integration-tests/test-helper.js'; describe('save_memory', () => { const TEST_PREFIX = 'Save memory test: '; const rememberingFavoriteColor = "Agent remembers user's favorite color"; evalTest('ALWAYS_PASSES', { name: rememberingFavoriteColor, prompt: `remember that my favorite color is blue. what is my favorite color? tell me that and surround it with $ symbol`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: 'blue', testName: `${TEST_PREFIX}${rememberingFavoriteColor}`, }); }, }); const rememberingCommandRestrictions = 'Agent remembers command restrictions'; evalTest('USUALLY_PASSES', { name: rememberingCommandRestrictions, prompt: `I don't want you to ever run npm commands.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/not run npm commands|remember|ok/i], testName: `${TEST_PREFIX}${rememberingCommandRestrictions}`, }); }, }); const rememberingWorkflow = 'Agent remembers workflow preferences'; evalTest('USUALLY_PASSES', { name: rememberingWorkflow, prompt: `I want you to always lint after building.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/always|ok|remember|will do/i], testName: `${TEST_PREFIX}${rememberingWorkflow}`, }); }, }); const ignoringTemporaryInformation = 'Agent ignores temporary conversation details'; evalTest('ALWAYS_PASSES', { name: ignoringTemporaryInformation, prompt: `I'm going to get a coffee.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); const wasToolCalled = rig .readToolLogs() .some((log) => log.toolRequest.name === 'save_memory'); expect( wasToolCalled, 'save_memory should not be called for temporary information', ).toBe(false); assertModelHasOutput(result); checkModelOutputContent(result, { testName: `${TEST_PREFIX}${ignoringTemporaryInformation}`, forbiddenContent: [/remember|will do/i], }); }, }); const rememberingPetName = "Agent remembers user's pet's name"; evalTest('ALWAYS_PASSES', { name: rememberingPetName, prompt: `Please remember that my dog's name is Buddy.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/Buddy/i], testName: `${TEST_PREFIX}${rememberingPetName}`, }); }, }); const rememberingCommandAlias = 'Agent remembers custom command aliases'; evalTest('ALWAYS_PASSES', { name: rememberingCommandAlias, prompt: `When I say 'start server', you should run 'npm run dev'.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/npm run dev|start server|ok|remember|will do/i], testName: `${TEST_PREFIX}${rememberingCommandAlias}`, }); }, }); const ignoringDbSchemaLocation = "Agent ignores workspace's database schema location"; evalTest('USUALLY_PASSES', { name: ignoringDbSchemaLocation, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); const wasToolCalled = rig .readToolLogs() .some((log) => log.toolRequest.name === 'save_memory'); expect( wasToolCalled, 'save_memory should not be called for workspace-specific information', ).toBe(false); assertModelHasOutput(result); }, }); const rememberingCodingStyle = "Agent remembers user's coding style preference"; evalTest('ALWAYS_PASSES', { name: rememberingCodingStyle, prompt: `I prefer to use tabs instead of spaces for indentation.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/tabs instead of spaces|ok|remember|will do/i], testName: `${TEST_PREFIX}${rememberingCodingStyle}`, }); }, }); const ignoringBuildArtifactLocation = 'Agent ignores workspace build artifact location'; evalTest('USUALLY_PASSES', { name: ignoringBuildArtifactLocation, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); const wasToolCalled = rig .readToolLogs() .some((log) => log.toolRequest.name === 'save_memory'); expect( wasToolCalled, 'save_memory should not be called for workspace-specific information', ).toBe(false); assertModelHasOutput(result); }, }); const ignoringMainEntryPoint = "Agent ignores workspace's main entry point"; evalTest('USUALLY_PASSES', { name: ignoringMainEntryPoint, prompt: `The main entry point for this workspace is \`src/index.js\`.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); const wasToolCalled = rig .readToolLogs() .some((log) => log.toolRequest.name === 'save_memory'); expect( wasToolCalled, 'save_memory should not be called for workspace-specific information', ).toBe(false); assertModelHasOutput(result); }, }); const rememberingBirthday = "Agent remembers user's birthday"; evalTest('ALWAYS_PASSES', { name: rememberingBirthday, prompt: `My birthday is on June 15th.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( true, ); assertModelHasOutput(result); checkModelOutputContent(result, { expectedContent: [/June 15th|ok|remember|will do/i], testName: `${TEST_PREFIX}${rememberingBirthday}`, }); }, }); const proactiveMemoryFromLongSession = 'Agent saves preference from earlier in conversation history'; evalTest('USUALLY_PASSES', { name: proactiveMemoryFromLongSession, params: { settings: { experimental: { memoryManager: true }, }, }, messages: [ { id: 'msg-1', type: 'user', content: [ { text: 'By the way, I always prefer Vitest over Jest for testing in all my projects.', }, ], timestamp: '2026-01-01T00:00:00Z', }, { id: 'msg-2', type: 'gemini', content: [{ text: 'Noted! What are you working on today?' }], timestamp: '2026-01-01T00:00:05Z', }, { id: 'msg-3', type: 'user', content: [ { text: "I'm debugging a failing API endpoint. The /users route returns a 500 error.", }, ], timestamp: '2026-01-01T00:01:00Z', }, { id: 'msg-4', type: 'gemini', content: [ { text: 'It looks like the database connection might not be initialized before the query runs.', }, ], timestamp: '2026-01-01T00:01:10Z', }, { id: 'msg-5', type: 'user', content: [ { text: 'Good catch — I fixed the import and the route works now.' }, ], timestamp: '2026-01-01T00:02:00Z', }, { id: 'msg-6', type: 'gemini', content: [{ text: 'Great! Anything else you would like to work on?' }], timestamp: '2026-01-01T00:02:05Z', }, ], prompt: 'Please save any persistent preferences or facts about me from our conversation to memory.', assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall( 'save_memory', undefined, (args) => /vitest/i.test(args), ); expect( wasToolCalled, 'Expected save_memory to be called with the Vitest preference from the conversation history', ).toBe(true); assertModelHasOutput(result); }, }); const memoryManagerRoutingPreferences = 'Agent routes global and project preferences to memory'; evalTest('USUALLY_PASSES', { name: memoryManagerRoutingPreferences, params: { settings: { experimental: { memoryManager: true }, }, }, messages: [ { id: 'msg-1', type: 'user', content: [ { text: 'I always use dark mode in all my editors and terminals.', }, ], timestamp: '2026-01-01T00:00:00Z', }, { id: 'msg-2', type: 'gemini', content: [{ text: 'Got it, I will keep that in mind!' }], timestamp: '2026-01-01T00:00:05Z', }, { id: 'msg-3', type: 'user', content: [ { text: 'For this project specifically, we use 2-space indentation.', }, ], timestamp: '2026-01-01T00:01:00Z', }, { id: 'msg-4', type: 'gemini', content: [ { text: 'Understood, 2-space indentation for this project.' }, ], timestamp: '2026-01-01T00:01:05Z', }, ], prompt: 'Please save the preferences I mentioned earlier to memory.', assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory to be called').toBe(true); assertModelHasOutput(result); }, }); });