From fe70052bafd72e9d0aae9ab91d0e7dd2c3c52a56 Mon Sep 17 00:00:00 2001 From: "N. Taylor Mullen" Date: Mon, 9 Feb 2026 01:06:03 -0800 Subject: [PATCH] fix(evals): update save_memory evals and simplify tool description (#18610) --- evals/save_memory.eval.ts | 117 ++++++++++++++------- packages/core/src/tools/memoryTool.test.ts | 2 +- packages/core/src/tools/memoryTool.ts | 54 ++++------ 3 files changed, 100 insertions(+), 73 deletions(-) diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index c1ab748edb..f93ffb9c5b 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -109,7 +109,7 @@ describe('save_memory', () => { params: { settings: { tools: { core: ['save_memory'] } }, }, - prompt: `My dog's name is Buddy. What is my dog's name?`, + prompt: `Please remember that my dog's name is Buddy.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( @@ -145,25 +145,34 @@ describe('save_memory', () => { }, }); - const rememberingDbSchemaLocation = - "Agent remembers project's database schema location"; + const ignoringDbSchemaLocation = + "Agent ignores workspace's database schema location"; evalTest('ALWAYS_PASSES', { - name: rememberingDbSchemaLocation, + name: ignoringDbSchemaLocation, params: { - settings: { tools: { core: ['save_memory'] } }, + settings: { + tools: { + core: [ + 'save_memory', + 'list_directory', + 'read_file', + 'run_shell_command', + ], + }, + }, }, - prompt: `The database schema for this project is located in \`db/schema.sql\`.`, + prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, assert: async (rig, result) => { - const wasToolCalled = await rig.waitForToolCall('save_memory'); - expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( - true, - ); + await rig.waitForTelemetryReady(); + const wasToolCalled = rig + .readToolLogs() + .some((log) => log.toolRequest.name === 'save_memory'); + expect( + wasToolCalled, + 'save_memory should not be called for workspace-specific information', + ).toBe(false); assertModelHasOutput(result); - checkModelOutputContent(result, { - expectedContent: [/database schema|ok|remember|will do/i], - testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`, - }); }, }); @@ -189,38 +198,74 @@ describe('save_memory', () => { }, }); - const rememberingTestCommand = - 'Agent remembers specific project test command'; + const ignoringBuildArtifactLocation = + 'Agent ignores workspace build artifact location'; evalTest('ALWAYS_PASSES', { - name: rememberingTestCommand, + name: ignoringBuildArtifactLocation, params: { - settings: { tools: { core: ['save_memory'] } }, + settings: { + tools: { + core: [ + 'save_memory', + 'list_directory', + 'read_file', + 'run_shell_command', + ], + }, + }, }, - prompt: `The command to run all backend tests is \`npm run test:backend\`.`, + prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, assert: async (rig, result) => { - const wasToolCalled = await rig.waitForToolCall('save_memory'); - expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( - true, - ); + await rig.waitForTelemetryReady(); + const wasToolCalled = rig + .readToolLogs() + .some((log) => log.toolRequest.name === 'save_memory'); + expect( + wasToolCalled, + 'save_memory should not be called for workspace-specific information', + ).toBe(false); assertModelHasOutput(result); - checkModelOutputContent(result, { - expectedContent: [ - /command to run all backend tests|ok|remember|will do/i, - ], - testName: `${TEST_PREFIX}${rememberingTestCommand}`, - }); }, }); - const rememberingMainEntryPoint = - "Agent remembers project's main entry point"; + const ignoringMainEntryPoint = "Agent ignores workspace's main entry point"; evalTest('ALWAYS_PASSES', { - name: rememberingMainEntryPoint, + name: ignoringMainEntryPoint, + params: { + settings: { + tools: { + core: [ + 'save_memory', + 'list_directory', + 'read_file', + 'run_shell_command', + ], + }, + }, + }, + prompt: `The main entry point for this workspace is \`src/index.js\`.`, + assert: async (rig, result) => { + await rig.waitForTelemetryReady(); + const wasToolCalled = rig + .readToolLogs() + .some((log) => log.toolRequest.name === 'save_memory'); + expect( + wasToolCalled, + 'save_memory should not be called for workspace-specific information', + ).toBe(false); + + assertModelHasOutput(result); + }, + }); + + const rememberingBirthday = "Agent remembers user's birthday"; + evalTest('ALWAYS_PASSES', { + name: rememberingBirthday, params: { settings: { tools: { core: ['save_memory'] } }, }, - prompt: `The main entry point for this project is \`src/index.js\`.`, + prompt: `My birthday is on June 15th.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( @@ -229,10 +274,8 @@ describe('save_memory', () => { assertModelHasOutput(result); checkModelOutputContent(result, { - expectedContent: [ - /main entry point for this project|ok|remember|will do/i, - ], - testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`, + expectedContent: [/June 15th|ok|remember|will do/i], + testName: `${TEST_PREFIX}${rememberingBirthday}`, }); }, }); diff --git a/packages/core/src/tools/memoryTool.test.ts b/packages/core/src/tools/memoryTool.test.ts index 6a3e03d8e5..654b5943c4 100644 --- a/packages/core/src/tools/memoryTool.test.ts +++ b/packages/core/src/tools/memoryTool.test.ts @@ -102,7 +102,7 @@ describe('MemoryTool', () => { expect(memoryTool.name).toBe('save_memory'); expect(memoryTool.displayName).toBe('SaveMemory'); expect(memoryTool.description).toContain( - 'Saves a specific piece of information', + 'Saves concise global user context', ); expect(memoryTool.schema).toBeDefined(); expect(memoryTool.schema.name).toBe('save_memory'); diff --git a/packages/core/src/tools/memoryTool.ts b/packages/core/src/tools/memoryTool.ts index cd23dffb34..4cc3014357 100644 --- a/packages/core/src/tools/memoryTool.ts +++ b/packages/core/src/tools/memoryTool.ts @@ -11,7 +11,6 @@ import { Kind, ToolConfirmationOutcome, } from './tools.js'; -import type { FunctionDeclaration } from '@google/genai'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import { Storage } from '../config/storage.js'; @@ -26,41 +25,14 @@ import { ToolErrorType } from './tool-error.js'; import { MEMORY_TOOL_NAME } from './tool-names.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; -const memoryToolSchemaData: FunctionDeclaration = { - name: MEMORY_TOOL_NAME, - description: - 'Saves a specific piece of information, fact, or user preference to your long-term memory. Use this when the user explicitly asks you to remember something, or when they state a clear, concise fact or preference that seems important to retain for future interactions. Examples: "Always lint after building", "Never run sudo commands", "Remember my address".', - parametersJsonSchema: { - type: 'object', - properties: { - fact: { - type: 'string', - description: - 'The specific fact or piece of information to remember. Should be a clear, self-contained statement.', - }, - }, - required: ['fact'], - additionalProperties: false, - }, -}; - const memoryToolDescription = ` -Saves a specific piece of information or fact to your long-term memory. +Saves concise global user context (preferences, facts) for use across ALL workspaces. -Use this tool: +### CRITICAL: GLOBAL CONTEXT ONLY +NEVER save workspace-specific context, local paths, or commands (e.g. "The entry point is src/index.js", "The test command is npm test"). These are local to the current workspace and must NOT be saved globally. EXCLUSIVELY for context relevant across ALL workspaces. -- When the user explicitly asks you to remember something (e.g., "Remember that I like pineapple on pizza", "Please save this: my cat's name is Whiskers"). -- When the user states a clear, concise fact about themselves, their preferences, or their environment that seems important for you to retain for future interactions to provide a more personalized and effective assistance. - -Do NOT use this tool: - -- To remember conversational context that is only relevant for the current session. -- To save long, complex, or rambling pieces of text. The fact should be relatively short and to the point. -- If you are unsure whether the information is a fact worth remembering long-term. If in doubt, you can ask the user, "Should I remember that for you?" - -## Parameters - -- \`fact\` (string, required): The specific fact or piece of information to remember. This should be a clear, self-contained statement. For example, if the user says "My favorite color is blue", the fact would be "My favorite color is blue".`; +- Use for "Remember X" or clear personal facts. +- Do NOT use for session context.`; export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md'; export const MEMORY_SECTION_HEADER = '## Gemini Added Memories'; @@ -313,9 +285,21 @@ export class MemoryTool super( MemoryTool.Name, 'SaveMemory', - memoryToolDescription, + memoryToolDescription + + ' Examples: "Always lint after building", "Never run sudo commands", "Remember my address".', Kind.Think, - memoryToolSchemaData.parametersJsonSchema as Record, + { + type: 'object', + properties: { + fact: { + type: 'string', + description: + 'The specific fact or piece of information to remember. Should be a clear, self-contained statement.', + }, + }, + required: ['fact'], + additionalProperties: false, + }, messageBus, true, false,