fix(evals): update save_memory evals and simplify tool description (#18610)

This commit is contained in:
N. Taylor Mullen
2026-02-09 01:06:03 -08:00
committed by GitHub
parent 8cbe851339
commit fe70052baf
3 changed files with 100 additions and 73 deletions

View File

@@ -109,7 +109,7 @@ describe('save_memory', () => {
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
}, },
prompt: `My dog's name is Buddy. What is my dog's name?`, prompt: `Please remember that my dog's name is Buddy.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory'); const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
@@ -145,25 +145,34 @@ describe('save_memory', () => {
}, },
}); });
const rememberingDbSchemaLocation = const ignoringDbSchemaLocation =
"Agent remembers project's database schema location"; "Agent ignores workspace's database schema location";
evalTest('ALWAYS_PASSES', { evalTest('ALWAYS_PASSES', {
name: rememberingDbSchemaLocation, name: ignoringDbSchemaLocation,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: {
tools: {
core: [
'save_memory',
'list_directory',
'read_file',
'run_shell_command',
],
},
},
}, },
prompt: `The database schema for this project is located in \`db/schema.sql\`.`, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory'); await rig.waitForTelemetryReady();
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( const wasToolCalled = rig
true, .readToolLogs()
); .some((log) => log.toolRequest.name === 'save_memory');
expect(
wasToolCalled,
'save_memory should not be called for workspace-specific information',
).toBe(false);
assertModelHasOutput(result); assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [/database schema|ok|remember|will do/i],
testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`,
});
}, },
}); });
@@ -189,38 +198,74 @@ describe('save_memory', () => {
}, },
}); });
const rememberingTestCommand = const ignoringBuildArtifactLocation =
'Agent remembers specific project test command'; 'Agent ignores workspace build artifact location';
evalTest('ALWAYS_PASSES', { evalTest('ALWAYS_PASSES', {
name: rememberingTestCommand, name: ignoringBuildArtifactLocation,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: {
tools: {
core: [
'save_memory',
'list_directory',
'read_file',
'run_shell_command',
],
},
},
}, },
prompt: `The command to run all backend tests is \`npm run test:backend\`.`, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory'); await rig.waitForTelemetryReady();
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( const wasToolCalled = rig
true, .readToolLogs()
); .some((log) => log.toolRequest.name === 'save_memory');
expect(
wasToolCalled,
'save_memory should not be called for workspace-specific information',
).toBe(false);
assertModelHasOutput(result); assertModelHasOutput(result);
checkModelOutputContent(result, {
expectedContent: [
/command to run all backend tests|ok|remember|will do/i,
],
testName: `${TEST_PREFIX}${rememberingTestCommand}`,
});
}, },
}); });
const rememberingMainEntryPoint = const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
"Agent remembers project's main entry point";
evalTest('ALWAYS_PASSES', { evalTest('ALWAYS_PASSES', {
name: rememberingMainEntryPoint, name: ignoringMainEntryPoint,
params: {
settings: {
tools: {
core: [
'save_memory',
'list_directory',
'read_file',
'run_shell_command',
],
},
},
},
prompt: `The main entry point for this workspace is \`src/index.js\`.`,
assert: async (rig, result) => {
await rig.waitForTelemetryReady();
const wasToolCalled = rig
.readToolLogs()
.some((log) => log.toolRequest.name === 'save_memory');
expect(
wasToolCalled,
'save_memory should not be called for workspace-specific information',
).toBe(false);
assertModelHasOutput(result);
},
});
const rememberingBirthday = "Agent remembers user's birthday";
evalTest('ALWAYS_PASSES', {
name: rememberingBirthday,
params: { params: {
settings: { tools: { core: ['save_memory'] } }, settings: { tools: { core: ['save_memory'] } },
}, },
prompt: `The main entry point for this project is \`src/index.js\`.`, prompt: `My birthday is on June 15th.`,
assert: async (rig, result) => { assert: async (rig, result) => {
const wasToolCalled = await rig.waitForToolCall('save_memory'); const wasToolCalled = await rig.waitForToolCall('save_memory');
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe( expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
@@ -229,10 +274,8 @@ describe('save_memory', () => {
assertModelHasOutput(result); assertModelHasOutput(result);
checkModelOutputContent(result, { checkModelOutputContent(result, {
expectedContent: [ expectedContent: [/June 15th|ok|remember|will do/i],
/main entry point for this project|ok|remember|will do/i, testName: `${TEST_PREFIX}${rememberingBirthday}`,
],
testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`,
}); });
}, },
}); });

View File

@@ -102,7 +102,7 @@ describe('MemoryTool', () => {
expect(memoryTool.name).toBe('save_memory'); expect(memoryTool.name).toBe('save_memory');
expect(memoryTool.displayName).toBe('SaveMemory'); expect(memoryTool.displayName).toBe('SaveMemory');
expect(memoryTool.description).toContain( expect(memoryTool.description).toContain(
'Saves a specific piece of information', 'Saves concise global user context',
); );
expect(memoryTool.schema).toBeDefined(); expect(memoryTool.schema).toBeDefined();
expect(memoryTool.schema.name).toBe('save_memory'); expect(memoryTool.schema.name).toBe('save_memory');

View File

@@ -11,7 +11,6 @@ import {
Kind, Kind,
ToolConfirmationOutcome, ToolConfirmationOutcome,
} from './tools.js'; } from './tools.js';
import type { FunctionDeclaration } from '@google/genai';
import * as fs from 'node:fs/promises'; import * as fs from 'node:fs/promises';
import * as path from 'node:path'; import * as path from 'node:path';
import { Storage } from '../config/storage.js'; import { Storage } from '../config/storage.js';
@@ -26,41 +25,14 @@ import { ToolErrorType } from './tool-error.js';
import { MEMORY_TOOL_NAME } from './tool-names.js'; import { MEMORY_TOOL_NAME } from './tool-names.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js';
const memoryToolSchemaData: FunctionDeclaration = {
name: MEMORY_TOOL_NAME,
description:
'Saves a specific piece of information, fact, or user preference to your long-term memory. Use this when the user explicitly asks you to remember something, or when they state a clear, concise fact or preference that seems important to retain for future interactions. Examples: "Always lint after building", "Never run sudo commands", "Remember my address".',
parametersJsonSchema: {
type: 'object',
properties: {
fact: {
type: 'string',
description:
'The specific fact or piece of information to remember. Should be a clear, self-contained statement.',
},
},
required: ['fact'],
additionalProperties: false,
},
};
const memoryToolDescription = ` const memoryToolDescription = `
Saves a specific piece of information or fact to your long-term memory. Saves concise global user context (preferences, facts) for use across ALL workspaces.
Use this tool: ### CRITICAL: GLOBAL CONTEXT ONLY
NEVER save workspace-specific context, local paths, or commands (e.g. "The entry point is src/index.js", "The test command is npm test"). These are local to the current workspace and must NOT be saved globally. EXCLUSIVELY for context relevant across ALL workspaces.
- When the user explicitly asks you to remember something (e.g., "Remember that I like pineapple on pizza", "Please save this: my cat's name is Whiskers"). - Use for "Remember X" or clear personal facts.
- When the user states a clear, concise fact about themselves, their preferences, or their environment that seems important for you to retain for future interactions to provide a more personalized and effective assistance. - Do NOT use for session context.`;
Do NOT use this tool:
- To remember conversational context that is only relevant for the current session.
- To save long, complex, or rambling pieces of text. The fact should be relatively short and to the point.
- If you are unsure whether the information is a fact worth remembering long-term. If in doubt, you can ask the user, "Should I remember that for you?"
## Parameters
- \`fact\` (string, required): The specific fact or piece of information to remember. This should be a clear, self-contained statement. For example, if the user says "My favorite color is blue", the fact would be "My favorite color is blue".`;
export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md'; export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md';
export const MEMORY_SECTION_HEADER = '## Gemini Added Memories'; export const MEMORY_SECTION_HEADER = '## Gemini Added Memories';
@@ -313,9 +285,21 @@ export class MemoryTool
super( super(
MemoryTool.Name, MemoryTool.Name,
'SaveMemory', 'SaveMemory',
memoryToolDescription, memoryToolDescription +
' Examples: "Always lint after building", "Never run sudo commands", "Remember my address".',
Kind.Think, Kind.Think,
memoryToolSchemaData.parametersJsonSchema as Record<string, unknown>, {
type: 'object',
properties: {
fact: {
type: 'string',
description:
'The specific fact or piece of information to remember. Should be a clear, self-contained statement.',
},
},
required: ['fact'],
additionalProperties: false,
},
messageBus, messageBus,
true, true,
false, false,