mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
fix(evals): update save_memory evals and simplify tool description (#18610)
This commit is contained in:
@@ -109,7 +109,7 @@ describe('save_memory', () => {
|
|||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
},
|
},
|
||||||
prompt: `My dog's name is Buddy. What is my dog's name?`,
|
prompt: `Please remember that my dog's name is Buddy.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall('save_memory');
|
const wasToolCalled = await rig.waitForToolCall('save_memory');
|
||||||
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
|
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
|
||||||
@@ -145,25 +145,34 @@ describe('save_memory', () => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const rememberingDbSchemaLocation =
|
const ignoringDbSchemaLocation =
|
||||||
"Agent remembers project's database schema location";
|
"Agent ignores workspace's database schema location";
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('ALWAYS_PASSES', {
|
||||||
name: rememberingDbSchemaLocation,
|
name: ignoringDbSchemaLocation,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: {
|
||||||
|
tools: {
|
||||||
|
core: [
|
||||||
|
'save_memory',
|
||||||
|
'list_directory',
|
||||||
|
'read_file',
|
||||||
|
'run_shell_command',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
prompt: `The database schema for this project is located in \`db/schema.sql\`.`,
|
prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall('save_memory');
|
await rig.waitForTelemetryReady();
|
||||||
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
|
const wasToolCalled = rig
|
||||||
true,
|
.readToolLogs()
|
||||||
);
|
.some((log) => log.toolRequest.name === 'save_memory');
|
||||||
|
expect(
|
||||||
|
wasToolCalled,
|
||||||
|
'save_memory should not be called for workspace-specific information',
|
||||||
|
).toBe(false);
|
||||||
|
|
||||||
assertModelHasOutput(result);
|
assertModelHasOutput(result);
|
||||||
checkModelOutputContent(result, {
|
|
||||||
expectedContent: [/database schema|ok|remember|will do/i],
|
|
||||||
testName: `${TEST_PREFIX}${rememberingDbSchemaLocation}`,
|
|
||||||
});
|
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -189,38 +198,74 @@ describe('save_memory', () => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const rememberingTestCommand =
|
const ignoringBuildArtifactLocation =
|
||||||
'Agent remembers specific project test command';
|
'Agent ignores workspace build artifact location';
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('ALWAYS_PASSES', {
|
||||||
name: rememberingTestCommand,
|
name: ignoringBuildArtifactLocation,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: {
|
||||||
|
tools: {
|
||||||
|
core: [
|
||||||
|
'save_memory',
|
||||||
|
'list_directory',
|
||||||
|
'read_file',
|
||||||
|
'run_shell_command',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
prompt: `The command to run all backend tests is \`npm run test:backend\`.`,
|
prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall('save_memory');
|
await rig.waitForTelemetryReady();
|
||||||
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
|
const wasToolCalled = rig
|
||||||
true,
|
.readToolLogs()
|
||||||
);
|
.some((log) => log.toolRequest.name === 'save_memory');
|
||||||
|
expect(
|
||||||
|
wasToolCalled,
|
||||||
|
'save_memory should not be called for workspace-specific information',
|
||||||
|
).toBe(false);
|
||||||
|
|
||||||
assertModelHasOutput(result);
|
assertModelHasOutput(result);
|
||||||
checkModelOutputContent(result, {
|
|
||||||
expectedContent: [
|
|
||||||
/command to run all backend tests|ok|remember|will do/i,
|
|
||||||
],
|
|
||||||
testName: `${TEST_PREFIX}${rememberingTestCommand}`,
|
|
||||||
});
|
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const rememberingMainEntryPoint =
|
const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
|
||||||
"Agent remembers project's main entry point";
|
|
||||||
evalTest('ALWAYS_PASSES', {
|
evalTest('ALWAYS_PASSES', {
|
||||||
name: rememberingMainEntryPoint,
|
name: ignoringMainEntryPoint,
|
||||||
|
params: {
|
||||||
|
settings: {
|
||||||
|
tools: {
|
||||||
|
core: [
|
||||||
|
'save_memory',
|
||||||
|
'list_directory',
|
||||||
|
'read_file',
|
||||||
|
'run_shell_command',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
prompt: `The main entry point for this workspace is \`src/index.js\`.`,
|
||||||
|
assert: async (rig, result) => {
|
||||||
|
await rig.waitForTelemetryReady();
|
||||||
|
const wasToolCalled = rig
|
||||||
|
.readToolLogs()
|
||||||
|
.some((log) => log.toolRequest.name === 'save_memory');
|
||||||
|
expect(
|
||||||
|
wasToolCalled,
|
||||||
|
'save_memory should not be called for workspace-specific information',
|
||||||
|
).toBe(false);
|
||||||
|
|
||||||
|
assertModelHasOutput(result);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const rememberingBirthday = "Agent remembers user's birthday";
|
||||||
|
evalTest('ALWAYS_PASSES', {
|
||||||
|
name: rememberingBirthday,
|
||||||
params: {
|
params: {
|
||||||
settings: { tools: { core: ['save_memory'] } },
|
settings: { tools: { core: ['save_memory'] } },
|
||||||
},
|
},
|
||||||
prompt: `The main entry point for this project is \`src/index.js\`.`,
|
prompt: `My birthday is on June 15th.`,
|
||||||
assert: async (rig, result) => {
|
assert: async (rig, result) => {
|
||||||
const wasToolCalled = await rig.waitForToolCall('save_memory');
|
const wasToolCalled = await rig.waitForToolCall('save_memory');
|
||||||
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
|
expect(wasToolCalled, 'Expected save_memory tool to be called').toBe(
|
||||||
@@ -229,10 +274,8 @@ describe('save_memory', () => {
|
|||||||
|
|
||||||
assertModelHasOutput(result);
|
assertModelHasOutput(result);
|
||||||
checkModelOutputContent(result, {
|
checkModelOutputContent(result, {
|
||||||
expectedContent: [
|
expectedContent: [/June 15th|ok|remember|will do/i],
|
||||||
/main entry point for this project|ok|remember|will do/i,
|
testName: `${TEST_PREFIX}${rememberingBirthday}`,
|
||||||
],
|
|
||||||
testName: `${TEST_PREFIX}${rememberingMainEntryPoint}`,
|
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ describe('MemoryTool', () => {
|
|||||||
expect(memoryTool.name).toBe('save_memory');
|
expect(memoryTool.name).toBe('save_memory');
|
||||||
expect(memoryTool.displayName).toBe('SaveMemory');
|
expect(memoryTool.displayName).toBe('SaveMemory');
|
||||||
expect(memoryTool.description).toContain(
|
expect(memoryTool.description).toContain(
|
||||||
'Saves a specific piece of information',
|
'Saves concise global user context',
|
||||||
);
|
);
|
||||||
expect(memoryTool.schema).toBeDefined();
|
expect(memoryTool.schema).toBeDefined();
|
||||||
expect(memoryTool.schema.name).toBe('save_memory');
|
expect(memoryTool.schema.name).toBe('save_memory');
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ import {
|
|||||||
Kind,
|
Kind,
|
||||||
ToolConfirmationOutcome,
|
ToolConfirmationOutcome,
|
||||||
} from './tools.js';
|
} from './tools.js';
|
||||||
import type { FunctionDeclaration } from '@google/genai';
|
|
||||||
import * as fs from 'node:fs/promises';
|
import * as fs from 'node:fs/promises';
|
||||||
import * as path from 'node:path';
|
import * as path from 'node:path';
|
||||||
import { Storage } from '../config/storage.js';
|
import { Storage } from '../config/storage.js';
|
||||||
@@ -26,41 +25,14 @@ import { ToolErrorType } from './tool-error.js';
|
|||||||
import { MEMORY_TOOL_NAME } from './tool-names.js';
|
import { MEMORY_TOOL_NAME } from './tool-names.js';
|
||||||
import type { MessageBus } from '../confirmation-bus/message-bus.js';
|
import type { MessageBus } from '../confirmation-bus/message-bus.js';
|
||||||
|
|
||||||
const memoryToolSchemaData: FunctionDeclaration = {
|
|
||||||
name: MEMORY_TOOL_NAME,
|
|
||||||
description:
|
|
||||||
'Saves a specific piece of information, fact, or user preference to your long-term memory. Use this when the user explicitly asks you to remember something, or when they state a clear, concise fact or preference that seems important to retain for future interactions. Examples: "Always lint after building", "Never run sudo commands", "Remember my address".',
|
|
||||||
parametersJsonSchema: {
|
|
||||||
type: 'object',
|
|
||||||
properties: {
|
|
||||||
fact: {
|
|
||||||
type: 'string',
|
|
||||||
description:
|
|
||||||
'The specific fact or piece of information to remember. Should be a clear, self-contained statement.',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
required: ['fact'],
|
|
||||||
additionalProperties: false,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
const memoryToolDescription = `
|
const memoryToolDescription = `
|
||||||
Saves a specific piece of information or fact to your long-term memory.
|
Saves concise global user context (preferences, facts) for use across ALL workspaces.
|
||||||
|
|
||||||
Use this tool:
|
### CRITICAL: GLOBAL CONTEXT ONLY
|
||||||
|
NEVER save workspace-specific context, local paths, or commands (e.g. "The entry point is src/index.js", "The test command is npm test"). These are local to the current workspace and must NOT be saved globally. EXCLUSIVELY for context relevant across ALL workspaces.
|
||||||
|
|
||||||
- When the user explicitly asks you to remember something (e.g., "Remember that I like pineapple on pizza", "Please save this: my cat's name is Whiskers").
|
- Use for "Remember X" or clear personal facts.
|
||||||
- When the user states a clear, concise fact about themselves, their preferences, or their environment that seems important for you to retain for future interactions to provide a more personalized and effective assistance.
|
- Do NOT use for session context.`;
|
||||||
|
|
||||||
Do NOT use this tool:
|
|
||||||
|
|
||||||
- To remember conversational context that is only relevant for the current session.
|
|
||||||
- To save long, complex, or rambling pieces of text. The fact should be relatively short and to the point.
|
|
||||||
- If you are unsure whether the information is a fact worth remembering long-term. If in doubt, you can ask the user, "Should I remember that for you?"
|
|
||||||
|
|
||||||
## Parameters
|
|
||||||
|
|
||||||
- \`fact\` (string, required): The specific fact or piece of information to remember. This should be a clear, self-contained statement. For example, if the user says "My favorite color is blue", the fact would be "My favorite color is blue".`;
|
|
||||||
|
|
||||||
export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md';
|
export const DEFAULT_CONTEXT_FILENAME = 'GEMINI.md';
|
||||||
export const MEMORY_SECTION_HEADER = '## Gemini Added Memories';
|
export const MEMORY_SECTION_HEADER = '## Gemini Added Memories';
|
||||||
@@ -313,9 +285,21 @@ export class MemoryTool
|
|||||||
super(
|
super(
|
||||||
MemoryTool.Name,
|
MemoryTool.Name,
|
||||||
'SaveMemory',
|
'SaveMemory',
|
||||||
memoryToolDescription,
|
memoryToolDescription +
|
||||||
|
' Examples: "Always lint after building", "Never run sudo commands", "Remember my address".',
|
||||||
Kind.Think,
|
Kind.Think,
|
||||||
memoryToolSchemaData.parametersJsonSchema as Record<string, unknown>,
|
{
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
fact: {
|
||||||
|
type: 'string',
|
||||||
|
description:
|
||||||
|
'The specific fact or piece of information to remember. Should be a clear, self-contained statement.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required: ['fact'],
|
||||||
|
additionalProperties: false,
|
||||||
|
},
|
||||||
messageBus,
|
messageBus,
|
||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
|
|||||||
Reference in New Issue
Block a user