From 33f630111f97e3d31ec09719739757d50cbbeb5c Mon Sep 17 00:00:00 2001 From: Sandy Tao Date: Thu, 19 Mar 2026 12:57:52 -0700 Subject: [PATCH] feat(core): add experimental memory manager agent to replace save_memory tool (#22726) Co-authored-by: Christian Gunderman --- .gemini/settings.json | 3 +- docs/cli/settings.md | 1 + docs/reference/configuration.md | 7 + packages/cli/src/config/config.ts | 1 + .../config/policy-engine.integration.test.ts | 6 +- packages/cli/src/config/settingsSchema.ts | 10 ++ packages/cli/src/ui/AppContainer.tsx | 14 +- .../src/agents/memory-manager-agent.test.ts | 153 +++++++++++++++++ .../core/src/agents/memory-manager-agent.ts | 156 ++++++++++++++++++ packages/core/src/agents/registry.ts | 19 +++ packages/core/src/config/config.test.ts | 29 ++++ packages/core/src/config/config.ts | 15 +- .../core/src/config/path-validation.test.ts | 68 ++++++++ .../core/__snapshots__/prompts.test.ts.snap | 20 +-- packages/core/src/core/client.test.ts | 19 ++- packages/core/src/core/client.ts | 6 + packages/core/src/core/prompts.test.ts | 2 + .../src/policy/memory-manager-policy.test.ts | 119 +++++++++++++ .../src/policy/policies/memory-manager.toml | 10 ++ .../core/src/prompts/promptProvider.test.ts | 1 + packages/core/src/prompts/promptProvider.ts | 1 + .../prompts/snippets-memory-manager.test.ts | 34 ++++ packages/core/src/prompts/snippets.legacy.ts | 7 +- packages/core/src/prompts/snippets.ts | 5 + packages/core/src/scheduler/scheduler.ts | 2 + packages/core/src/utils/toolCallContext.ts | 2 + schemas/settings.schema.json | 7 + 27 files changed, 696 insertions(+), 21 deletions(-) create mode 100644 packages/core/src/agents/memory-manager-agent.test.ts create mode 100644 packages/core/src/agents/memory-manager-agent.ts create mode 100644 packages/core/src/config/path-validation.test.ts create mode 100644 packages/core/src/policy/memory-manager-policy.test.ts create mode 100644 packages/core/src/policy/policies/memory-manager.toml create mode 100644 
packages/core/src/prompts/snippets-memory-manager.test.ts diff --git a/.gemini/settings.json b/.gemini/settings.json index 1a4c889066..9051dc78de 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -2,7 +2,8 @@ "experimental": { "plan": true, "extensionReloading": true, - "modelSteering": true + "modelSteering": true, + "memoryManager": true }, "general": { "devtools": true diff --git a/docs/cli/settings.md b/docs/cli/settings.md index eb9ba4158e..9b08867cc4 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -152,6 +152,7 @@ they appear in the UI. | Plan | `experimental.plan` | Enable Plan Mode. | `true` | | Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | | Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | | Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | ### Skills diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 7df1de61f1..f57fd40747 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1431,6 +1431,13 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `"gemma3-1b-gpu-custom"` - **Requires restart:** Yes +- **`experimental.memoryManager`** (boolean): + - **Description:** Replace the built-in save_memory tool with a memory manager + subagent that supports adding, removing, de-duplicating, and organizing + memories. 
+ - **Default:** `false` + - **Requires restart:** Yes + - **`experimental.topicUpdateNarration`** (boolean): - **Description:** Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 80c1e19443..777950c0ca 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -840,6 +840,7 @@ export async function loadCliConfig( skillsSupport: settings.skills?.enabled ?? true, disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, + experimentalMemoryManager: settings.experimental?.memoryManager, modelSteering: settings.experimental?.modelSteering, topicUpdateNarration: settings.experimental?.topicUpdateNarration, toolOutputMasking: settings.experimental?.toolOutputMasking, diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index 847b47bbe3..2e74a28201 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -516,7 +516,9 @@ describe('Policy Engine Integration Tests', () => { ); expect(mcpServerRule?.priority).toBe(4.1); // MCP allowed server - const readOnlyToolRule = rules.find((r) => r.toolName === 'glob'); + const readOnlyToolRule = rules.find( + (r) => r.toolName === 'glob' && !r.subagent, + ); // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny) expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5); @@ -673,7 +675,7 @@ describe('Policy Engine Integration Tests', () => { const server1Rule = rules.find((r) => r.toolName === 'mcp_server1_*'); expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier) - const globRule = rules.find((r) => r.toolName === 'glob'); + const globRule = rules.find((r) => r.toolName === 'glob' && !r.subagent); // Priority 70 in default 
tier → 1.07 expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 8a107c4d47..ea6b9f9239 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -2045,6 +2045,16 @@ const SETTINGS_SCHEMA = { }, }, }, + memoryManager: { + type: 'boolean', + label: 'Memory Manager Agent', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.', + showInDialog: true, + }, topicUpdateNarration: { type: 'boolean', label: 'Topic & Update Narration', diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 07edb72642..9d05f54347 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -1007,10 +1007,18 @@ Logging in with Google... Restarting Gemini CLI to continue. 
Date.now(), ); try { - const { memoryContent, fileCount } = - await refreshServerHierarchicalMemory(config); + let flattenedMemory: string; + let fileCount: number; - const flattenedMemory = flattenMemory(memoryContent); + if (config.isJitContextEnabled()) { + await config.getContextManager()?.refresh(); + flattenedMemory = flattenMemory(config.getUserMemory()); + fileCount = config.getGeminiMdFileCount(); + } else { + const result = await refreshServerHierarchicalMemory(config); + flattenedMemory = flattenMemory(result.memoryContent); + fileCount = result.fileCount; + } historyManager.addItem( { diff --git a/packages/core/src/agents/memory-manager-agent.test.ts b/packages/core/src/agents/memory-manager-agent.test.ts new file mode 100644 index 0000000000..c4f9879e8f --- /dev/null +++ b/packages/core/src/agents/memory-manager-agent.test.ts @@ -0,0 +1,153 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { MemoryManagerAgent } from './memory-manager-agent.js'; +import { + ASK_USER_TOOL_NAME, + EDIT_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + LS_TOOL_NAME, + READ_FILE_TOOL_NAME, + WRITE_FILE_TOOL_NAME, +} from '../tools/tool-names.js'; +import { Storage } from '../config/storage.js'; +import type { Config } from '../config/config.js'; +import type { HierarchicalMemory } from '../config/memory.js'; + +function createMockConfig(memory: string | HierarchicalMemory = ''): Config { + return { + getUserMemory: vi.fn().mockReturnValue(memory), + } as unknown as Config; +} + +describe('MemoryManagerAgent', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should have the correct name "save_memory"', () => { + const agent = MemoryManagerAgent(createMockConfig()); + expect(agent.name).toBe('save_memory'); + }); + + it('should be a local agent', () => { + const agent = 
MemoryManagerAgent(createMockConfig()); + expect(agent.kind).toBe('local'); + }); + + it('should have a description', () => { + const agent = MemoryManagerAgent(createMockConfig()); + expect(agent.description).toBeTruthy(); + expect(agent.description).toContain('memory'); + }); + + it('should have a system prompt with memory management instructions', () => { + const agent = MemoryManagerAgent(createMockConfig()); + const prompt = agent.promptConfig.systemPrompt; + const globalGeminiDir = Storage.getGlobalGeminiDir(); + expect(prompt).toContain(`Global (${globalGeminiDir}`); + expect(prompt).toContain('Project (./'); + expect(prompt).toContain('Memory Hierarchy'); + expect(prompt).toContain('De-duplicating'); + expect(prompt).toContain('Adding'); + expect(prompt).toContain('Removing stale entries'); + expect(prompt).toContain('Organizing'); + expect(prompt).toContain('Routing'); + }); + + it('should have efficiency guidelines in the system prompt', () => { + const agent = MemoryManagerAgent(createMockConfig()); + const prompt = agent.promptConfig.systemPrompt; + expect(prompt).toContain('Efficiency & Performance'); + expect(prompt).toContain('Use as few turns as possible'); + expect(prompt).toContain('Do not perform any exploration'); + expect(prompt).toContain('Be strategic with your thinking'); + expect(prompt).toContain('Context Awareness'); + }); + + it('should inject hierarchical memory into initial context', () => { + const config = createMockConfig({ + global: + '--- Context from: ../../.gemini/GEMINI.md ---\nglobal context\n--- End of Context from: ../../.gemini/GEMINI.md ---', + project: + '--- Context from: .gemini/GEMINI.md ---\nproject context\n--- End of Context from: .gemini/GEMINI.md ---', + }); + + const agent = MemoryManagerAgent(config); + const query = agent.promptConfig.query; + + expect(query).toContain('# Initial Context'); + expect(query).toContain('global context'); + expect(query).toContain('project context'); + }); + + it('should inject 
flat string memory into initial context', () => { + const config = createMockConfig('flat memory content'); + + const agent = MemoryManagerAgent(config); + const query = agent.promptConfig.query; + + expect(query).toContain('# Initial Context'); + expect(query).toContain('flat memory content'); + }); + + it('should exclude extension memory from initial context', () => { + const config = createMockConfig({ + global: 'global context', + extension: 'extension context that should be excluded', + project: 'project context', + }); + + const agent = MemoryManagerAgent(config); + const query = agent.promptConfig.query; + + expect(query).toContain('global context'); + expect(query).toContain('project context'); + expect(query).not.toContain('extension context'); + }); + + it('should not include initial context when memory is empty', () => { + const agent = MemoryManagerAgent(createMockConfig()); + const query = agent.promptConfig.query; + + expect(query).not.toContain('# Initial Context'); + }); + + it('should have file-management and search tools', () => { + const agent = MemoryManagerAgent(createMockConfig()); + expect(agent.toolConfig).toBeDefined(); + expect(agent.toolConfig!.tools).toEqual( + expect.arrayContaining([ + READ_FILE_TOOL_NAME, + EDIT_TOOL_NAME, + WRITE_FILE_TOOL_NAME, + LS_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + ASK_USER_TOOL_NAME, + ]), + ); + }); + + it('should require a "request" input parameter', () => { + const agent = MemoryManagerAgent(createMockConfig()); + const schema = agent.inputConfig.inputSchema as Record; + expect(schema).toBeDefined(); + expect(schema['properties']).toHaveProperty('request'); + expect(schema['required']).toContain('request'); + }); + + it('should use a fast model', () => { + const agent = MemoryManagerAgent(createMockConfig()); + expect(agent.modelConfig.model).toBe('flash'); + }); +}); diff --git a/packages/core/src/agents/memory-manager-agent.ts b/packages/core/src/agents/memory-manager-agent.ts new file mode 
100644 index 0000000000..1687da6d1f --- /dev/null +++ b/packages/core/src/agents/memory-manager-agent.ts @@ -0,0 +1,156 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { z } from 'zod'; +import type { LocalAgentDefinition } from './types.js'; +import { + ASK_USER_TOOL_NAME, + EDIT_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + LS_TOOL_NAME, + READ_FILE_TOOL_NAME, + WRITE_FILE_TOOL_NAME, +} from '../tools/tool-names.js'; +import { Storage } from '../config/storage.js'; +import { flattenMemory } from '../config/memory.js'; +import { GEMINI_MODEL_ALIAS_FLASH } from '../config/models.js'; +import type { Config } from '../config/config.js'; + +const MemoryManagerSchema = z.object({ + response: z + .string() + .describe('A summary of the memory operations performed.'), +}); + +/** + * A memory management agent that replaces the built-in save_memory tool. + * It provides richer memory operations: adding, removing, de-duplicating, + * and organizing memories in the global and project GEMINI.md files. + * + * Users can override this agent by placing a custom save_memory.md + * in ~/.gemini/agents/ or .gemini/agents/. + */ +export const MemoryManagerAgent = ( + config: Config, +): LocalAgentDefinition => { + const globalGeminiDir = Storage.getGlobalGeminiDir(); + + const getInitialContext = (): string => { + const memory = config.getUserMemory(); + // Only include global and project memory — extension memory is read-only + // and not relevant to the memory manager. + const content = + typeof memory === 'string' + ? memory + : flattenMemory({ global: memory.global, project: memory.project }); + if (!content.trim()) return ''; + return `\n# Initial Context\n\n${content}\n`; + }; + + const buildSystemPrompt = (): string => + ` +You are a memory management agent maintaining user memories in GEMINI.md files. 
+ +# Memory Hierarchy + +## Global (${globalGeminiDir}) +- \`${globalGeminiDir}/GEMINI.md\` — Cross-project user preferences, key personal info, + and habits that apply everywhere. + +## Project (./) +- \`./GEMINI.md\` — **Table of Contents** for project-specific context: + architecture decisions, conventions, key contacts, and references to + subdirectory GEMINI.md files for detailed context. +- Subdirectory GEMINI.md files (e.g. \`src/GEMINI.md\`, \`docs/GEMINI.md\`) — + detailed, domain-specific context for that part of the project. Reference + these from the root \`./GEMINI.md\`. + +## Routing + +When adding a memory, route it to the right store: +- **Global**: User preferences, personal info, tool aliases, cross-project habits → **global** +- **Project Root**: Project architecture, conventions, workflows, team info → **project root** +- **Subdirectory**: Detailed context about a specific module or directory → **subdirectory + GEMINI.md**, with a reference added to the project root + +- **Ambiguity**: If a memory (like a coding preference or workflow) could be interpreted as either a global habit or a project-specific convention, you **MUST** use \`${ASK_USER_TOOL_NAME}\` to clarify the user's intent. Do NOT make a unilateral decision when ambiguity exists between Global and Project stores. + +# Operations + +1. **Adding** — Route to the correct store and file. Check for duplicates in your provided context first. +2. **Removing stale entries** — Delete outdated or unwanted entries. Clean up + dangling references. +3. **De-duplicating** — Semantically equivalent entries should be combined. Keep the most informative version. +4. **Organizing** — Restructure for clarity. Update references between files. + +# Restrictions +- Keep GEMINI.md files lean — they are loaded into context every session. +- Keep entries concise. +- Edit surgically — preserve existing structure and user-authored content. +- NEVER write or read any files other than GEMINI.md files. 
+ +# Efficiency & Performance +- **Use as few turns as possible.** Execute independent reads and writes to different files in parallel by calling multiple tools in a single turn. +- **Do not perform any exploration of the codebase.** Try to use the provided file context and only search additional GEMINI.md files as needed to accomplish your task. +- **Be strategic with your thinking.** carefully decide where to route memories and how to de-duplicate memories, but be decisive with simple memory writes. +- **Minimize file system operations.** You should typically only modify the GEMINI.md files that are already provided in your context. Only read or write to other files if explicitly directed or if you are following a specific reference from an existing memory file. +- **Context Awareness.** If a file's content is already provided in the "Initial Context" section, you do not need to call \`read_file\` for it. + +# Insufficient context +If you find that you have insufficient context to read or modify the memories as described, +reply with what you need, and exit. Do not search the codebase for the missing context. +`.trim(); + + return { + kind: 'local', + name: 'save_memory', + displayName: 'Memory Manager', + description: `Writes and reads memory, preferences or facts across ALL future sessions. Use this for recurring instructions like coding styles or tool aliases.`, + inputConfig: { + inputSchema: { + type: 'object', + properties: { + request: { + type: 'string', + description: + 'The memory operation to perform. 
Examples: "Remember that I prefer tabs over spaces", "Clean up stale memories", "De-duplicate my memories", "Organize my memories".', + }, + }, + required: ['request'], + }, + }, + outputConfig: { + outputName: 'result', + description: 'A summary of the memory operations performed.', + schema: MemoryManagerSchema, + }, + modelConfig: { + model: GEMINI_MODEL_ALIAS_FLASH, + }, + toolConfig: { + tools: [ + READ_FILE_TOOL_NAME, + EDIT_TOOL_NAME, + WRITE_FILE_TOOL_NAME, + LS_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + ASK_USER_TOOL_NAME, + ], + }, + get promptConfig() { + return { + systemPrompt: buildSystemPrompt(), + query: `${getInitialContext()}\${request}`, + }; + }, + runConfig: { + maxTimeMinutes: 5, + maxTurns: 10, + }, + }; +}; diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts index 3c681266fa..51d923001a 100644 --- a/packages/core/src/agents/registry.ts +++ b/packages/core/src/agents/registry.ts @@ -13,6 +13,7 @@ import { CodebaseInvestigatorAgent } from './codebase-investigator.js'; import { CliHelpAgent } from './cli-help-agent.js'; import { GeneralistAgent } from './generalist-agent.js'; import { BrowserAgentDefinition } from './browser/browserAgentDefinition.js'; +import { MemoryManagerAgent } from './memory-manager-agent.js'; import { A2AAuthProviderFactory } from './auth-provider/factory.js'; import type { AuthenticationHandler } from '@a2a-js/sdk/client'; import { type z } from 'zod'; @@ -249,6 +250,24 @@ export class AgentRegistry { if (browserConfig.enabled) { this.registerLocalAgent(BrowserAgentDefinition(this.config)); } + + // Register the memory manager agent as a replacement for the save_memory tool. + if (this.config.isMemoryManagerEnabled()) { + this.registerLocalAgent(MemoryManagerAgent(this.config)); + + // Ensure the global .gemini directory is accessible to tools. + // This allows the save_memory agent to read and write to it. 
+ // Access control is enforced by the Policy Engine (memory-manager.toml). + try { + const globalDir = Storage.getGlobalGeminiDir(); + this.config.getWorkspaceContext().addDirectory(globalDir); + } catch (e) { + debugLogger.warn( + `[AgentRegistry] Could not add global .gemini directory to workspace:`, + e, + ); + } + } } private async refreshAgents(): Promise { diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index eff489dcd6..e1db5c6e8e 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -3104,6 +3104,35 @@ describe('Config JIT Initialization', () => { expect(config.getUserMemory()).toBe('Initial Memory'); }); + describe('isMemoryManagerEnabled', () => { + it('should default to false', () => { + const params: ConfigParameters = { + sessionId: 'test-session', + targetDir: '/tmp/test', + debugMode: false, + model: 'test-model', + cwd: '/tmp/test', + }; + + config = new Config(params); + expect(config.isMemoryManagerEnabled()).toBe(false); + }); + + it('should return true when experimentalMemoryManager is true', () => { + const params: ConfigParameters = { + sessionId: 'test-session', + targetDir: '/tmp/test', + debugMode: false, + model: 'test-model', + cwd: '/tmp/test', + experimentalMemoryManager: true, + }; + + config = new Config(params); + expect(config.isMemoryManagerEnabled()).toBe(true); + }); + }); + describe('reloadSkills', () => { it('should refresh disabledSkills and re-register ActivateSkillTool when skills exist', async () => { const mockOnReload = vi.fn().mockResolvedValue({ diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index aa3e9aa5b6..81bfa82bd3 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -629,6 +629,7 @@ export interface ConfigParameters { disabledSkills?: string[]; adminSkillsEnabled?: boolean; experimentalJitContext?: boolean; + experimentalMemoryManager?: 
boolean; topicUpdateNarration?: boolean; toolOutputMasking?: Partial; disableLLMCorrection?: boolean; @@ -853,6 +854,7 @@ export class Config implements McpContext, AgentLoopContext { private readonly adminSkillsEnabled: boolean; private readonly experimentalJitContext: boolean; + private readonly experimentalMemoryManager: boolean; private readonly topicUpdateNarration: boolean; private readonly disableLLMCorrection: boolean; private readonly planEnabled: boolean; @@ -1013,6 +1015,7 @@ export class Config implements McpContext, AgentLoopContext { ); this.experimentalJitContext = params.experimentalJitContext ?? true; + this.experimentalMemoryManager = params.experimentalMemoryManager ?? false; this.topicUpdateNarration = params.topicUpdateNarration ?? false; this.modelSteering = params.modelSteering ?? false; this.injectionService = new InjectionService(() => @@ -2157,6 +2160,10 @@ export class Config implements McpContext, AgentLoopContext { return this.experimentalJitContext; } + isMemoryManagerEnabled(): boolean { + return this.experimentalMemoryManager; + } + isTopicUpdateNarrationEnabled(): boolean { return this.topicUpdateNarration; } @@ -3184,9 +3191,11 @@ export class Config implements McpContext, AgentLoopContext { maybeRegister(ShellTool, () => registry.registerTool(new ShellTool(this, this.messageBus)), ); - maybeRegister(MemoryTool, () => - registry.registerTool(new MemoryTool(this.messageBus)), - ); + if (!this.isMemoryManagerEnabled()) { + maybeRegister(MemoryTool, () => + registry.registerTool(new MemoryTool(this.messageBus)), + ); + } maybeRegister(WebSearchTool, () => registry.registerTool(new WebSearchTool(this, this.messageBus)), ); diff --git a/packages/core/src/config/path-validation.test.ts b/packages/core/src/config/path-validation.test.ts new file mode 100644 index 0000000000..742704e394 --- /dev/null +++ b/packages/core/src/config/path-validation.test.ts @@ -0,0 +1,68 @@ +/** + * @license + * Copyright 2026 Google LLC + * 
SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { Config } from './config.js'; +import * as path from 'node:path'; +import * as os from 'node:os'; + +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + existsSync: vi.fn().mockReturnValue(true), + statSync: vi.fn().mockReturnValue({ + isDirectory: vi.fn().mockReturnValue(true), + }), + realpathSync: vi.fn((p) => p), + }; +}); + +vi.mock('../utils/paths.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + resolveToRealPath: vi.fn((p) => p), + isSubpath: (parent: string, child: string) => child.startsWith(parent), + }; +}); + +describe('Config Path Validation', () => { + let config: Config; + const targetDir = '/mock/workspace'; + const globalGeminiDir = path.join(os.homedir(), '.gemini'); + + beforeEach(() => { + config = new Config({ + targetDir, + sessionId: 'test-session', + debugMode: false, + cwd: targetDir, + model: 'test-model', + }); + }); + + it('should allow access to ~/.gemini if it is added to the workspace', () => { + const geminiMdPath = path.join(globalGeminiDir, 'GEMINI.md'); + + // Before adding, it should be denied + expect(config.isPathAllowed(geminiMdPath)).toBe(false); + + // Add to workspace + config.getWorkspaceContext().addDirectory(globalGeminiDir); + + // Now it should be allowed + expect(config.isPathAllowed(geminiMdPath)).toBe(true); + expect(config.validatePathAccess(geminiMdPath, 'read')).toBeNull(); + expect(config.validatePathAccess(geminiMdPath, 'write')).toBeNull(); + }); + + it('should still allow project workspace paths', () => { + const workspacePath = path.join(targetDir, 'src/index.ts'); + expect(config.isPathAllowed(workspacePath)).toBe(true); + expect(config.validatePathAccess(workspacePath, 'read')).toBeNull(); + }); +}); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap 
b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 51468c9d8d..cf2635562d 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -447,7 +447,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" 
+- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -1148,7 +1148,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). 
If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. 
## Interaction Details @@ -1261,7 +1261,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. 
Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -1382,7 +1382,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). 
This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -1508,7 +1508,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. 
- **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. 
If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -2876,7 +2876,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" 
+- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -3154,7 +3154,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). 
If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. 
## Interaction Details @@ -3268,7 +3268,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. 
Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -3702,7 +3702,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). -- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. 
+- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. ## Interaction Details @@ -4123,7 +4123,7 @@ IT IS CRITICAL TO FOLLOW THESE GUIDELINES TO AVOID EXCESSIVE TOKEN CONSUMPTION. - **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** To run a command in the background, set the \`is_background\` parameter to true. - **Interactive Commands:** Always prefer non-interactive commands (e.g., using 'run once' or 'CI' flags for test runners to avoid persistent watch modes or 'git --no-pager') unless a persistent process is specifically required; however, some commands are only interactive and expect user input during their execution (e.g. ssh, vim). If you choose to execute an interactive command consider letting the user know they can press \`tab\` to focus into the shell to provide input. 
-- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" +- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. 
## Interaction Details diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 77c4a5a498..e93eedf055 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -51,7 +51,7 @@ import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js' import * as policyCatalog from '../availability/policyCatalog.js'; import { LlmRole, LoopType } from '../telemetry/types.js'; import { partToString } from '../utils/partUtils.js'; -import { coreEvents } from '../utils/events.js'; +import { coreEvents, CoreEvent } from '../utils/events.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; // Mock fs module to prevent actual file system operations during tests @@ -1997,6 +1997,23 @@ ${JSON.stringify( ); }); + it('should update system instruction when MemoryChanged event is emitted', async () => { + vi.mocked(mockConfig.getSystemInstructionMemory).mockReturnValue( + 'Updated Memory', + ); + + const { getCoreSystemPrompt } = await import('./prompts.js'); + const mockGetCoreSystemPrompt = vi.mocked(getCoreSystemPrompt); + mockGetCoreSystemPrompt.mockClear(); + + coreEvents.emit(CoreEvent.MemoryChanged, { fileCount: 2 }); + + expect(mockGetCoreSystemPrompt).toHaveBeenCalledWith( + mockConfig, + 'Updated Memory', + ); + }); + it('should recursively call sendMessageStream with "Please continue." 
when InvalidStream event is received for Gemini 2 models', async () => { vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue( true, diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 01577452f4..f357a0decb 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -117,6 +117,7 @@ export class GeminiClient { this.lastPromptId = this.config.getSessionId(); coreEvents.on(CoreEvent.ModelChanged, this.handleModelChanged); + coreEvents.on(CoreEvent.MemoryChanged, this.handleMemoryChanged); } private get config(): Config { @@ -127,6 +128,10 @@ export class GeminiClient { this.currentSequenceModel = null; }; + private handleMemoryChanged = () => { + this.updateSystemInstruction(); + }; + // Hook state to deduplicate BeforeAgent calls and track response for // AfterAgent private hookStateMap = new Map< @@ -306,6 +311,7 @@ export class GeminiClient { dispose() { coreEvents.off(CoreEvent.ModelChanged, this.handleModelChanged); + coreEvents.off(CoreEvent.MemoryChanged, this.handleMemoryChanged); } async resumeChat( diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 82a7943de4..d3f2087018 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -96,6 +96,7 @@ describe('Core System Prompt (prompts.ts)', () => { isInteractive: vi.fn().mockReturnValue(true), isInteractiveShellEnabled: vi.fn().mockReturnValue(true), isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false), + isMemoryManagerEnabled: vi.fn().mockReturnValue(false), isAgentsEnabled: vi.fn().mockReturnValue(false), getPreviewFeatures: vi.fn().mockReturnValue(true), getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO), @@ -423,6 +424,7 @@ describe('Core System Prompt (prompts.ts)', () => { isInteractive: vi.fn().mockReturnValue(false), isInteractiveShellEnabled: vi.fn().mockReturnValue(false), isTopicUpdateNarrationEnabled: 
vi.fn().mockReturnValue(false), + isMemoryManagerEnabled: vi.fn().mockReturnValue(false), isAgentsEnabled: vi.fn().mockReturnValue(false), getModel: vi.fn().mockReturnValue('auto'), getActiveModel: vi.fn().mockReturnValue(PREVIEW_GEMINI_MODEL), diff --git a/packages/core/src/policy/memory-manager-policy.test.ts b/packages/core/src/policy/memory-manager-policy.test.ts new file mode 100644 index 0000000000..5de6586166 --- /dev/null +++ b/packages/core/src/policy/memory-manager-policy.test.ts @@ -0,0 +1,119 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { PolicyEngine } from './policy-engine.js'; +import { loadPoliciesFromToml } from './toml-loader.js'; +import { PolicyDecision, ApprovalMode } from './types.js'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +describe('Memory Manager Policy', () => { + let engine: PolicyEngine; + + beforeEach(async () => { + const policiesDir = path.join(__dirname, 'policies'); + const result = await loadPoliciesFromToml([policiesDir], () => 1); + engine = new PolicyEngine({ + rules: result.rules, + approvalMode: ApprovalMode.DEFAULT, + }); + }); + + it('should allow save_memory to read ~/.gemini/GEMINI.md', async () => { + const toolCall = { + name: 'read_file', + args: { file_path: '~/.gemini/GEMINI.md' }, + }; + const result = await engine.check( + toolCall, + undefined, + undefined, + 'save_memory', + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should allow save_memory to write ~/.gemini/GEMINI.md', async () => { + const toolCall = { + name: 'write_file', + args: { file_path: '~/.gemini/GEMINI.md', content: 'test' }, + }; + const result = await engine.check( + toolCall, + undefined, + undefined, + 'save_memory', + ); + 
expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should allow save_memory to list ~/.gemini/', async () => { + const toolCall = { + name: 'list_directory', + args: { dir_path: '~/.gemini/' }, + }; + const result = await engine.check( + toolCall, + undefined, + undefined, + 'save_memory', + ); + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should fall through to global allow rule for save_memory reading non-.gemini files', async () => { + const toolCall = { + name: 'read_file', + args: { file_path: '/etc/passwd' }, + }; + const result = await engine.check( + toolCall, + undefined, + undefined, + 'save_memory', + ); + // The memory-manager policy only matches .gemini/ paths. + // Other paths fall through to the global read_file allow rule (priority 50). + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should not match paths where .gemini is a substring (e.g. not.gemini)', async () => { + const toolCall = { + name: 'read_file', + args: { file_path: '/tmp/not.gemini/evil' }, + }; + const result = await engine.check( + toolCall, + undefined, + undefined, + 'save_memory', + ); + // The tighter argsPattern requires .gemini/ to be preceded by start-of-string + // or a path separator, so "not.gemini/" should NOT match the memory-manager rule. + // It falls through to the global read_file allow rule instead. + expect(result.decision).toBe(PolicyDecision.ALLOW); + }); + + it('should fall through to global allow rule for other agents accessing ~/.gemini/', async () => { + const toolCall = { + name: 'read_file', + args: { file_path: '~/.gemini/GEMINI.md' }, + }; + const result = await engine.check( + toolCall, + undefined, + undefined, + 'other_agent', + ); + // The memory-manager policy rule (priority 100) only applies to 'save_memory'. + // Other agents fall through to the global read_file allow rule (priority 50). 
+ expect(result.decision).toBe(PolicyDecision.ALLOW); + }); +}); diff --git a/packages/core/src/policy/policies/memory-manager.toml b/packages/core/src/policy/policies/memory-manager.toml new file mode 100644 index 0000000000..2055fcdf3a --- /dev/null +++ b/packages/core/src/policy/policies/memory-manager.toml @@ -0,0 +1,10 @@ +# Policy for Memory Manager Agent +# Allows the save_memory agent to manage memories in the ~/.gemini/ folder. + +[[rule]] +subagent = "save_memory" +toolName = ["read_file", "write_file", "replace", "list_directory", "glob", "grep_search"] +decision = "allow" +priority = 100 +argsPattern = "(^|.*/)\\.gemini/.*" +deny_message = "Memory Manager is only allowed to access the .gemini folder." diff --git a/packages/core/src/prompts/promptProvider.test.ts b/packages/core/src/prompts/promptProvider.test.ts index c2253a9b57..700062de50 100644 --- a/packages/core/src/prompts/promptProvider.test.ts +++ b/packages/core/src/prompts/promptProvider.test.ts @@ -61,6 +61,7 @@ describe('PromptProvider', () => { isInteractive: vi.fn().mockReturnValue(true), isInteractiveShellEnabled: vi.fn().mockReturnValue(true), isTopicUpdateNarrationEnabled: vi.fn().mockReturnValue(false), + isMemoryManagerEnabled: vi.fn().mockReturnValue(false), getSkillManager: vi.fn().mockReturnValue({ getSkills: vi.fn().mockReturnValue([]), }), diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index a2e1333895..bd884aeab5 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -192,6 +192,7 @@ export class PromptProvider { interactiveShellEnabled: context.config.isInteractiveShellEnabled(), topicUpdateNarration: context.config.isTopicUpdateNarrationEnabled(), + memoryManagerEnabled: context.config.isMemoryManagerEnabled(), }), ), sandbox: this.withSection('sandbox', () => getSandboxMode()), diff --git a/packages/core/src/prompts/snippets-memory-manager.test.ts 
b/packages/core/src/prompts/snippets-memory-manager.test.ts new file mode 100644 index 0000000000..070e49f8c0 --- /dev/null +++ b/packages/core/src/prompts/snippets-memory-manager.test.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { renderOperationalGuidelines } from './snippets.js'; + +describe('renderOperationalGuidelines - memoryManagerEnabled', () => { + const baseOptions = { + interactive: true, + interactiveShellEnabled: false, + topicUpdateNarration: false, + memoryManagerEnabled: false, + }; + + it('should include standard memory tool guidance when memoryManagerEnabled is false', () => { + const result = renderOperationalGuidelines(baseOptions); + expect(result).toContain('save_memory'); + expect(result).toContain('persistent user-related information'); + expect(result).not.toContain('subagent'); + }); + + it('should include subagent memory guidance when memoryManagerEnabled is true', () => { + const result = renderOperationalGuidelines({ + ...baseOptions, + memoryManagerEnabled: true, + }); + expect(result).toContain('save_memory'); + expect(result).toContain('subagent'); + expect(result).not.toContain('persistent user-related information'); + }); +}); diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 41e6edc183..19aaf56d78 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -67,6 +67,7 @@ export interface OperationalGuidelinesOptions { isGemini3: boolean; enableShellEfficiency: boolean; interactiveShellEnabled: boolean; + memoryManagerEnabled: boolean; } export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside'; @@ -647,8 +648,12 @@ function toolUsageInteractive( function toolUsageRememberingFacts( options: OperationalGuidelinesOptions, ): string { + if (options.memoryManagerEnabled) { + return ` +- 
**Memory Tool:** You MUST use the '${MEMORY_TOOL_NAME}' tool to proactively record facts, preferences, and workflows that apply across all sessions. Whenever the user explicitly tells you to "remember" something, or when they state a preference or workflow (like "always lint after editing"), you MUST immediately call the save_memory subagent. Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is strictly for persistent general knowledge.`; + } const base = ` -- **Remembering Facts:** Use the '${MEMORY_TOOL_NAME}' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information.`; +- **Remembering Facts:** Use the '${MEMORY_TOOL_NAME}' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases, or a workflow like "always lint after editing"). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information.`; const suffix = options.interactive ? 
' If unsure whether to save something, you can ask the user, "Should I remember that for you?"' : ''; diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 225fa21c4a..d5ff8714b0 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -79,6 +79,7 @@ export interface OperationalGuidelinesOptions { interactive: boolean; interactiveShellEnabled: boolean; topicUpdateNarration: boolean; + memoryManagerEnabled: boolean; } export type SandboxMode = 'macos-seatbelt' | 'generic' | 'outside'; @@ -777,6 +778,10 @@ function toolUsageInteractive( function toolUsageRememberingFacts( options: OperationalGuidelinesOptions, ): string { + if (options.memoryManagerEnabled) { + return ` +- **Memory Tool:** You MUST use ${formatToolName(MEMORY_TOOL_NAME)} to proactively record facts, preferences, and workflows that apply across all sessions. Whenever the user explicitly tells you to "remember" something, or when they state a preference or workflow (like "always lint after editing"), you MUST immediately call the save_memory subagent. Never save transient session state. Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is strictly for persistent general knowledge.`; + } const base = ` - **Memory Tool:** Use ${formatToolName(MEMORY_TOOL_NAME)} only for global user preferences, personal facts, or high-level information that applies across all sessions. Never save workspace-specific context, local file paths, or transient session state. 
Do not use memory to store summaries of code changes, bug fixes, or findings discovered during a task; this tool is for persistent user-related information only.`; const suffix = options.interactive diff --git a/packages/core/src/scheduler/scheduler.ts b/packages/core/src/scheduler/scheduler.ts index 4a92617e6d..cc14e3d875 100644 --- a/packages/core/src/scheduler/scheduler.ts +++ b/packages/core/src/scheduler/scheduler.ts @@ -363,6 +363,7 @@ export class Scheduler { callId: request.callId, schedulerId: this.schedulerId, parentCallId: this.parentCallId, + subagent: this.subagent, }, () => { try { @@ -670,6 +671,7 @@ export class Scheduler { callId: activeCall.request.callId, schedulerId: this.schedulerId, parentCallId: this.parentCallId, + subagent: this.subagent, }, () => this.executor.execute({ diff --git a/packages/core/src/utils/toolCallContext.ts b/packages/core/src/utils/toolCallContext.ts index e89d20ddef..23c3bcaa46 100644 --- a/packages/core/src/utils/toolCallContext.ts +++ b/packages/core/src/utils/toolCallContext.ts @@ -16,6 +16,8 @@ export interface ToolCallContext { schedulerId: string; /** The ID of the parent tool call, if this is a nested execution (e.g., in a subagent). */ parentCallId?: string; + /** The name of the subagent executing the tool, if applicable. 
*/ + subagent?: string; } /** diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index f85a39bb35..2b528ad8dc 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -2451,6 +2451,13 @@ }, "additionalProperties": false }, + "memoryManager": { + "title": "Memory Manager Agent", + "description": "Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.", + "markdownDescription": "Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`", + "default": false, + "type": "boolean" + }, "topicUpdateNarration": { "title": "Topic & Update Narration", "description": "Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting.",