feat(memory): add Auto Memory inbox flow with canonical-patch contract (#26338)

2026-05-13 21:32:56 -07:00 · 2026-05-04 12:07:13 -07:00
parent 60a6a47d56
commit a7beb890d0
26 changed files with 4279 additions and 115 deletions
@@ -0,0 +1,489 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Live-LLM evals that pin down the auto-memory inbox contract:
+ *   1. Canonical filename — agent uses `.inbox/<kind>/extraction.patch`.
+ *   2. Incremental merge — agent rewrites an existing extraction.patch
+ *      instead of creating new patch files alongside.
+ *   3. Absolute-path pointers — when the agent creates a sibling .md, the
+ *      paired MEMORY.md hunk references it by absolute path.
+ *   4. Project-root protection — agent never writes to
+ *      `<projectRoot>/GEMINI.md` even when content is team-shared.
+ *
+ * Each test seeds session transcripts with strong, consistent signal so the
+ * extraction agent will reasonably produce SOME output (or, in the human-only
+ * test, refrain from producing output that targets forbidden paths). Tests
+ * are USUALLY_PASSES policy because LLM behavior is stochastic; the harness
+ * already retries up to 3 times.
+ */
+
+import fsp from 'node:fs/promises';
+import path from 'node:path';
+import { describe, expect } from 'vitest';
+import {
+  type Config,
+  ApprovalMode,
+  SESSION_FILE_PREFIX,
+  getProjectHash,
+  startMemoryService,
+} from '@google/gemini-cli-core';
+import { componentEvalTest } from './component-test-helper.js';
+
+/** Describes one fabricated chat session to be written into the chats directory. */
+interface SeedSession {
+  /** Stored as the conversation's `sessionId`; its first 8 characters also appear in the seeded filename. */
+  sessionId: string;
+  /** Stored as the conversation's `summary`. */
+  summary: string;
+  /** User messages in order; seeding follows each one with a canned 'Acknowledged.' reply. */
+  userTurns: string[];
+  /** Minutes ago the session ended (must be ≥ 180 to clear the idle gate). */
+  timestampOffsetMinutes: number;
+}
+
+/** One entry of a seeded conversation's `messages` array. */
+interface MessageRecord {
+  /** `u<n>` for user turns and `a<n>` for the canned replies, numbered from 1. */
+  id: string;
+  /** ISO-8601 timestamp string. */
+  timestamp: string;
+  /** Author tag; the seeding helper emits 'user' and 'gemini'. */
+  type: string;
+  /** Message parts; the seeding helper always writes exactly one text part. */
+  content: Array<{ text: string }>;
+}
+
+/**
+ * Minimal fixture workspace (a package.json with stub scripts plus a README),
+ * passed as the `files` option of every eval in this file.
+ */
+const WORKSPACE_FILES = {
+  'package.json': JSON.stringify(
+    {
+      name: 'auto-memory-contract-eval',
+      private: true,
+      scripts: { build: 'echo build', test: 'echo test' },
+    },
+    null,
+    2,
+  ),
+  'README.md': '# Auto Memory Contract Eval\n\nFixture workspace.\n',
+};
+
+/**
+ * Config overrides shared by every eval in this file: turns on the
+ * experimental auto-memory flag and runs with the YOLO approval mode.
+ */
+const EXTRACTION_CONFIG_OVERRIDES = {
+  experimentalAutoMemory: true,
+  approvalMode: ApprovalMode.YOLO,
+};
+
+/**
+ * Expands each user turn into a pair of records: the user's message followed
+ * by a canned 'Acknowledged.' reply. Every record carries the same timestamp,
+ * fixed at six hours before the call.
+ *
+ * @param userTurns User message texts, in conversation order.
+ * @returns Two records per turn, with ids `u<n>` / `a<n>` numbered from 1.
+ */
+function buildMessages(userTurns: string[]): MessageRecord[] {
+  const sixHoursMs = 6 * 60 * 60 * 1000;
+  const stamp = new Date(Date.now() - sixHoursMs).toISOString();
+
+  const records: MessageRecord[] = [];
+  for (const [position, turnText] of userTurns.entries()) {
+    const ordinal = position + 1;
+    records.push({
+      id: `u${ordinal}`,
+      timestamp: stamp,
+      type: 'user',
+      content: [{ text: turnText }],
+    });
+    records.push({
+      id: `a${ordinal}`,
+      timestamp: stamp,
+      type: 'gemini',
+      content: [{ text: 'Acknowledged.' }],
+    });
+  }
+  return records;
+}
+
+/**
+ * Writes one pretty-printed JSON conversation file per seed session into
+ * `<projectTempDir>/chats`, creating the directory when needed.
+ *
+ * The filename is built from the session prefix, the session's end time
+ * truncated to the minute (colons replaced by dashes) and the first eight
+ * characters of the session id.
+ *
+ * @param config Provides the temp directory and the project root.
+ * @param sessions Sessions to materialise, written sequentially in order.
+ */
+async function seedSessions(
+  config: Config,
+  sessions: SeedSession[],
+): Promise<void> {
+  const msPerMinute = 60 * 1000;
+  const sevenHoursMs = 7 * 60 * 60 * 1000;
+
+  const chatsDir = path.join(config.storage.getProjectTempDir(), 'chats');
+  await fsp.mkdir(chatsDir, { recursive: true });
+  const projectRoot = config.storage.getProjectRoot();
+
+  for (const seed of sessions) {
+    const endedAt = new Date(
+      Date.now() - seed.timestampOffsetMinutes * msPerMinute,
+    );
+    // 'YYYY-MM-DDTHH:MM' with the colon swapped for a dash.
+    const minuteStamp = endedAt.toISOString().slice(0, 16).replace(/:/g, '-');
+    const shortId = seed.sessionId.slice(0, 8);
+    const targetPath = path.join(
+      chatsDir,
+      `${SESSION_FILE_PREFIX}${minuteStamp}-${shortId}.json`,
+    );
+
+    const serialized = JSON.stringify(
+      {
+        sessionId: seed.sessionId,
+        projectHash: getProjectHash(projectRoot),
+        summary: seed.summary,
+        startTime: new Date(Date.now() - sevenHoursMs).toISOString(),
+        lastUpdated: endedAt.toISOString(),
+        messages: buildMessages(seed.userTurns),
+      },
+      null,
+      2,
+    );
+    await fsp.writeFile(targetPath, serialized);
+  }
+}
+
+/** What the inbox looked like after an extraction run. */
+interface InboxSnapshot {
+  /** Sorted `.patch` filenames found in `.inbox/private`. */
+  privateFiles: string[];
+  /** Sorted `.patch` filenames found in `.inbox/global`. */
+  globalFiles: string[];
+  /** Text of each readable private patch, keyed by filename (global patches are not read). */
+  privateContents: Map<string, string>;
+}
+
+/**
+ * Captures the `.patch` files currently present in the memory inbox.
+ *
+ * A missing or unreadable kind directory leaves that kind's list empty, and a
+ * private patch that cannot be read is simply absent from `privateContents`.
+ *
+ * @param config Provides the project memory temp directory.
+ * @returns Sorted patch names per kind plus the text of the private patches.
+ */
+async function snapshotInbox(config: Config): Promise<InboxSnapshot> {
+  const inboxRoot = path.join(
+    config.storage.getProjectMemoryTempDir(),
+    '.inbox',
+  );
+  const snapshot: InboxSnapshot = {
+    privateFiles: [],
+    globalFiles: [],
+    privateContents: new Map(),
+  };
+
+  // Sorted *.patch names in `dir`, or null when the directory can't be listed.
+  const listPatchNames = async (dir: string): Promise<string[] | null> => {
+    let names: string[];
+    try {
+      names = await fsp.readdir(dir);
+    } catch {
+      return null;
+    }
+    return names.filter((candidate) => candidate.endsWith('.patch')).sort();
+  };
+
+  const privateDir = path.join(inboxRoot, 'private');
+  const privatePatches = await listPatchNames(privateDir);
+  if (privatePatches !== null) {
+    snapshot.privateFiles = privatePatches;
+    for (const patchName of privatePatches) {
+      let text: string;
+      try {
+        text = await fsp.readFile(path.join(privateDir, patchName), 'utf-8');
+      } catch {
+        // Best effort: an unreadable patch is left out of the contents map.
+        continue;
+      }
+      snapshot.privateContents.set(patchName, text);
+    }
+  }
+
+  const globalPatches = await listPatchNames(path.join(inboxRoot, 'global'));
+  if (globalPatches !== null) {
+    snapshot.globalFiles = globalPatches;
+  }
+
+  return snapshot;
+}
+
+describe('Auto Memory Contract', () => {
+  componentEvalTest('USUALLY_PASSES', {
+    suiteName: 'auto-memory-contract',
+    suiteType: 'component-level',
+    name: 'uses canonical extraction.patch filename when writing private memory',
+    files: WORKSPACE_FILES,
+    timeout: 240000,
+    configOverrides: EXTRACTION_CONFIG_OVERRIDES,
+    setup: async (config) => {
+      await seedSessions(config, [
+        {
+          sessionId: 'verify-memory-cmd-1',
+          summary:
+            'Confirm that this project verifies memory edits with `npm run verify:memory`',
+          timestampOffsetMinutes: 420,
+          userTurns: [
+            'For this project, every memory-system change is verified with `npm run verify:memory` before we hand the change back.',
+            'That command is the gate. Without it the change is not considered done.',
+            'It runs typechecks, the related unit tests, and a snapshot diff.',
+            'Future agents working on memory should always run it after editing memoryService or commands/memory.ts.',
+            'This is a durable rule for this project, not a one-off.',
+            'The check is fast, under a minute, and failure means revert.',
+            'Treat it as part of the memory subsystem contract.',
+            'I want this remembered for next time.',
+            'It applies to anything in packages/core/src/services/memoryService.ts and packages/core/src/commands/memory.ts.',
+            'Make sure agents do not skip the verify step.',
+          ],
+        },
+        {
+          sessionId: 'verify-memory-cmd-2',
+          summary: 'Same memory-verify command in another session',
+          timestampOffsetMinutes: 360,
+          userTurns: [
+            'I had to remind the previous agent to run `npm run verify:memory` again.',
+            'It is the durable verification command for memory edits in this repo.',
+            'The agent forgot, even though we agreed last time.',
+            'Please remember it for future memory-related work.',
+            'It is the official verification step for memory changes.',
+            'Run it whenever you touch memoryService.ts or commands/memory.ts.',
+            'No exceptions. The command must finish green.',
+            'This is a recurring rule across multiple sessions now.',
+            'Make this part of your standard workflow for memory work.',
+            'Verified again that the command catches regressions in MEMORY.md handling.',
+          ],
+        },
+      ]);
+    },
+    assert: async (config) => {
+      await startMemoryService(config);
+      const { privateFiles, globalFiles } = await snapshotInbox(config);
+      const patchNamesPerKind = [privateFiles, globalFiles];
+
+      // An empty inbox is an acceptable no-op. Otherwise each kind may hold
+      // at most one patch file; more than one per kind breaks the contract.
+      for (const patchNames of patchNamesPerKind) {
+        expect(patchNames.length).toBeLessThanOrEqual(1);
+      }
+
+      // Whenever a kind did receive a patch, that patch must carry the
+      // canonical filename.
+      for (const patchNames of patchNamesPerKind) {
+        if (patchNames.length === 1) {
+          expect(patchNames[0]).toBe('extraction.patch');
+        }
+      }
+    },
+  });
+
+  componentEvalTest('USUALLY_PASSES', {
+    suiteName: 'auto-memory-contract',
+    suiteType: 'component-level',
+    name: 'merges new findings into existing extraction.patch instead of creating new files',
+    files: WORKSPACE_FILES,
+    timeout: 240000,
+    configOverrides: EXTRACTION_CONFIG_OVERRIDES,
+    setup: async (config) => {
+      const memoryDir = config.storage.getProjectMemoryTempDir();
+      const inboxPrivate = path.join(memoryDir, '.inbox', 'private');
+      await fsp.mkdir(inboxPrivate, { recursive: true });
+
+      // Pre-existing canonical patch left over from a prior session.
+      const existingMemoryMd = path.join(memoryDir, 'MEMORY.md');
+      const preExistingPatch = [
+        `--- /dev/null`,
+        `+++ ${existingMemoryMd}`,
+        `@@ -0,0 +1,3 @@`,
+        `+# Project Memory`,
+        `+`,
+        `+- This project lints with \`npm run lint\` (recurring rule from session 1).`,
+        ``,
+      ].join('\n');
+      await fsp.writeFile(
+        path.join(inboxPrivate, 'extraction.patch'),
+        preExistingPatch,
+      );
+
+      // New session that surfaces a different durable fact.
+      await seedSessions(config, [
+        {
+          sessionId: 'incremental-typecheck-cmd',
+          summary:
+            'Confirm that typecheck for memory edits uses `npm run typecheck`',
+          timestampOffsetMinutes: 420,
+          userTurns: [
+            'Always run `npm run typecheck` after editing any *.ts file in this repo.',
+            'It is the standard typecheck command for the whole monorepo.',
+            'Future agents should follow this without being reminded.',
+            'It catches type errors before tests, much faster.',
+            'Run it on every TypeScript edit, no exceptions.',
+            'This is durable across the whole project.',
+            'It is the project-wide convention for TS work.',
+            'Make sure to run it after edits to memoryService.ts especially.',
+            'It is fast and catches regressions early.',
+            'Treat it as standard workflow.',
+          ],
+        },
+      ]);
+    },
+    assert: async (config) => {
+      await startMemoryService(config);
+      const { privateFiles, privateContents } = await snapshotInbox(config);
+
+      // Hard requirement #1: no proliferation. The private inbox still holds
+      // exactly one file, and it is the canonical extraction.patch.
+      expect(privateFiles).toEqual(['extraction.patch']);
+
+      // Hard requirement #2: the pre-seeded lint hunk survived, i.e. the
+      // agent merged into the existing patch rather than overwriting it.
+      const mergedPatch = privateContents.get('extraction.patch') ?? '';
+      expect(mergedPatch).toMatch(/npm run lint/);
+
+      // Deliberately NOT asserted: presence of the new typecheck fact. The
+      // agent may legitimately judge it not durable enough, so only the two
+      // requirements above are locked down.
+    },
+  });
+
+  componentEvalTest('USUALLY_PASSES', {
+    suiteName: 'auto-memory-contract',
+    suiteType: 'component-level',
+    name: 'uses absolute paths in MEMORY.md sibling pointer lines',
+    files: WORKSPACE_FILES,
+    timeout: 240000,
+    configOverrides: EXTRACTION_CONFIG_OVERRIDES,
+    setup: async (config) => {
+      // Sessions whose extracted memory has substantial detail — encourages
+      // the agent to spawn a sibling .md file (per prompt guidance).
+      await seedSessions(config, [
+        {
+          sessionId: 'detailed-release-workflow-1',
+          summary: 'Detailed release workflow that runs across multiple steps',
+          timestampOffsetMinutes: 420,
+          userTurns: [
+            'Our release workflow has several distinct phases that future agents need to follow exactly.',
+            'Phase 1 (preflight): run `npm run lint`, `npm run typecheck`, and `npm test` in that order.',
+            'Phase 2 (build): run `npm run build` and verify dist/ outputs against a checksum file.',
+            'Phase 3 (publish): run `npm run publish:dry-run` first, then `npm run publish` if no errors.',
+            'Phase 4 (post): tag the commit with `git tag v$(jq -r .version package.json)` and push.',
+            'There are pitfalls: phase 2 will silently succeed if dist/ is stale, so always check the checksum.',
+            'Phase 3 must NEVER be skipped for hotfixes; the dry-run catches credential issues.',
+            'The checklist is durable across all releases for this repo.',
+            'Future agents should reproduce these phases in order without omitting any.',
+            'This is the canonical release procedure for this project.',
+          ],
+        },
+        {
+          sessionId: 'detailed-release-workflow-2',
+          summary: 'Reusing the same multi-phase release workflow',
+          timestampOffsetMinutes: 360,
+          userTurns: [
+            'I just ran the release workflow again and it caught an issue in phase 2 because the checksum mismatched.',
+            'Confirms the durable rule: always check the dist/ checksum after building.',
+            'The 4-phase release procedure (preflight, build, publish, post) is the recurring workflow.',
+            'I want this captured as durable memory because we use it every release.',
+            'Each phase has multiple sub-steps and pitfalls, so it deserves substantial detail.',
+            'Please remember the phases for future agents.',
+            'The procedure has been the same for the last 6 releases.',
+            'It includes the verify-checksum step that just saved us from a bad publish.',
+            'This is a recurring multi-step workflow, not a one-off.',
+            'Make sure future sessions know about all 4 phases and their pitfalls.',
+          ],
+        },
+      ]);
+    },
+    assert: async (config) => {
+      await startMemoryService(config);
+      const inbox = await snapshotInbox(config);
+      const memoryDir = config.storage.getProjectMemoryTempDir();
+
+      // The agent might choose to add brief facts directly to MEMORY.md
+      // without spawning a sibling. That's a valid outcome; we only enforce
+      // the absolute-path rule WHEN a sibling is created.
+      if (inbox.privateFiles.length === 0) {
+        return; // No-op extraction: nothing to assert.
+      }
+      expect(inbox.privateFiles).toEqual(['extraction.patch']);
+
+      const patch = inbox.privateContents.get('extraction.patch') ?? '';
+
+      // Escapes regex metacharacters so the directory is matched literally.
+      // This is a plain regex literal (not string-escaped): a doubled-up
+      // escape here would only match "<meta>\\]" and leave the path raw.
+      const escapeRegExp = (literal: string): string =>
+        literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+      // Find every patch file header that targets <memoryDir>/<x>.md
+      // (where x != MEMORY). Either path separator is accepted after the
+      // directory so the check also works with Windows-style paths.
+      const siblingPattern = new RegExp(
+        `\\+\\+\\+ ${escapeRegExp(memoryDir)}[\\\\/]([^\\s/\\\\]+)\\.md`,
+        'g',
+      );
+      const siblingTargets: string[] = [];
+      for (const match of patch.matchAll(siblingPattern)) {
+        const name = match[1];
+        // Skip MEMORY.md updates (those aren't siblings).
+        if (name.toLowerCase() !== 'memory') {
+          siblingTargets.push(`${name}.md`);
+        }
+      }
+
+      if (siblingTargets.length === 0) {
+        return; // No sibling creations; nothing more to check.
+      }
+
+      // Added content lines only. The `+++ <target>` file headers also start
+      // with '+' and always contain the sibling's absolute path, so counting
+      // them would make both checks below pass even with no pointer at all.
+      const addedLines = patch
+        .split('\n')
+        .filter((line) => line.startsWith('+') && !line.startsWith('+++ '));
+
+      // For each created sibling, the patch must contain a MEMORY.md
+      // pointer line that uses the ABSOLUTE path. Bare basename references
+      // are the bug we're guarding against.
+      for (const sibling of siblingTargets) {
+        const absolutePath = path.join(memoryDir, sibling);
+        const referencingLines = addedLines.filter((line) =>
+          line.includes(sibling),
+        );
+        expect(
+          referencingLines.length,
+          `Expected a MEMORY.md pointer for ${sibling} (auto-bundle would also add one).`,
+        ).toBeGreaterThan(0);
+        const allAbsolute = referencingLines.every((line) =>
+          line.includes(absolutePath),
+        );
+        expect(
+          allAbsolute,
+          `Pointer for ${sibling} must use absolute path. Saw: ${referencingLines.join(' | ')}`,
+        ).toBe(true);
+      }
+    },
+  });
+
+  componentEvalTest('USUALLY_PASSES', {
+    suiteName: 'auto-memory-contract',
+    suiteType: 'component-level',
+    name: 'never writes to <projectRoot>/GEMINI.md even for team-shared facts',
+    files: WORKSPACE_FILES,
+    timeout: 240000,
+    configOverrides: EXTRACTION_CONFIG_OVERRIDES,
+    setup: async (config) => {
+      // Sessions that talk about TEAM CONVENTIONS — the kind of content that
+      // would be a perfect fit for <projectRoot>/GEMINI.md, but the prompt
+      // forbids the extraction agent from touching it.
+      await seedSessions(config, [
+        {
+          sessionId: 'team-convention-pnpm-1',
+          summary: 'Team convention: always use pnpm not npm for installs',
+          timestampOffsetMinutes: 420,
+          userTurns: [
+            'Important team-wide convention for this repo: always use pnpm for installs, never npm.',
+            'This is a shared rule across all engineers on the project.',
+            'It applies to every package install, every clean, every dependency add.',
+            'The rationale is workspace hoisting; npm would break the monorepo layout.',
+            'This is a durable team rule, committed to the repo conventions.',
+            'Future agents working in this repo should ALWAYS use pnpm.',
+            'It is the standard team practice, no exceptions.',
+            'Document it as part of the project conventions.',
+            'Treat it as a hard rule for the team.',
+            'I want this captured for future sessions.',
+          ],
+        },
+        {
+          sessionId: 'team-convention-pnpm-2',
+          summary: 'Reaffirming the pnpm-only team rule in another session',
+          timestampOffsetMinutes: 360,
+          userTurns: [
+            'Reminder again: this team uses pnpm exclusively, never npm.',
+            'Another agent tried npm install and broke the lockfile.',
+            'The team rule is clear: pnpm only for any install operation.',
+            'It is part of our shared conventions for this codebase.',
+            'Make sure future agents follow this team-wide rule.',
+            'It applies to all engineers, all CI runs, all dev environments.',
+            'The convention is durable and well-established for this repo.',
+            'Agents should read this rule from project conventions before installing.',
+            'No future agent should ever invoke `npm install` in this repo.',
+            'Always pnpm. Always.',
+          ],
+        },
+      ]);
+    },
+    assert: async (config) => {
+      await startMemoryService(config);
+      const inbox = await snapshotInbox(config);
+      const projectRoot = config.storage.getProjectRoot();
+
+      // Escapes regex metacharacters so the project root is matched
+      // literally. This is a plain regex literal (not string-escaped): a
+      // doubled-up escape here would only match "<meta>\\]" and leave the
+      // path raw, which breaks on roots containing '(', '+', '\', etc.
+      const escapeRegExp = (literal: string): string =>
+        literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+      // No private patch should target <projectRoot>/GEMINI.md or any
+      // subdirectory GEMINI.md.
+      const projectRootRegex = new RegExp(
+        `\\+\\+\\+ ${escapeRegExp(projectRoot)}.*GEMINI\\.md`,
+      );
+      for (const [name, content] of inbox.privateContents) {
+        expect(
+          projectRootRegex.test(content),
+          `Private patch "${name}" must not target a GEMINI.md under <projectRoot>. Content:\n${content}`,
+        ).toBe(false);
+      }
+
+      // Verify on disk: <projectRoot>/GEMINI.md was not created or modified
+      // by the extraction agent (snapshot rollback should also enforce this,
+      // but we double-check from the post-run state).
+      const projectGemini = path.join(projectRoot, 'GEMINI.md');
+      const exists = await fsp
+        .access(projectGemini)
+        .then(() => true)
+        .catch(() => false);
+      // The seeded workspace's WORKSPACE_FILES doesn't include GEMINI.md, so
+      // it must NOT exist after the run.
+      expect(
+        exists,
+        `<projectRoot>/GEMINI.md (${projectGemini}) must not be created by the extraction agent.`,
+      ).toBe(false);
+    },
+  });
+});
@@ -0,0 +1,447 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import os from 'node:os';
+import { afterEach, beforeEach, describe, expect, vi } from 'vitest';
+import { runEval } from './test-helper.js';
+import { SESSION_FILE_PREFIX } from '../packages/core/src/services/chatRecordingService.js';
+
+/**
+ * Mutable state shared between the hoisted module mocks and the tests: the
+ * seeded session file the fake agent reports reading, and captured log lines.
+ */
+const evalState = vi.hoisted(() => {
+  const debugLines: string[] = [];
+  return { sessionFilePath: '', debugLines };
+});
+
+/** Hoisted spy that stands in for `LocalAgentExecutor.create` in the mocks below. */
+const mocks = vi.hoisted(() => {
+  const localAgentCreate = vi.fn();
+  return { localAgentCreate };
+});
+
+// Each module below is mocked under three specifier spellings (`.js`, `.ts`
+// and extensionless) with an identical factory.
+// NOTE(review): presumably so the mock applies however the module under test
+// resolves the import — confirm before collapsing these.
+
+// LocalAgentExecutor: `create` is routed to the hoisted spy so each test can
+// install its own fake agent.
+vi.mock('../packages/core/src/agents/local-executor.js', () => ({
+  LocalAgentExecutor: {
+    create: mocks.localAgentCreate,
+  },
+}));
+
+vi.mock('../packages/core/src/agents/local-executor.ts', () => ({
+  LocalAgentExecutor: {
+    create: mocks.localAgentCreate,
+  },
+}));
+
+vi.mock('../packages/core/src/agents/local-executor', () => ({
+  LocalAgentExecutor: {
+    create: mocks.localAgentCreate,
+  },
+}));
+
+// ExecutionLifecycleService: `createExecution` returns a fixed stub record
+// and `completeExecution` is a bare spy.
+vi.mock('../packages/core/src/services/executionLifecycleService.js', () => ({
+  ExecutionLifecycleService: {
+    createExecution: vi.fn().mockReturnValue({ pid: 1001, result: {} }),
+    completeExecution: vi.fn(),
+  },
+}));
+
+vi.mock('../packages/core/src/services/executionLifecycleService.ts', () => ({
+  ExecutionLifecycleService: {
+    createExecution: vi.fn().mockReturnValue({ pid: 1001, result: {} }),
+    completeExecution: vi.fn(),
+  },
+}));
+
+vi.mock('../packages/core/src/services/executionLifecycleService', () => ({
+  ExecutionLifecycleService: {
+    createExecution: vi.fn().mockReturnValue({ pid: 1001, result: {} }),
+    completeExecution: vi.fn(),
+  },
+}));
+
+// debugLogger: every level appends its space-joined, stringified arguments to
+// `evalState.debugLines`, which `readRun` prints when diagnosing a failure.
+vi.mock('../packages/core/src/utils/debugLogger.js', () => ({
+  debugLogger: {
+    debug: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    log: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    warn: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    error: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+  },
+}));
+
+vi.mock('../packages/core/src/utils/debugLogger.ts', () => ({
+  debugLogger: {
+    debug: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    log: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    warn: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    error: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+  },
+}));
+
+vi.mock('../packages/core/src/utils/debugLogger', () => ({
+  debugLogger: {
+    debug: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    log: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    warn: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+    error: (...args: unknown[]) =>
+      evalState.debugLines.push(args.map(String).join(' ')),
+  },
+}));
+
+/**
+ * Hand-rolled stand-in for the config object given to `startMemoryService`
+ * (the test passes it with an `as never` cast).
+ */
+interface MockMemoryConfig {
+  /** Path getters; `createFixture` points them all inside the fixture's temp tree. */
+  storage: {
+    getProjectMemoryDir: () => string;
+    getProjectMemoryTempDir: () => string;
+    getProjectSkillsMemoryDir: () => string;
+    getProjectTempDir: () => string;
+    getProjectRoot: () => string;
+  };
+  getTargetDir: () => string;
+  getToolRegistry: () => unknown;
+  getGeminiClient: () => unknown;
+  getSkillManager: () => { getSkills: () => unknown[] };
+  isAutoMemoryEnabled: () => boolean;
+  modelConfigService: {
+    registerRuntimeModelConfig: ReturnType<typeof vi.fn>;
+  };
+  sandboxManager: undefined;
+}
+
+/** Directories created for one test plus the mock config wired to them. */
+interface Fixture {
+  /** Temp directory holding everything else; removed recursively in `afterEach`. */
+  rootDir: string;
+  /** `<rootDir>/home`; also stubbed into the `GEMINI_CLI_HOME` env var. */
+  homeDir: string;
+  /** `<rootDir>/workspace`; reported as both target dir and project root. */
+  targetDir: string;
+  /** `<rootDir>/project-temp`; its `chats` subdirectory receives seeded sessions. */
+  projectTempDir: string;
+  /** `<projectTempDir>/memory`. */
+  memoryDir: string;
+  /** `<memoryDir>/skills`. */
+  skillsDir: string;
+  config: MockMemoryConfig;
+}
+
+/**
+ * The fields this eval reads from one entry of `runs` in
+ * `.extraction-state.json`; all are optional because the file is parsed
+ * without validation.
+ */
+interface AutoMemoryRunSnapshot {
+  sessionIds?: string[];
+  memoryCandidatesCreated?: string[];
+  memoryFilesUpdated?: string[];
+  skillsCreated?: string[];
+}
+
+const fixtures: Fixture[] = [];
+
+// Resets shared state before every test and installs a fake agent executor.
+// The fake's `run()` optionally reports a read_file tool call on the seeded
+// session, then writes one private and one global patch into the inbox.
+beforeEach(() => {
+  vi.resetModules();
+  evalState.debugLines = [];
+  evalState.sessionFilePath = '';
+  mocks.localAgentCreate.mockReset();
+  mocks.localAgentCreate.mockImplementation(
+    async (_agent, context, onActivity) => {
+      const run = vi.fn().mockImplementation(async () => {
+        // Report the session read only when a test actually seeded one.
+        if (evalState.sessionFilePath) {
+          const callId = `read-inbox-routing`;
+          const eventBase = {
+            isSubagentActivityEvent: true,
+            agentName: 'auto-memory-eval',
+          };
+          onActivity({
+            ...eventBase,
+            type: 'TOOL_CALL_START',
+            data: {
+              name: 'read_file',
+              callId,
+              args: { file_path: evalState.sessionFilePath },
+            },
+          });
+          onActivity({
+            ...eventBase,
+            type: 'TOOL_CALL_END',
+            data: { id: callId, data: { isError: false } },
+          });
+        }
+
+        const { storage } = context.config as MockMemoryConfig;
+        const memoryDir = storage.getProjectMemoryTempDir();
+        const privateInbox = path.join(memoryDir, '.inbox', 'private');
+        const globalInbox = path.join(memoryDir, '.inbox', 'global');
+
+        const homeDir = process.env['GEMINI_CLI_HOME'] ?? os.homedir();
+
+        await fs.mkdir(privateInbox, { recursive: true });
+        await fs.mkdir(globalInbox, { recursive: true });
+
+        // Private candidate: proposes a new file inside the memory dir.
+        const privateTarget = path.join(memoryDir, 'verify-memory.md');
+        const privatePatchLines = [
+          `--- /dev/null`,
+          `+++ ${privateTarget}`,
+          `@@ -0,0 +1,3 @@`,
+          `+# Project Memory Candidate`,
+          `+`,
+          `+Future agents should remember that this project verifies memory changes with \`npm run verify:memory\`.`,
+          ``,
+        ];
+        await fs.writeFile(
+          path.join(privateInbox, 'verify-memory.patch'),
+          privatePatchLines.join('\n'),
+        );
+
+        // Global candidate: proposes <home>/.gemini/GEMINI.md.
+        const globalTarget = path.join(homeDir, '.gemini', 'GEMINI.md');
+        const globalPatchLines = [
+          `--- /dev/null`,
+          `+++ ${globalTarget}`,
+          `@@ -0,0 +1,1 @@`,
+          `+User prefers concise Chinese architecture plans.`,
+          ``,
+        ];
+        await fs.writeFile(
+          path.join(globalInbox, 'reply-style.patch'),
+          globalPatchLines.join('\n'),
+        );
+
+        return {
+          turn_count: 3,
+          duration_ms: 25,
+          terminate_reason: 'GOAL',
+        };
+      });
+      return { run };
+    },
+  );
+});
+
+// Restores stubbed env vars and deletes every fixture tree, newest first.
+afterEach(async () => {
+  vi.unstubAllEnvs();
+  let fixture = fixtures.pop();
+  while (fixture !== undefined) {
+    await fs.rm(fixture.rootDir, { recursive: true, force: true });
+    fixture = fixtures.pop();
+  }
+});
+
+/**
+ * Registers `fn` through `runEval` as a USUALLY_PASSES, component-level case
+ * of the 'auto-memory-modes' suite.
+ *
+ * @param name Case name shown by the runner.
+ * @param fn Test body.
+ */
+function autoMemoryEval(name: string, fn: () => Promise<void>): void {
+  // Two timeouts are handed to runEval: one inside the case descriptor and a
+  // larger one as the trailing argument.
+  const caseTimeoutMs = 30000;
+  const trailingTimeoutMs = 40000;
+  runEval(
+    'USUALLY_PASSES',
+    {
+      suiteName: 'auto-memory-modes',
+      suiteType: 'component-level',
+      name,
+      timeout: caseTimeoutMs,
+    },
+    fn,
+    trailingTimeoutMs,
+  );
+}
+
+/**
+ * Builds an isolated directory tree under the OS temp dir, points
+ * `GEMINI_CLI_HOME` at its `home` folder, and returns it together with a mock
+ * config whose path getters resolve inside that tree. The fixture is also
+ * queued in `fixtures` so `afterEach` can delete it.
+ */
+async function createFixture(): Promise<Fixture> {
+  const rootDir = await fs.mkdtemp(
+    path.join(os.tmpdir(), 'gemini-auto-memory-eval-'),
+  );
+  const underRoot = (segment: string): string => path.join(rootDir, segment);
+
+  const homeDir = underRoot('home');
+  const targetDir = underRoot('workspace');
+  const projectTempDir = underRoot('project-temp');
+  const memoryDir = path.join(projectTempDir, 'memory');
+  const skillsDir = path.join(memoryDir, 'skills');
+
+  // memoryDir and skillsDir are intentionally not created here.
+  const dirsToCreate = [homeDir, targetDir, path.join(projectTempDir, 'chats')];
+  for (const dir of dirsToCreate) {
+    await fs.mkdir(dir, { recursive: true });
+  }
+  vi.stubEnv('GEMINI_CLI_HOME', homeDir);
+
+  const storage: MockMemoryConfig['storage'] = {
+    getProjectMemoryDir: () => memoryDir,
+    getProjectMemoryTempDir: () => memoryDir,
+    getProjectSkillsMemoryDir: () => skillsDir,
+    getProjectTempDir: () => projectTempDir,
+    getProjectRoot: () => targetDir,
+  };
+  const config: MockMemoryConfig = {
+    storage,
+    getTargetDir: () => targetDir,
+    getToolRegistry: () => ({}),
+    getGeminiClient: () => ({}),
+    getSkillManager: () => ({ getSkills: () => [] }),
+    isAutoMemoryEnabled: () => true,
+    modelConfigService: {
+      registerRuntimeModelConfig: vi.fn(),
+    },
+    sandboxManager: undefined,
+  };
+
+  const created: Fixture = {
+    rootDir,
+    homeDir,
+    targetDir,
+    projectTempDir,
+    memoryDir,
+    skillsDir,
+    config,
+  };
+  fixtures.push(created);
+  return created;
+}
+
+/**
+ * Writes a newline-delimited JSON session file into the fixture's `chats`
+ * directory: one header record followed by 20 messages that alternate between
+ * a user statement and an 'Acknowledged.' reply, all stamped four hours in
+ * the past.
+ *
+ * @param fixture Fixture whose `projectTempDir` receives the file.
+ * @param sessionId Used in the header record and in the filename.
+ * @returns Absolute path of the file that was written.
+ */
+async function seedSession(
+  fixture: Fixture,
+  sessionId: string,
+): Promise<string> {
+  const sessionFilePath = path.join(
+    fixture.projectTempDir,
+    'chats',
+    `${SESSION_FILE_PREFIX}2026-04-20T10-00-${sessionId}.json`,
+  );
+  const fourHoursMs = 4 * 60 * 60 * 1000;
+  const oldTimestamp = new Date(Date.now() - fourHoursMs).toISOString();
+
+  const records: unknown[] = [
+    {
+      sessionId,
+      projectHash: 'auto-memory-eval',
+      summary: 'Capture durable auto memory routing behavior',
+      startTime: oldTimestamp,
+      lastUpdated: oldTimestamp,
+      kind: 'main',
+    },
+  ];
+  for (let ordinal = 1; ordinal <= 20; ordinal++) {
+    // Odd ordinals (even zero-based positions) are the user's turns.
+    const fromUser = ordinal % 2 === 1;
+    records.push({
+      id: `m${ordinal}`,
+      timestamp: oldTimestamp,
+      type: fromUser ? 'user' : 'gemini',
+      content: [
+        {
+          text: fromUser
+            ? 'For this project, durable memory changes are verified with `npm run verify:memory`.'
+            : 'Acknowledged.',
+        },
+      ],
+    });
+  }
+
+  const jsonLines = records.map((record) => JSON.stringify(record));
+  await fs.writeFile(sessionFilePath, jsonLines.join('\n') + '\n');
+
+  return sessionFilePath;
+}
+
+/**
+ * Sanity check that the seeded session is reported as new: builds the session
+ * index over the fixture's `chats` directory with an empty run history and
+ * expects `sessionId` among `newSessionIds`.
+ */
+async function expectSeedSessionEligible(
+  fixture: Fixture,
+  sessionId: string,
+): Promise<void> {
+  // Imported lazily so the module is loaded after the per-test module reset.
+  const memoryServiceModule = await import(
+    '../packages/core/src/services/memoryService.js'
+  );
+  const { buildSessionIndex } = memoryServiceModule;
+  const chatsDir = path.join(fixture.projectTempDir, 'chats');
+  const sessionIndex = await buildSessionIndex(chatsDir, { runs: [] });
+  expect(sessionIndex.newSessionIds).toContain(sessionId);
+}
+
+/**
+ * Returns the most recent run recorded in the fixture's
+ * `.extraction-state.json`.
+ *
+ * @throws Error with diagnostics (executor call count, memory-dir listing,
+ *   captured debug log) when the state file cannot be read, and a plain Error
+ *   when the file contains no runs.
+ */
+async function readRun(fixture: Fixture): Promise<AutoMemoryRunSnapshot> {
+  const statePath = path.join(fixture.memoryDir, '.extraction-state.json');
+
+  let raw: string;
+  try {
+    raw = await fs.readFile(statePath, 'utf-8');
+  } catch (readError) {
+    // Gather whatever context is available before failing the test.
+    const memoryEntries = await fs
+      .readdir(fixture.memoryDir, { recursive: true })
+      .then(
+        (entries) => entries.map(String).join('\n'),
+        () => '(memory dir missing)',
+      );
+    const createCalls = mocks.localAgentCreate.mock.calls.length;
+    const diagnostics = [
+      `Expected extraction state at ${statePath}.`,
+      `LocalAgentExecutor.create calls: ${createCalls}`,
+      `Memory dir entries:\n${memoryEntries}`,
+      `Debug log:\n${evalState.debugLines.join('\n')}`,
+    ];
+    throw new Error(diagnostics.join('\n'), { cause: readError });
+  }
+
+  const parsed = JSON.parse(raw) as {
+    runs?: AutoMemoryRunSnapshot[];
+  };
+  const latestRun = parsed.runs?.at(-1);
+  if (latestRun) {
+    return latestRun;
+  }
+  throw new Error('Expected an auto memory extraction run to be recorded');
+}
+
+/** Resolves to true when `filePath` is accessible, false when `fs.access` rejects. */
+async function fileExists(filePath: string): Promise<boolean> {
+  return fs.access(filePath).then(
+    () => true,
+    () => false,
+  );
+}
+
+describe('Auto Memory inbox routing', () => {
+  autoMemoryEval(
+    'every memory patch lands in .inbox/<kind>/ for review and active files stay untouched',
+    async () => {
+      // Loaded lazily so the service picks up the mocks after the module reset.
+      const { startMemoryService } = await import(
+        '../packages/core/src/services/memoryService.js'
+      );
+      const fixture = await createFixture();
+      const sessionId = 'inbox-routing-session';
+      evalState.sessionFilePath = await seedSession(fixture, sessionId);
+      await expectSeedSessionEligible(fixture, sessionId);
+
+      await startMemoryService(fixture.config as never);
+
+      // Where the fake agent drops its candidate patches.
+      const inboxDir = path.join(fixture.memoryDir, '.inbox');
+      const privatePatchPath = path.join(
+        inboxDir,
+        'private',
+        'verify-memory.patch',
+      );
+      const globalPatchPath = path.join(
+        inboxDir,
+        'global',
+        'reply-style.patch',
+      );
+
+      // The live files those patches would create if they were applied.
+      const activePrivateMemoryPath = path.join(
+        fixture.memoryDir,
+        'verify-memory.md',
+      );
+      const activeGlobalMemoryPath = path.join(
+        fixture.homeDir,
+        '.gemini',
+        'GEMINI.md',
+      );
+      const run = await readRun(fixture);
+
+      // Both patches reached the inbox with their expected content.
+      await expect(fs.readFile(privatePatchPath, 'utf-8')).resolves.toContain(
+        'npm run verify:memory',
+      );
+      await expect(fs.readFile(globalPatchPath, 'utf-8')).resolves.toContain(
+        'concise Chinese architecture plans',
+      );
+
+      // Nothing was applied: every patch has to be reviewed manually first.
+      const privateApplied = await fileExists(activePrivateMemoryPath);
+      expect(privateApplied).toBe(false);
+      const globalApplied = await fileExists(activeGlobalMemoryPath);
+      expect(globalApplied).toBe(false);
+
+      // The run state lists both patches as candidates and no updated files.
+      const expectedCandidates = [privatePatchPath, globalPatchPath].map(
+        (patchPath) => path.relative(fixture.memoryDir, patchPath),
+      );
+      expect(run.memoryFilesUpdated ?? []).toEqual([]);
+      expect(run.memoryCandidatesCreated ?? []).toEqual(
+        expect.arrayContaining(expectedCandidates),
+      );
+    },
+  );
+});