feat(memory): add Auto Memory inbox flow with canonical-patch contract (#26338)

This commit is contained in:
Sandy Tao
2026-05-04 12:07:13 -07:00
committed by GitHub
parent 60a6a47d56
commit a7beb890d0
26 changed files with 4279 additions and 115 deletions
+489
View File
@@ -0,0 +1,489 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Live-LLM evals that pin down the auto-memory inbox contract:
* 1. Canonical filename — agent uses `.inbox/<kind>/extraction.patch`.
* 2. Incremental merge — agent rewrites an existing extraction.patch
* instead of creating new patch files alongside.
* 3. Absolute-path pointers — when the agent creates a sibling .md, the
* paired MEMORY.md hunk references it by absolute path.
* 4. Project-root protection — agent never writes to
* `<projectRoot>/GEMINI.md` even when content is team-shared.
*
* Each test seeds session transcripts with strong, consistent signal so the
* extraction agent will reasonably produce SOME output (or, in the human-only
* test, refrain from producing output that targets forbidden paths). Tests
* are USUALLY_PASSES policy because LLM behavior is stochastic; the harness
* already retries up to 3 times.
*/
import fsp from 'node:fs/promises';
import path from 'node:path';
import { describe, expect } from 'vitest';
import {
type Config,
ApprovalMode,
SESSION_FILE_PREFIX,
getProjectHash,
startMemoryService,
} from '@google/gemini-cli-core';
import { componentEvalTest } from './component-test-helper.js';
/** Fixture describing one recorded chat session to seed on disk. */
interface SeedSession {
  sessionId: string;
  /** Human-readable summary stored in the session header record. */
  summary: string;
  /** User-authored turns; each is paired with a stub model reply. */
  userTurns: string[];
  /** Minutes ago the session ended (must be ≥ 180 to clear the idle gate). */
  timestampOffsetMinutes: number;
}
/** Shape of a single message inside a seeded session transcript. */
interface MessageRecord {
  id: string;
  timestamp: string;
  // Discriminates 'user' vs 'gemini' turns (see buildMessages).
  type: string;
  content: Array<{ text: string }>;
}
// Minimal fixture workspace the evals run in. Deliberately contains no
// GEMINI.md so the project-root-protection test can assert one is never
// created by the extraction agent.
const WORKSPACE_FILES = {
  'package.json': JSON.stringify(
    {
      name: 'auto-memory-contract-eval',
      private: true,
      scripts: { build: 'echo build', test: 'echo test' },
    },
    null,
    2,
  ),
  'README.md': '# Auto Memory Contract Eval\n\nFixture workspace.\n',
};
// Config overrides shared by every eval: turn the experimental auto-memory
// feature on and auto-approve tool calls so the extraction agent can run
// unattended.
const EXTRACTION_CONFIG_OVERRIDES = {
  experimentalAutoMemory: true,
  approvalMode: ApprovalMode.YOLO,
};
/**
 * Expands user turns into an alternating user/model transcript.
 *
 * Every user message is followed by a stub 'Acknowledged.' model reply so
 * the transcript reads as a plausible completed conversation. All records
 * share a single timestamp six hours in the past.
 */
function buildMessages(userTurns: string[]): MessageRecord[] {
  const sixHoursMs = 6 * 60 * 60 * 1000;
  const stamp = new Date(Date.now() - sixHoursMs).toISOString();
  const records: MessageRecord[] = [];
  userTurns.forEach((text, i) => {
    records.push({
      id: `u${i + 1}`,
      timestamp: stamp,
      type: 'user',
      content: [{ text }],
    });
    records.push({
      id: `a${i + 1}`,
      timestamp: stamp,
      type: 'gemini',
      content: [{ text: 'Acknowledged.' }],
    });
  });
  return records;
}
/**
 * Materializes seed sessions as chat-recording JSON files under the
 * project temp dir's `chats/` folder so the extraction agent discovers
 * them as prior conversations.
 */
async function seedSessions(
  config: Config,
  sessions: SeedSession[],
): Promise<void> {
  const chatsDir = path.join(config.storage.getProjectTempDir(), 'chats');
  await fsp.mkdir(chatsDir, { recursive: true });
  const projectRoot = config.storage.getProjectRoot();
  for (const session of sessions) {
    const endedAt = new Date(
      Date.now() - session.timestampOffsetMinutes * 60 * 1000,
    );
    // Filename timestamp: ISO minute precision with ':' made fs-safe.
    const stamp = endedAt.toISOString().slice(0, 16).replace(/:/g, '-');
    const shortId = session.sessionId.slice(0, 8);
    const fileName = `${SESSION_FILE_PREFIX}${stamp}-${shortId}.json`;
    const record = {
      sessionId: session.sessionId,
      projectHash: getProjectHash(projectRoot),
      summary: session.summary,
      startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(),
      lastUpdated: endedAt.toISOString(),
      messages: buildMessages(session.userTurns),
    };
    await fsp.writeFile(
      path.join(chatsDir, fileName),
      JSON.stringify(record, null, 2),
    );
  }
}
/** Result of reading the on-disk `.inbox/` state after an extraction run. */
interface InboxSnapshot {
  /** Sorted `.patch` filenames under `.inbox/private/` (empty if absent). */
  privateFiles: string[];
  /** Sorted `.patch` filenames under `.inbox/global/` (empty if absent). */
  globalFiles: string[];
  /** Private patch filename → file contents (only readable files). */
  privateContents: Map<string, string>;
}
/**
 * Reads the post-run inbox state from disk.
 *
 * A missing inbox directory is treated as "the agent wrote nothing of that
 * kind" rather than an error. Private patch contents are also captured so
 * assertions can inspect hunk targets.
 */
async function snapshotInbox(config: Config): Promise<InboxSnapshot> {
  const memoryDir = config.storage.getProjectMemoryTempDir();
  const snapshot: InboxSnapshot = {
    privateFiles: [],
    globalFiles: [],
    privateContents: new Map(),
  };
  const listPatches = async (dir: string): Promise<string[]> => {
    try {
      const names = await fsp.readdir(dir);
      return names.filter((name) => name.endsWith('.patch')).sort();
    } catch {
      return []; // Directory absent: no-op extraction for this kind.
    }
  };
  const privateDir = path.join(memoryDir, '.inbox', 'private');
  snapshot.privateFiles = await listPatches(privateDir);
  for (const fileName of snapshot.privateFiles) {
    try {
      const text = await fsp.readFile(path.join(privateDir, fileName), 'utf-8');
      snapshot.privateContents.set(fileName, text);
    } catch {
      // Unreadable patch: leave it out of the contents map.
    }
  }
  snapshot.globalFiles = await listPatches(
    path.join(memoryDir, '.inbox', 'global'),
  );
  return snapshot;
}
describe('Auto Memory Contract', () => {
  // Contract 1: canonical filename. Two sessions repeat the same durable
  // rule; any private/global patch the agent writes must be named exactly
  // `extraction.patch`.
  componentEvalTest('USUALLY_PASSES', {
    suiteName: 'auto-memory-contract',
    suiteType: 'component-level',
    name: 'uses canonical extraction.patch filename when writing private memory',
    files: WORKSPACE_FILES,
    timeout: 240000,
    configOverrides: EXTRACTION_CONFIG_OVERRIDES,
    setup: async (config) => {
      // 420/360-minute offsets keep both sessions past the ≥180-minute
      // idle gate (see SeedSession.timestampOffsetMinutes).
      await seedSessions(config, [
        {
          sessionId: 'verify-memory-cmd-1',
          summary:
            'Confirm that this project verifies memory edits with `npm run verify:memory`',
          timestampOffsetMinutes: 420,
          userTurns: [
            'For this project, every memory-system change is verified with `npm run verify:memory` before we hand the change back.',
            'That command is the gate. Without it the change is not considered done.',
            'It runs typechecks, the related unit tests, and a snapshot diff.',
            'Future agents working on memory should always run it after editing memoryService or commands/memory.ts.',
            'This is a durable rule for this project, not a one-off.',
            'The check is fast, under a minute, and failure means revert.',
            'Treat it as part of the memory subsystem contract.',
            'I want this remembered for next time.',
            'It applies to anything in packages/core/src/services/memoryService.ts and packages/core/src/commands/memory.ts.',
            'Make sure agents do not skip the verify step.',
          ],
        },
        {
          sessionId: 'verify-memory-cmd-2',
          summary: 'Same memory-verify command in another session',
          timestampOffsetMinutes: 360,
          userTurns: [
            'I had to remind the previous agent to run `npm run verify:memory` again.',
            'It is the durable verification command for memory edits in this repo.',
            'The agent forgot, even though we agreed last time.',
            'Please remember it for future memory-related work.',
            'It is the official verification step for memory changes.',
            'Run it whenever you touch memoryService.ts or commands/memory.ts.',
            'No exceptions. The command must finish green.',
            'This is a recurring rule across multiple sessions now.',
            'Make this part of your standard workflow for memory work.',
            'Verified again that the command catches regressions in MEMORY.md handling.',
          ],
        },
      ]);
    },
    assert: async (config) => {
      // Run the real memory service over the seeded transcripts.
      await startMemoryService(config);
      const inbox = await snapshotInbox(config);
      // Either the agent extracted nothing (acceptable no-op) OR it extracted
      // exactly one canonical file per kind. Multiple files per kind violates
      // the contract.
      expect(inbox.privateFiles.length).toBeLessThanOrEqual(1);
      expect(inbox.globalFiles.length).toBeLessThanOrEqual(1);
      // Strong assertion: when the agent DID write a private patch, it must
      // be the canonical filename.
      if (inbox.privateFiles.length === 1) {
        expect(inbox.privateFiles[0]).toBe('extraction.patch');
      }
      if (inbox.globalFiles.length === 1) {
        expect(inbox.globalFiles[0]).toBe('extraction.patch');
      }
    },
  });
  // Contract 2: incremental merge. A canonical patch already exists in the
  // private inbox; after a new session the agent must rewrite that single
  // file (preserving its hunk) rather than add siblings next to it.
  componentEvalTest('USUALLY_PASSES', {
    suiteName: 'auto-memory-contract',
    suiteType: 'component-level',
    name: 'merges new findings into existing extraction.patch instead of creating new files',
    files: WORKSPACE_FILES,
    timeout: 240000,
    configOverrides: EXTRACTION_CONFIG_OVERRIDES,
    setup: async (config) => {
      const memoryDir = config.storage.getProjectMemoryTempDir();
      const inboxPrivate = path.join(memoryDir, '.inbox', 'private');
      await fsp.mkdir(inboxPrivate, { recursive: true });
      // Pre-existing canonical patch left over from a prior session.
      const existingMemoryMd = path.join(memoryDir, 'MEMORY.md');
      const preExistingPatch = [
        `--- /dev/null`,
        `+++ ${existingMemoryMd}`,
        `@@ -0,0 +1,3 @@`,
        `+# Project Memory`,
        `+`,
        `+- This project lints with \`npm run lint\` (recurring rule from session 1).`,
        ``,
      ].join('\n');
      await fsp.writeFile(
        path.join(inboxPrivate, 'extraction.patch'),
        preExistingPatch,
      );
      // New session that surfaces a different durable fact.
      await seedSessions(config, [
        {
          sessionId: 'incremental-typecheck-cmd',
          summary:
            'Confirm that typecheck for memory edits uses `npm run typecheck`',
          timestampOffsetMinutes: 420,
          userTurns: [
            'Always run `npm run typecheck` after editing any *.ts file in this repo.',
            'It is the standard typecheck command for the whole monorepo.',
            'Future agents should follow this without being reminded.',
            'It catches type errors before tests, much faster.',
            'Run it on every TypeScript edit, no exceptions.',
            'This is durable across the whole project.',
            'It is the project-wide convention for TS work.',
            'Make sure to run it after edits to memoryService.ts especially.',
            'It is fast and catches regressions early.',
            'Treat it as standard workflow.',
          ],
        },
      ]);
    },
    assert: async (config) => {
      await startMemoryService(config);
      const inbox = await snapshotInbox(config);
      // Contract: still ONLY ONE file in private inbox, and its name is the
      // canonical extraction.patch.
      expect(inbox.privateFiles).toEqual(['extraction.patch']);
      // The single canonical patch must STILL contain the old hunk (the
      // agent must merge with existing rather than replace blindly), AND
      // ideally also contain the new typecheck fact.
      const merged = inbox.privateContents.get('extraction.patch') ?? '';
      expect(merged).toMatch(/npm run lint/);
      // Soft assertion: the agent SHOULD have added the new fact too. We
      // don't fail the test if it didn't (the agent may legitimately decide
      // the new fact isn't durable enough), but the file must be intact.
      // The hard assertion (no proliferation + old content preserved) is
      // what we lock down.
    },
  });
componentEvalTest('USUALLY_PASSES', {
suiteName: 'auto-memory-contract',
suiteType: 'component-level',
name: 'uses absolute paths in MEMORY.md sibling pointer lines',
files: WORKSPACE_FILES,
timeout: 240000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
// Sessions whose extracted memory has substantial detail — encourages
// the agent to spawn a sibling .md file (per prompt guidance).
await seedSessions(config, [
{
sessionId: 'detailed-release-workflow-1',
summary: 'Detailed release workflow that runs across multiple steps',
timestampOffsetMinutes: 420,
userTurns: [
'Our release workflow has several distinct phases that future agents need to follow exactly.',
'Phase 1 (preflight): run `npm run lint`, `npm run typecheck`, and `npm test` in that order.',
'Phase 2 (build): run `npm run build` and verify dist/ outputs against a checksum file.',
'Phase 3 (publish): run `npm run publish:dry-run` first, then `npm run publish` if no errors.',
'Phase 4 (post): tag the commit with `git tag v$(jq -r .version package.json)` and push.',
'There are pitfalls: phase 2 will silently succeed if dist/ is stale, so always check the checksum.',
'Phase 3 must NEVER be skipped for hotfixes; the dry-run catches credential issues.',
'The checklist is durable across all releases for this repo.',
'Future agents should reproduce these phases in order without omitting any.',
'This is the canonical release procedure for this project.',
],
},
{
sessionId: 'detailed-release-workflow-2',
summary: 'Reusing the same multi-phase release workflow',
timestampOffsetMinutes: 360,
userTurns: [
'I just ran the release workflow again and it caught an issue in phase 2 because the checksum mismatched.',
'Confirms the durable rule: always check the dist/ checksum after building.',
'The 4-phase release procedure (preflight, build, publish, post) is the recurring workflow.',
'I want this captured as durable memory because we use it every release.',
'Each phase has multiple sub-steps and pitfalls, so it deserves substantial detail.',
'Please remember the phases for future agents.',
'The procedure has been the same for the last 6 releases.',
'It includes the verify-checksum step that just saved us from a bad publish.',
'This is a recurring multi-step workflow, not a one-off.',
'Make sure future sessions know about all 4 phases and their pitfalls.',
],
},
]);
},
assert: async (config) => {
await startMemoryService(config);
const inbox = await snapshotInbox(config);
const memoryDir = config.storage.getProjectMemoryTempDir();
// The agent might choose to add brief facts directly to MEMORY.md
// without spawning a sibling. That's a valid outcome; we only enforce
// the absolute-path rule WHEN a sibling is created.
if (inbox.privateFiles.length === 0) {
return; // No-op extraction: nothing to assert.
}
expect(inbox.privateFiles).toEqual(['extraction.patch']);
const patch = inbox.privateContents.get('extraction.patch') ?? '';
// Find any /dev/null sibling-creation hunk that targets <memoryDir>/<x>.md
// (where x != MEMORY).
const siblingPattern = new RegExp(
`\\+\\+\\+ ${memoryDir.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&')}/([^\\s/]+)\\.md`,
'g',
);
const siblingTargets: string[] = [];
let match: RegExpExecArray | null;
while ((match = siblingPattern.exec(patch)) !== null) {
const name = match[1];
// Skip MEMORY.md updates (those aren't siblings).
if (name.toLowerCase() !== 'memory') {
siblingTargets.push(`${name}.md`);
}
}
if (siblingTargets.length === 0) {
return; // No sibling creations; nothing more to check.
}
// For each created sibling, the patch must contain a MEMORY.md
// pointer line that uses the ABSOLUTE path. Bare basename references
// are the bug we're guarding against.
for (const sibling of siblingTargets) {
const absolutePath = path.join(memoryDir, sibling);
// Look for an added line referencing the sibling.
const addedLines = patch
.split('\n')
.filter((line) => line.startsWith('+'));
const referencingLines = addedLines.filter((line) =>
line.includes(sibling),
);
expect(
referencingLines.length,
`Expected a MEMORY.md pointer for ${sibling} (auto-bundle would also add one).`,
).toBeGreaterThan(0);
const allAbsolute = referencingLines.every((line) =>
line.includes(absolutePath),
);
expect(
allAbsolute,
`Pointer for ${sibling} must use absolute path. Saw: ${referencingLines.join(' | ')}`,
).toBe(true);
}
},
});
componentEvalTest('USUALLY_PASSES', {
suiteName: 'auto-memory-contract',
suiteType: 'component-level',
name: 'never writes to <projectRoot>/GEMINI.md even for team-shared facts',
files: WORKSPACE_FILES,
timeout: 240000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
// Sessions that talk about TEAM CONVENTIONS — the kind of content that
// would be a perfect fit for <projectRoot>/GEMINI.md, but the prompt
// forbids the extraction agent from touching it.
await seedSessions(config, [
{
sessionId: 'team-convention-pnpm-1',
summary: 'Team convention: always use pnpm not npm for installs',
timestampOffsetMinutes: 420,
userTurns: [
'Important team-wide convention for this repo: always use pnpm for installs, never npm.',
'This is a shared rule across all engineers on the project.',
'It applies to every package install, every clean, every dependency add.',
'The rationale is workspace hoisting; npm would break the monorepo layout.',
'This is a durable team rule, committed to the repo conventions.',
'Future agents working in this repo should ALWAYS use pnpm.',
'It is the standard team practice, no exceptions.',
'Document it as part of the project conventions.',
'Treat it as a hard rule for the team.',
'I want this captured for future sessions.',
],
},
{
sessionId: 'team-convention-pnpm-2',
summary: 'Reaffirming the pnpm-only team rule in another session',
timestampOffsetMinutes: 360,
userTurns: [
'Reminder again: this team uses pnpm exclusively, never npm.',
'Another agent tried npm install and broke the lockfile.',
'The team rule is clear: pnpm only for any install operation.',
'It is part of our shared conventions for this codebase.',
'Make sure future agents follow this team-wide rule.',
'It applies to all engineers, all CI runs, all dev environments.',
'The convention is durable and well-established for this repo.',
'Agents should read this rule from project conventions before installing.',
'No future agent should ever invoke `npm install` in this repo.',
'Always pnpm. Always.',
],
},
]);
},
assert: async (config) => {
await startMemoryService(config);
const inbox = await snapshotInbox(config);
const projectRoot = config.storage.getProjectRoot();
// No private patch should target <projectRoot>/GEMINI.md or any
// subdirectory GEMINI.md.
const projectRootRegex = new RegExp(
`\\+\\+\\+ ${projectRoot.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&')}.*GEMINI\\.md`,
);
for (const [name, content] of inbox.privateContents) {
expect(
projectRootRegex.test(content),
`Private patch "${name}" must not target a GEMINI.md under <projectRoot>. Content:\n${content}`,
).toBe(false);
}
// Verify on disk: <projectRoot>/GEMINI.md was not created or modified
// by the extraction agent (snapshot rollback should also enforce this,
// but we double-check from the post-run state).
const projectGemini = path.join(projectRoot, 'GEMINI.md');
const exists = await fsp
.access(projectGemini)
.then(() => true)
.catch(() => false);
// The seeded workspace's WORKSPACE_FILES doesn't include GEMINI.md, so
// it must NOT exist after the run.
expect(
exists,
`<projectRoot>/GEMINI.md (${projectGemini}) must not be created by the extraction agent.`,
).toBe(false);
},
});
});
+447
View File
@@ -0,0 +1,447 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs/promises';
import path from 'node:path';
import os from 'node:os';
import { afterEach, beforeEach, describe, expect, vi } from 'vitest';
import { runEval } from './test-helper.js';
import { SESSION_FILE_PREFIX } from '../packages/core/src/services/chatRecordingService.js';
// Shared mutable state that the hoisted vi.mock factories below close over.
const evalState = vi.hoisted(() => ({
  sessionFilePath: '',
  debugLines: [] as string[],
}));
const mocks = vi.hoisted(() => ({
  localAgentCreate: vi.fn(),
}));
// Each module is mocked under all three specifier spellings (.js, .ts and
// extensionless) so the stub applies regardless of how importers resolve it.
vi.mock('../packages/core/src/agents/local-executor.js', () => ({
  LocalAgentExecutor: {
    create: mocks.localAgentCreate,
  },
}));
vi.mock('../packages/core/src/agents/local-executor.ts', () => ({
  LocalAgentExecutor: {
    create: mocks.localAgentCreate,
  },
}));
vi.mock('../packages/core/src/agents/local-executor', () => ({
  LocalAgentExecutor: {
    create: mocks.localAgentCreate,
  },
}));
// Stub the execution lifecycle so no real process bookkeeping happens.
vi.mock('../packages/core/src/services/executionLifecycleService.js', () => ({
  ExecutionLifecycleService: {
    createExecution: vi.fn().mockReturnValue({ pid: 1001, result: {} }),
    completeExecution: vi.fn(),
  },
}));
vi.mock('../packages/core/src/services/executionLifecycleService.ts', () => ({
  ExecutionLifecycleService: {
    createExecution: vi.fn().mockReturnValue({ pid: 1001, result: {} }),
    completeExecution: vi.fn(),
  },
}));
vi.mock('../packages/core/src/services/executionLifecycleService', () => ({
  ExecutionLifecycleService: {
    createExecution: vi.fn().mockReturnValue({ pid: 1001, result: {} }),
    completeExecution: vi.fn(),
  },
}));
// Capture debug output in evalState.debugLines so readRun() can surface it
// in failure diagnostics.
vi.mock('../packages/core/src/utils/debugLogger.js', () => ({
  debugLogger: {
    debug: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    log: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    warn: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    error: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
  },
}));
vi.mock('../packages/core/src/utils/debugLogger.ts', () => ({
  debugLogger: {
    debug: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    log: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    warn: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    error: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
  },
}));
vi.mock('../packages/core/src/utils/debugLogger', () => ({
  debugLogger: {
    debug: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    log: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    warn: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
    error: (...args: unknown[]) =>
      evalState.debugLines.push(args.map(String).join(' ')),
  },
}));
/**
 * Structural stand-in for the slice of Config that the memory service
 * touches; instances are passed to startMemoryService with `as never`.
 */
interface MockMemoryConfig {
  storage: {
    getProjectMemoryDir: () => string;
    getProjectMemoryTempDir: () => string;
    getProjectSkillsMemoryDir: () => string;
    getProjectTempDir: () => string;
    getProjectRoot: () => string;
  };
  getTargetDir: () => string;
  getToolRegistry: () => unknown;
  getGeminiClient: () => unknown;
  getSkillManager: () => { getSkills: () => unknown[] };
  isAutoMemoryEnabled: () => boolean;
  modelConfigService: {
    registerRuntimeModelConfig: ReturnType<typeof vi.fn>;
  };
  sandboxManager: undefined;
}
/** One temp-dir sandbox (home, workspace, project temp) plus its mock config. */
interface Fixture {
  /** Root temp dir; removed recursively in afterEach. */
  rootDir: string;
  homeDir: string;
  targetDir: string;
  projectTempDir: string;
  memoryDir: string;
  skillsDir: string;
  config: MockMemoryConfig;
}
/** Shape of one run entry persisted in `.extraction-state.json`. */
interface AutoMemoryRunSnapshot {
  sessionIds?: string[];
  /** Patch paths (relative to the memory dir) written to the inbox. */
  memoryCandidatesCreated?: string[];
  /** Active memory files the run applied directly — expected empty. */
  memoryFilesUpdated?: string[];
  skillsCreated?: string[];
}
// Fixtures created during a test; torn down in afterEach.
const fixtures: Fixture[] = [];
beforeEach(() => {
  vi.resetModules();
  evalState.debugLines = [];
  evalState.sessionFilePath = '';
  mocks.localAgentCreate.mockReset();
  // Fake extraction agent: emits a read_file activity for the seeded
  // session (when set), then writes one private and one global patch into
  // the inbox — exactly the artifacts the describe-block asserts on.
  mocks.localAgentCreate.mockImplementation(
    async (_agent, context, onActivity) => ({
      run: vi.fn().mockImplementation(async () => {
        if (evalState.sessionFilePath) {
          const callId = `read-inbox-routing`;
          onActivity({
            isSubagentActivityEvent: true,
            agentName: 'auto-memory-eval',
            type: 'TOOL_CALL_START',
            data: {
              name: 'read_file',
              callId,
              args: { file_path: evalState.sessionFilePath },
            },
          });
          onActivity({
            isSubagentActivityEvent: true,
            agentName: 'auto-memory-eval',
            type: 'TOOL_CALL_END',
            data: { id: callId, data: { isError: false } },
          });
        }
        const config = context.config as MockMemoryConfig;
        const memoryDir = config.storage.getProjectMemoryTempDir();
        const inboxDir = path.join(memoryDir, '.inbox');
        // GEMINI_CLI_HOME is stubbed per-fixture by createFixture.
        const homeDir = process.env['GEMINI_CLI_HOME'] ?? os.homedir();
        const globalGeminiDir = path.join(homeDir, '.gemini');
        await fs.mkdir(path.join(inboxDir, 'private'), { recursive: true });
        await fs.mkdir(path.join(inboxDir, 'global'), { recursive: true });
        const privateTarget = path.join(memoryDir, 'verify-memory.md');
        await fs.writeFile(
          path.join(inboxDir, 'private', 'verify-memory.patch'),
          [
            `--- /dev/null`,
            `+++ ${privateTarget}`,
            `@@ -0,0 +1,3 @@`,
            `+# Project Memory Candidate`,
            `+`,
            `+Future agents should remember that this project verifies memory changes with \`npm run verify:memory\`.`,
            ``,
          ].join('\n'),
        );
        const globalTarget = path.join(globalGeminiDir, 'GEMINI.md');
        await fs.writeFile(
          path.join(inboxDir, 'global', 'reply-style.patch'),
          [
            `--- /dev/null`,
            `+++ ${globalTarget}`,
            `@@ -0,0 +1,1 @@`,
            `+User prefers concise Chinese architecture plans.`,
            ``,
          ].join('\n'),
        );
        return {
          turn_count: 3,
          duration_ms: 25,
          terminate_reason: 'GOAL',
        };
      }),
    }),
  );
});
afterEach(async () => {
  vi.unstubAllEnvs();
  // Drain (not just iterate) so reentrant failures can't leak temp dirs.
  while (fixtures.length > 0) {
    const fixture = fixtures.pop();
    if (fixture) {
      await fs.rm(fixture.rootDir, { recursive: true, force: true });
    }
  }
});
/**
 * Registers one auto-memory eval under the shared suite settings.
 * USUALLY_PASSES policy: the harness retries because behavior is stochastic.
 */
function autoMemoryEval(name: string, fn: () => Promise<void>): void {
  const evalTimeoutMs = 30000;
  // Outer harness budget leaves headroom beyond the per-eval timeout.
  const harnessTimeoutMs = 40000;
  runEval(
    'USUALLY_PASSES',
    {
      suiteName: 'auto-memory-modes',
      suiteType: 'component-level',
      name,
      timeout: evalTimeoutMs,
    },
    fn,
    harnessTimeoutMs,
  );
}
/**
 * Builds an isolated on-disk sandbox (home dir, workspace, project temp)
 * and a mock Config whose storage accessors point into it. The fixture is
 * tracked so afterEach can delete the whole tree. Also points
 * GEMINI_CLI_HOME at the sandbox home dir.
 */
async function createFixture(): Promise<Fixture> {
  const rootDir = await fs.mkdtemp(
    path.join(os.tmpdir(), 'gemini-auto-memory-eval-'),
  );
  const homeDir = path.join(rootDir, 'home');
  const targetDir = path.join(rootDir, 'workspace');
  const projectTempDir = path.join(rootDir, 'project-temp');
  const memoryDir = path.join(projectTempDir, 'memory');
  const skillsDir = path.join(memoryDir, 'skills');
  // Only these need to pre-exist; memory dirs are created on demand.
  for (const dir of [homeDir, targetDir, path.join(projectTempDir, 'chats')]) {
    await fs.mkdir(dir, { recursive: true });
  }
  vi.stubEnv('GEMINI_CLI_HOME', homeDir);
  const config: MockMemoryConfig = {
    storage: {
      getProjectMemoryDir: () => memoryDir,
      getProjectMemoryTempDir: () => memoryDir,
      getProjectSkillsMemoryDir: () => skillsDir,
      getProjectTempDir: () => projectTempDir,
      getProjectRoot: () => targetDir,
    },
    getTargetDir: () => targetDir,
    getToolRegistry: () => ({}),
    getGeminiClient: () => ({}),
    getSkillManager: () => ({ getSkills: () => [] }),
    isAutoMemoryEnabled: () => true,
    modelConfigService: {
      registerRuntimeModelConfig: vi.fn(),
    },
    sandboxManager: undefined,
  };
  const fixture: Fixture = {
    rootDir,
    homeDir,
    targetDir,
    projectTempDir,
    memoryDir,
    skillsDir,
    config,
  };
  fixtures.push(fixture);
  return fixture;
}
/**
 * Writes a single JSONL chat-recording file (header record + 20 alternating
 * user/model messages) timestamped 4 hours in the past, with a strong
 * repeated signal about `npm run verify:memory`. Returns the file path.
 */
async function seedSession(
  fixture: Fixture,
  sessionId: string,
): Promise<string> {
  const sessionFilePath = path.join(
    fixture.projectTempDir,
    'chats',
    `${SESSION_FILE_PREFIX}2026-04-20T10-00-${sessionId}.json`,
  );
  const oldTimestamp = new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString();
  const userText =
    'For this project, durable memory changes are verified with `npm run verify:memory`.';
  const messages = [];
  for (let index = 0; index < 20; index++) {
    const isUser = index % 2 === 0;
    messages.push({
      id: `m${index + 1}`,
      timestamp: oldTimestamp,
      type: isUser ? 'user' : 'gemini',
      content: [{ text: isUser ? userText : 'Acknowledged.' }],
    });
  }
  const header = {
    sessionId,
    projectHash: 'auto-memory-eval',
    summary: 'Capture durable auto memory routing behavior',
    startTime: oldTimestamp,
    lastUpdated: oldTimestamp,
    kind: 'main',
  };
  // One JSON record per line, with a trailing newline.
  const lines = [header, ...messages].map((record) => JSON.stringify(record));
  await fs.writeFile(sessionFilePath, lines.join('\n') + '\n');
  return sessionFilePath;
}
/**
 * Sanity-checks the seeding: the session must appear as "new" in the
 * session index so the extraction run has something to process.
 */
async function expectSeedSessionEligible(
  fixture: Fixture,
  sessionId: string,
): Promise<void> {
  const { buildSessionIndex } = await import(
    '../packages/core/src/services/memoryService.js'
  );
  const chatsDir = path.join(fixture.projectTempDir, 'chats');
  const index = await buildSessionIndex(chatsDir, { runs: [] });
  expect(index.newSessionIds).toContain(sessionId);
}
/**
 * Loads the most recent extraction run from `.extraction-state.json`.
 * When the state file is missing, throws with a rich diagnostic (memory-dir
 * listing, mock call count, captured debug log) to make flakes debuggable.
 */
async function readRun(fixture: Fixture): Promise<AutoMemoryRunSnapshot> {
  const statePath = path.join(fixture.memoryDir, '.extraction-state.json');
  let raw: string;
  try {
    raw = await fs.readFile(statePath, 'utf-8');
  } catch (error) {
    let memoryEntries = '(memory dir missing)';
    try {
      const entries = await fs.readdir(fixture.memoryDir, { recursive: true });
      memoryEntries = entries.map(String).join('\n');
    } catch {
      // Leave default diagnostic.
    }
    const details = [
      `Expected extraction state at ${statePath}.`,
      `LocalAgentExecutor.create calls: ${mocks.localAgentCreate.mock.calls.length}`,
      `Memory dir entries:\n${memoryEntries}`,
      `Debug log:\n${evalState.debugLines.join('\n')}`,
    ];
    throw new Error(details.join('\n'), { cause: error });
  }
  const state = JSON.parse(raw) as { runs?: AutoMemoryRunSnapshot[] };
  const run = state.runs?.at(-1);
  if (!run) {
    throw new Error('Expected an auto memory extraction run to be recorded');
  }
  return run;
}
/** True when filePath exists (any file type); never throws. */
async function fileExists(filePath: string): Promise<boolean> {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
describe('Auto Memory inbox routing', () => {
  autoMemoryEval(
    'every memory patch lands in .inbox/<kind>/ for review and active files stay untouched',
    async () => {
      // Import after vi.resetModules() so the vi.mock stubs above apply.
      const { startMemoryService } = await import(
        '../packages/core/src/services/memoryService.js'
      );
      const fixture = await createFixture();
      evalState.sessionFilePath = await seedSession(
        fixture,
        'inbox-routing-session',
      );
      await expectSeedSessionEligible(fixture, 'inbox-routing-session');
      await startMemoryService(fixture.config as never);
      // Paths the mocked agent writes (inbox) vs. the active files it must
      // NOT touch (see the beforeEach mock implementation).
      const privatePatchPath = path.join(
        fixture.memoryDir,
        '.inbox',
        'private',
        'verify-memory.patch',
      );
      const globalPatchPath = path.join(
        fixture.memoryDir,
        '.inbox',
        'global',
        'reply-style.patch',
      );
      const activePrivateMemoryPath = path.join(
        fixture.memoryDir,
        'verify-memory.md',
      );
      const activeGlobalMemoryPath = path.join(
        fixture.homeDir,
        '.gemini',
        'GEMINI.md',
      );
      const run = await readRun(fixture);
      // Both patches were written to the inbox.
      await expect(fs.readFile(privatePatchPath, 'utf-8')).resolves.toContain(
        'npm run verify:memory',
      );
      await expect(fs.readFile(globalPatchPath, 'utf-8')).resolves.toContain(
        'concise Chinese architecture plans',
      );
      // No active file was touched — every patch must be reviewed manually.
      expect(await fileExists(activePrivateMemoryPath)).toBe(false);
      expect(await fileExists(activeGlobalMemoryPath)).toBe(false);
      // Run state records both patches as candidates and zero applied files.
      expect(run.memoryFilesUpdated ?? []).toEqual([]);
      expect(run.memoryCandidatesCreated ?? []).toEqual(
        expect.arrayContaining([
          path.relative(fixture.memoryDir, privatePatchPath),
          path.relative(fixture.memoryDir, globalPatchPath),
        ]),
      );
    },
  );
});