feat(memory): persist auto-memory scratchpad for skill extraction (#25873)

This commit is contained in:
Sandy Tao
2026-04-24 17:21:12 -07:00
committed by GitHub
parent a5b030b424
commit 42587de733
17 changed files with 2418 additions and 171 deletions
+163
View File
@@ -5,12 +5,78 @@
*/
import { describe, expect } from 'vitest';
import fs from 'node:fs';
import path from 'node:path';
import {
loadConversationRecord,
SESSION_FILE_PREFIX,
} from '@google/gemini-cli-core';
import {
evalTest,
assertModelHasOutput,
checkModelOutputContent,
} from './test-helper.js';
/**
 * Recursively searches `base` for a directory called `name`.
 *
 * Traversal is depth-first in the order returned by `fs.readdirSync`: each
 * sub-directory is compared against `name` and, when it does not match, is
 * searched completely before any later sibling is examined.
 *
 * Entries that no longer resolve (dangling symlinks, or files removed between
 * the directory listing and the stat call) are skipped instead of aborting
 * the whole search with an ENOENT error.
 *
 * @param base Directory to start searching from.
 * @param name Exact directory name to look for.
 * @returns Full path of the first matching directory, or `null` when `base`
 *   does not exist or contains no directory with that name.
 */
function findDir(base: string, name: string): string | null {
  if (!fs.existsSync(base)) return null;

  for (const entry of fs.readdirSync(base)) {
    const fullPath = path.join(base, entry);
    // `throwIfNoEntry: false` makes statSync return undefined for a missing
    // target rather than throwing, so one stale entry cannot break the scan.
    const stats = fs.statSync(fullPath, { throwIfNoEntry: false });
    if (!stats?.isDirectory()) continue;

    if (entry === name) return fullPath;

    const found = findDir(fullPath, name);
    if (found) return found;
  }

  return null;
}
/**
 * Loads the most recently updated conversation record for `sessionId`.
 *
 * Locates a `chats` directory somewhere under `<homeDir>/.gemini`, loads every
 * session log in it (`.json` or `.jsonl` files whose name starts with
 * `SESSION_FILE_PREFIX`), keeps the records whose `sessionId` matches, and
 * returns the one with the latest `lastUpdated` timestamp.
 *
 * @param homeDir Home directory that contains the `.gemini` folder.
 * @param sessionId Session identifier to match against each loaded record.
 * @returns The newest matching record, or `null` when none matches.
 * @throws Error when no `chats` directory exists under `<homeDir>/.gemini`.
 */
async function loadLatestSessionRecord(homeDir: string, sessionId: string) {
  const geminiDir = path.join(homeDir, '.gemini');
  const chatsDir = findDir(geminiDir, 'chats');
  if (!chatsDir) {
    throw new Error('Could not find chats directory for eval session logs');
  }

  const isSessionLog = (fileName: string): boolean => {
    if (!fileName.startsWith(SESSION_FILE_PREFIX)) return false;
    return fileName.endsWith('.json') || fileName.endsWith('.jsonl');
  };

  const sessionRecords = [];
  for (const fileName of fs.readdirSync(chatsDir)) {
    if (!isSessionLog(fileName)) continue;

    const loaded = await loadConversationRecord(path.join(chatsDir, fileName));
    if (loaded?.sessionId === sessionId) {
      sessionRecords.push(loaded);
    }
  }

  // Newest first, so the head of the list is the latest snapshot.
  sessionRecords.sort(
    (left, right) =>
      Date.parse(right.lastUpdated) - Date.parse(left.lastUpdated),
  );

  const [latest] = sessionRecords;
  return latest ?? null;
}
/**
 * Polls the session logs until the record for `sessionId` carries a
 * `memoryScratchpad`, or until `timeoutMs` has elapsed.
 *
 * The record is re-read roughly once per second. When the deadline passes
 * without a scratchpad showing up, one final read is performed and its result
 * is returned as-is, so callers may receive `null` or a record that has no
 * scratchpad and should assert on it themselves.
 *
 * @param homeDir Home directory that contains the `.gemini` folder.
 * @param sessionId Session identifier to look up.
 * @param timeoutMs Maximum time to keep polling, in milliseconds.
 * @returns The latest record for the session, or `null` if none was found.
 */
async function waitForSessionScratchpad(
  homeDir: string,
  sessionId: string,
  timeoutMs = 30000,
) {
  const pause = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));
  const stopAt = Date.now() + timeoutMs;

  for (let now = Date.now(); now < stopAt; now = Date.now()) {
    const snapshot = await loadLatestSessionRecord(homeDir, sessionId);
    const scratchpad = snapshot?.memoryScratchpad;
    if (scratchpad) {
      return snapshot;
    }
    await pause(1000);
  }

  // Deadline reached: hand back whatever the last read produces.
  return loadLatestSessionRecord(homeDir, sessionId);
}
describe('save_memory', () => {
const TEST_PREFIX = 'Save memory test: ';
const rememberingFavoriteColor = "Agent remembers user's favorite color";
@@ -569,6 +635,103 @@ describe('save_memory', () => {
},
});
const memoryV2SessionScratchpad =
  'Session summary persists memory scratchpad for memory-saving sessions';
// Single source for the session id: the eval config and the session-log lookup
// below must use the same value, so it is declared once to keep them in sync.
const memoryV2SessionScratchpadId = 'memory-scratchpad-eval';
evalTest('USUALLY_PASSES', {
  suiteName: 'default',
  suiteType: 'behavioral',
  name: memoryV2SessionScratchpad,
  sessionId: memoryV2SessionScratchpadId,
  params: {
    settings: {
      experimental: { memoryV2: true },
    },
  },
  // Seed conversation: one durable cross-project preference plus one piece of
  // explicitly transient context.
  messages: [
    {
      id: 'msg-1',
      type: 'user',
      content: [
        {
          text: 'Across all my projects, I prefer Vitest over Jest for testing.',
        },
      ],
      timestamp: '2026-01-01T00:00:00Z',
    },
    {
      id: 'msg-2',
      type: 'gemini',
      content: [{ text: 'Noted. What else should I keep in mind?' }],
      timestamp: '2026-01-01T00:00:05Z',
    },
    {
      id: 'msg-3',
      type: 'user',
      content: [
        {
          text: 'For this repo I was debugging a flaky API test earlier, but that was just transient context.',
        },
      ],
      timestamp: '2026-01-01T00:01:00Z',
    },
    {
      id: 'msg-4',
      type: 'gemini',
      content: [
        { text: 'Understood. I will only save the durable preference.' },
      ],
      timestamp: '2026-01-01T00:01:05Z',
    },
  ],
  prompt:
    'Please save any persistent preferences or facts about me from our conversation to memory.',
  assert: async (rig, result) => {
    // Tools accepted as "editing a markdown memory file" in the checks below.
    const memoryEditTools: readonly string[] = ['write_file', 'replace'];

    // Best-effort wait only: the edit may have been made with `replace`
    // instead, so a rejection here is ignored and the tool-log assertion
    // below is the real check.
    await rig.waitForToolCall('write_file').catch(() => {});

    const writeCalls = rig
      .readToolLogs()
      .filter((log) => memoryEditTools.includes(log.toolRequest.name));
    expect(
      writeCalls.length,
      'Expected memoryV2 save flow to edit a markdown memory file',
    ).toBeGreaterThan(0);

    // NOTE(review): presumably this second CLI invocation is what triggers
    // session summary generation for the previous session - confirm.
    await rig.run({
      args: ['--list-sessions'],
      approvalMode: 'yolo',
      timeout: 120000,
    });

    // Fail with a clear message instead of a non-null assertion if the rig
    // has no home directory to read session logs from.
    const homeDir = rig.homeDir;
    if (homeDir == null) {
      throw new Error(
        'Expected the test rig to expose a home directory for session logs',
      );
    }

    const record = await waitForSessionScratchpad(
      homeDir,
      memoryV2SessionScratchpadId,
    );
    const scratchpad = record?.memoryScratchpad;

    expect(
      scratchpad,
      'Expected the resumed session log to contain a memoryScratchpad after session summary generation',
    ).toBeDefined();
    expect(scratchpad?.version).toBe(1);
    expect(
      scratchpad?.toolSequence?.some((toolName) =>
        memoryEditTools.includes(toolName),
      ),
      'Expected memoryScratchpad.toolSequence to include the markdown editing tool used for memory persistence',
    ).toBe(true);
    expect(
      scratchpad?.touchedPaths?.length,
      'Expected memoryScratchpad to capture at least one touched path',
    ).toBeGreaterThan(0);
    expect(
      scratchpad?.workflowSummary,
      'Expected memoryScratchpad.workflowSummary to be populated',
    ).toMatch(/write_file|replace/i);

    assertModelHasOutput(result);
  },
});
const memoryV2RoutesUserProject =
'Agent routes personal-to-user project notes to user-project memory';
evalTest('USUALLY_PASSES', {