feat(memory): persist auto-memory scratchpad for skill extraction (#25873)

This commit is contained in:
Sandy Tao
2026-04-24 17:21:12 -07:00
committed by GitHub
parent a5b030b424
commit 42587de733
17 changed files with 2418 additions and 171 deletions
+1
View File
@@ -337,6 +337,7 @@ jobs:
if: "${{ steps.check_evals.outputs.should_run == 'true' }}"
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
GEMINI_CLI_TRUST_WORKSPACE: true
GEMINI_MODEL: 'gemini-3-pro-preview'
# Only run the always-passing behavioral tests.
EVAL_SUITE_TYPE: 'behavioral'
+1
View File
@@ -66,6 +66,7 @@ jobs:
continue-on-error: true
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
GEMINI_CLI_TRUST_WORKSPACE: true
GEMINI_MODEL: '${{ matrix.model }}'
RUN_EVALS: 'true'
EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}"
+163
View File
@@ -5,12 +5,78 @@
*/
import { describe, expect } from 'vitest';
import fs from 'node:fs';
import path from 'node:path';
import {
loadConversationRecord,
SESSION_FILE_PREFIX,
} from '@google/gemini-cli-core';
import {
evalTest,
assertModelHasOutput,
checkModelOutputContent,
} from './test-helper.js';
/**
 * Depth-first search for the first directory named `name` underneath `base`.
 *
 * Entries are visited in the order `fs.readdirSync` returns them; a matching
 * directory is returned before any of its siblings' subtrees are explored.
 *
 * @param base Directory to start searching from.
 * @param name Exact directory name to look for.
 * @returns Full path of the first match, or `null` when `base` does not exist
 *   or no directory with that name is found.
 */
function findDir(base: string, name: string): string | null {
  if (!fs.existsSync(base)) {
    return null;
  }

  for (const entry of fs.readdirSync(base)) {
    const candidate = path.join(base, entry);
    if (!fs.statSync(candidate).isDirectory()) {
      continue;
    }
    if (entry === name) {
      return candidate;
    }
    const nested = findDir(candidate, name);
    if (nested) {
      return nested;
    }
  }

  return null;
}
/**
 * Loads every session file in the eval home's `chats` directory and returns
 * the most recently updated record that belongs to `sessionId`.
 *
 * @param homeDir Home directory of the eval run; `.gemini` is searched below it.
 * @param sessionId Session id the record must carry.
 * @returns The matching record with the greatest `lastUpdated`, or `null` when
 *   no file matches.
 * @throws Error when no `chats` directory exists under `<homeDir>/.gemini`.
 */
async function loadLatestSessionRecord(homeDir: string, sessionId: string) {
  const chatsDir = findDir(path.join(homeDir, '.gemini'), 'chats');
  if (!chatsDir) {
    throw new Error('Could not find chats directory for eval session logs');
  }

  const isSessionFile = (file: string): boolean =>
    file.startsWith(SESSION_FILE_PREFIX) &&
    ['.json', '.jsonl'].some((extension) => file.endsWith(extension));
  const sessionFiles = fs.readdirSync(chatsDir).filter(isSessionFile);

  const matches = [];
  for (const fileName of sessionFiles) {
    const loaded = await loadConversationRecord(path.join(chatsDir, fileName));
    if (loaded?.sessionId === sessionId) {
      matches.push(loaded);
    }
  }

  // Newest first, so index 0 is the latest version of the session.
  matches.sort(
    (left, right) =>
      Date.parse(right.lastUpdated) - Date.parse(left.lastUpdated),
  );
  return matches[0] ?? null;
}
/**
 * Polls the session log once per second until the latest record for
 * `sessionId` carries a `memoryScratchpad`, or until `timeoutMs` elapses.
 *
 * @param homeDir Home directory of the eval run.
 * @param sessionId Session id to look up.
 * @param timeoutMs Maximum time to keep polling (default 30 seconds).
 * @returns The first record seen with a scratchpad; after the deadline, the
 *   result of one final lookup (which may lack a scratchpad or be `null`).
 */
async function waitForSessionScratchpad(
  homeDir: string,
  sessionId: string,
  timeoutMs = 30000,
) {
  const sleep = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));
  const giveUpAt = Date.now() + timeoutMs;

  while (Date.now() < giveUpAt) {
    const snapshot = await loadLatestSessionRecord(homeDir, sessionId);
    if (snapshot?.memoryScratchpad) {
      return snapshot;
    }
    await sleep(1000);
  }

  // Deadline passed: hand back whatever is on disk so the caller's assertions
  // can report the actual state.
  return loadLatestSessionRecord(homeDir, sessionId);
}
describe('save_memory', () => {
const TEST_PREFIX = 'Save memory test: ';
const rememberingFavoriteColor = "Agent remembers user's favorite color";
@@ -569,6 +635,103 @@ describe('save_memory', () => {
},
});
// Behavioral eval: with memoryV2 enabled, a session that saves memory should
// end up with a `memoryScratchpad` persisted in its session log.
const memoryV2SessionScratchpad =
'Session summary persists memory scratchpad for memory-saving sessions';
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: memoryV2SessionScratchpad,
// Fixed id so the assertion can locate this session's log afterwards.
sessionId: 'memory-scratchpad-eval',
params: {
settings: {
experimental: { memoryV2: true },
},
},
// Seeded history: one durable cross-project preference and one detail the
// user explicitly calls transient.
messages: [
{
id: 'msg-1',
type: 'user',
content: [
{
text: 'Across all my projects, I prefer Vitest over Jest for testing.',
},
],
timestamp: '2026-01-01T00:00:00Z',
},
{
id: 'msg-2',
type: 'gemini',
content: [{ text: 'Noted. What else should I keep in mind?' }],
timestamp: '2026-01-01T00:00:05Z',
},
{
id: 'msg-3',
type: 'user',
content: [
{
text: 'For this repo I was debugging a flaky API test earlier, but that was just transient context.',
},
],
timestamp: '2026-01-01T00:01:00Z',
},
{
id: 'msg-4',
type: 'gemini',
content: [
{ text: 'Understood. I will only save the durable preference.' },
],
timestamp: '2026-01-01T00:01:05Z',
},
],
prompt:
'Please save any persistent preferences or facts about me from our conversation to memory.',
assert: async (rig, result) => {
// Best-effort wait: a rejection is ignored because the check below accepts
// either `write_file` or `replace` as the editing tool.
await rig.waitForToolCall('write_file').catch(() => {});
const writeCalls = rig
.readToolLogs()
.filter((log) =>
['write_file', 'replace'].includes(log.toolRequest.name),
);
expect(
writeCalls.length,
'Expected memoryV2 save flow to edit a markdown memory file',
).toBeGreaterThan(0);
// NOTE(review): presumably this second CLI invocation is what triggers
// session summary generation for the earlier session — confirm.
await rig.run({
args: ['--list-sessions'],
approvalMode: 'yolo',
timeout: 120000,
});
// Poll the session log until a scratchpad shows up (or the helper's timeout
// elapses).
const record = await waitForSessionScratchpad(
rig.homeDir!,
'memory-scratchpad-eval',
);
expect(
record?.memoryScratchpad,
'Expected the resumed session log to contain a memoryScratchpad after session summary generation',
).toBeDefined();
expect(record?.memoryScratchpad?.version).toBe(1);
expect(
record?.memoryScratchpad?.toolSequence?.some((toolName) =>
['write_file', 'replace'].includes(toolName),
),
'Expected memoryScratchpad.toolSequence to include the markdown editing tool used for memory persistence',
).toBe(true);
expect(
record?.memoryScratchpad?.touchedPaths?.length,
'Expected memoryScratchpad to capture at least one touched path',
).toBeGreaterThan(0);
expect(
record?.memoryScratchpad?.workflowSummary,
'Expected memoryScratchpad.workflowSummary to be populated',
).toMatch(/write_file|replace/i);
assertModelHasOutput(result);
},
});
const memoryV2RoutesUserProject =
'Agent routes personal-to-user project notes to user-project memory';
evalTest('USUALLY_PASSES', {
+630 -17
View File
@@ -6,21 +6,30 @@
import fsp from 'node:fs/promises';
import path from 'node:path';
import { describe, expect } from 'vitest';
import { describe, expect, it } from 'vitest';
import {
type Config,
ApprovalMode,
type MemoryScratchpad,
SESSION_FILE_PREFIX,
getProjectHash,
startMemoryService,
} from '@google/gemini-cli-core';
import { componentEvalTest } from './component-test-helper.js';
import { ComponentRig, componentEvalTest } from './component-test-helper.js';
import {
average,
averageNullable,
countMatchingIds,
roundStat,
} from './statistics-helper.js';
import { prepareWorkspace } from './test-helper.js';
/** Description of one synthetic past session written to disk by `seedSessions`. */
interface SeedSession {
sessionId: string;
summary: string;
// User messages for the session; `buildMessages` turns them into records.
userTurns: string[];
// How many minutes before "now" the session is timestamped.
timestampOffsetMinutes: number;
// Optional scratchpad copied verbatim into the seeded session record.
memoryScratchpad?: MemoryScratchpad;
}
interface MessageRecord {
@@ -30,6 +39,81 @@ interface MessageRecord {
content: Array<{ text: string }>;
}
/** A session id paired with that session's `lastUpdated` timestamp string. */
interface SessionVersion {
sessionId: string;
lastUpdated: string;
}
/**
 * One entry of the extraction state's `runs` array, as normalized by
 * `runExtractionAndReadState` (array fields default to `[]`, the remaining
 * fields stay `undefined` when absent or of the wrong type).
 */
interface ExtractionRunSnapshot {
sessionIds: string[];
skillsCreated: string[];
candidateSessions: SessionVersion[];
processedSessions: SessionVersion[];
turnCount?: number;
durationMs?: number;
terminateReason?: string;
}
/** Everything an eval needs after one extraction run. */
interface ExtractionOutcome {
state: { runs: ExtractionRunSnapshot[] };
skillsDir: string;
// Contents of the SKILL.md files found under `skillsDir`.
skillBodies: string[];
}
/** A labelled regex that a generated skill body is expected to match. */
interface SkillQualitySignal {
label: string;
pattern: RegExp;
}
/** Per-run metrics produced by `summarizeScratchpadRun`. */
interface ScratchpadRunMetrics {
turnCount: number | null;
durationMs: number | null;
terminateReason: string | null;
skillsCreated: number;
candidateSessions: number;
processedSessions: number;
// Processed sessions whose id is in the scenario's relevant list.
relevantReads: number;
// Processed sessions whose id is in the scenario's distractor list.
distractorReads: number;
// Same value as `processedSessions`.
totalReads: number;
// relevantReads divided by the number of relevant session ids.
recall: number;
// relevantReads divided by totalReads (0 when nothing was read).
precision: number;
// relevantReads minus distractorReads.
signalScore: number;
skillQualityScore: number;
skillQualityMax: number;
// skillQualityScore / skillQualityMax (0 when there are no signals).
skillQualityRatio: number;
// Labels of the quality signals that did not match.
missingQualitySignals: string[];
}
/** Baseline and scratchpad-enhanced metrics for a single trial. */
interface ScratchpadStatsTrial {
trial: number;
baseline: ScratchpadRunMetrics;
enhanced: ScratchpadRunMetrics;
}
/** Averages of `ScratchpadRunMetrics` fields across trials. */
interface ScratchpadStatsAggregate {
turnCountAvg: number | null;
durationMsAvg: number | null;
recallAvg: number;
precisionAvg: number;
signalScoreAvg: number;
relevantReadsAvg: number;
distractorReadsAvg: number;
skillsCreatedAvg: number;
skillQualityScoreAvg: number;
skillQualityRatioAvg: number;
}
/** Shape of the JSON report written by `writeScratchpadStatsReport`. */
interface ScratchpadStatsReport {
generatedAt: string;
trials: number;
aggregate: {
baseline: ScratchpadStatsAggregate;
enhanced: ScratchpadStatsAggregate;
};
// Enhanced minus baseline, field by field.
deltas: ScratchpadStatsAggregate;
results: ScratchpadStatsTrial[];
}
const WORKSPACE_FILES = {
'package.json': JSON.stringify(
{
@@ -68,6 +152,143 @@ function buildMessages(userTurns: string[]): MessageRecord[] {
]);
}
/**
 * Pads a list of user turns up to `minTurns` entries by cycling through the
 * original turns and tagging each copy with ` (repeat N)`, where N is the
 * 1-based position of the copy in the padded list.
 *
 * @param turns Original user turns.
 * @param minTurns Minimum number of turns to return (default 10).
 * @returns `turns` itself when it is empty or already long enough; otherwise a
 *   new array of exactly `minTurns` entries starting with the original turns.
 */
function padTurns(turns: string[], minTurns = 10): string[] {
  // An empty list has nothing to cycle through: `i % 0` is NaN, which would
  // otherwise pad with "undefined (repeat N)" entries.
  if (turns.length === 0 || turns.length >= minTurns) {
    return turns;
  }

  const padded = [...turns];
  for (let i = turns.length; i < minTurns; i++) {
    padded.push(`${turns[i % turns.length]} (repeat ${i + 1})`);
  }
  return padded;
}
/**
 * Builds a version-1 memory scratchpad fixture whose tool sequence is a single
 * `run_shell_command`.
 *
 * @param workflowSummary Summary text stored on the scratchpad.
 * @param touchedPaths Paths recorded as touched by the workflow.
 * @param validationStatus Validation outcome; defaults to `'passed'`.
 * @returns The assembled scratchpad object.
 */
function createScratchpad(
  workflowSummary: string,
  touchedPaths: string[],
  validationStatus: MemoryScratchpad['validationStatus'] = 'passed',
): MemoryScratchpad {
  const scratchpad: MemoryScratchpad = {
    version: 1,
    workflowSummary,
    toolSequence: ['run_shell_command'],
    touchedPaths,
    validationStatus,
  };
  return scratchpad;
}
/**
 * Builds the six-session fixture used to compare extraction with and without
 * memory scratchpads: two sessions describing the recurring settings-docs
 * workflow and four one-off distractors.
 *
 * The session summaries deliberately resemble each other across relevant and
 * distractor sessions; only the two relevant sessions ever carry a scratchpad.
 *
 * @param withScratchpad When true, both relevant sessions share one scratchpad
 *   object; when false, no session has a scratchpad.
 * @returns The sessions plus the ids of the relevant and distractor sessions.
 */
function createWorkflowComparisonSessions(withScratchpad: boolean): {
  sessions: SeedSession[];
  relevantSessionIds: string[];
  distractorSessionIds: string[];
} {
  let sharedScratchpad: MemoryScratchpad | undefined;
  if (withScratchpad) {
    sharedScratchpad = createScratchpad(
      'run_shell_command -> run_shell_command | paths packages/cli/src/config/settings.ts, docs/settings.md | validated',
      ['packages/cli/src/config/settings.ts', 'docs/settings.md'],
    );
  }

  // Small builder so every fixture entry has the same shape.
  const session = (
    sessionId: string,
    summary: string,
    timestampOffsetMinutes: number,
    memoryScratchpad: MemoryScratchpad | undefined,
    turns: string[],
  ): SeedSession => ({
    sessionId,
    summary,
    timestampOffsetMinutes,
    memoryScratchpad,
    userTurns: padTurns(turns),
  });

  const sessions: SeedSession[] = [
    session(
      'hidden-settings-workflow-a',
      'Prepare release notes for settings launch',
      420,
      sharedScratchpad,
      [
        'When we add a new setting, the durable workflow is to regenerate the settings docs instead of editing them by hand.',
        'The sequence that worked was npm run predocs:settings, npm run schema:settings, then npm run docs:settings.',
        'Skipping predocs leaves stale defaults in the generated docs.',
        'We verify the workflow by checking that both the schema output and docs update together.',
        'This exact command order is the recurring workflow we use for settings changes.',
      ],
    ),
    session(
      'hidden-settings-workflow-b',
      'Investigate CI drift in generated config reference',
      390,
      sharedScratchpad,
      [
        'The config reference drift was fixed by rerunning the standard settings regeneration workflow.',
        'We again used npm run predocs:settings before npm run schema:settings and npm run docs:settings.',
        'The recurring rule is never to hand-edit generated settings docs.',
        'The validation step is to confirm the schema artifact and docs changed together after regeneration.',
        'This is the same recurring workflow we use every time a setting changes.',
      ],
    ),
    session(
      'distractor-release-notes',
      'Prepare release notes for auth launch',
      360,
      undefined,
      [
        'This release-notes task was one-off and just needed manual wording updates.',
        'I edited CHANGELOG.md and docs/release-notes.md directly.',
        'There was no reusable command sequence here beyond proofreading the copy.',
        'This task should not become a standing workflow.',
        'Once the wording landed, we were done.',
      ],
    ),
    session(
      'distractor-ci-snapshots',
      'Investigate CI drift in auth snapshots',
      330,
      undefined,
      [
        'This auth snapshot issue was specific to a flaky test in CI.',
        'The only commands we ran were npm test -- auth and an isolated snapshot update.',
        'It was not the recurring settings-doc workflow.',
        'Once the flaky snapshot passed, there was no broader reusable procedure.',
        'Treat this as a one-off CI cleanup.',
      ],
    ),
    session(
      'distractor-onboarding-docs',
      'Refresh onboarding documentation copy',
      300,
      undefined,
      [
        'This was just a docs wording cleanup in docs/onboarding.md.',
        'No command sequence was involved.',
        'We manually edited the copy and reviewed it.',
        'There is no recurring operational workflow to capture here.',
        'This should stay a one-off docs edit.',
      ],
    ),
    session(
      'distractor-deploy-copy',
      'Adjust deployment checklist wording',
      270,
      undefined,
      [
        'This was a wording-only change to docs/deploy.md.',
        'We did not run a reusable command sequence.',
        'It should not become a skill.',
        'The edit was only for this deploy checklist cleanup.',
        'After the copy change, the task was complete.',
      ],
    ),
  ];

  const relevantSessionIds = [
    'hidden-settings-workflow-a',
    'hidden-settings-workflow-b',
  ];
  const distractorSessionIds = [
    'distractor-release-notes',
    'distractor-ci-snapshots',
    'distractor-onboarding-docs',
    'distractor-deploy-copy',
  ];

  return { sessions, relevantSessionIds, distractorSessionIds };
}
async function seedSessions(
config: Config,
sessions: SeedSession[],
@@ -78,9 +299,10 @@ async function seedSessions(
const projectRoot = config.storage.getProjectRoot();
for (const session of sessions) {
const timestamp = new Date(
const sessionTimestamp = new Date(
Date.now() - session.timestampOffsetMinutes * 60 * 1000,
)
);
const timestamp = sessionTimestamp
.toISOString()
.slice(0, 16)
.replace(/:/g, '-');
@@ -89,8 +311,9 @@ async function seedSessions(
sessionId: session.sessionId,
projectHash: getProjectHash(projectRoot),
summary: session.summary,
memoryScratchpad: session.memoryScratchpad,
startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(),
lastUpdated: new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString(),
lastUpdated: sessionTimestamp.toISOString(),
messages: buildMessages(session.userTurns),
};
@@ -101,10 +324,9 @@ async function seedSessions(
}
}
async function runExtractionAndReadState(config: Config): Promise<{
state: { runs: Array<{ sessionIds: string[]; skillsCreated: string[] }> };
skillsDir: string;
}> {
async function runExtractionAndReadState(
config: Config,
): Promise<ExtractionOutcome> {
await startMemoryService(config);
const memoryDir = config.storage.getProjectMemoryTempDir();
@@ -113,7 +335,15 @@ async function runExtractionAndReadState(config: Config): Promise<{
const raw = await fsp.readFile(statePath, 'utf-8');
const state = JSON.parse(raw) as {
runs?: Array<{ sessionIds?: string[]; skillsCreated?: string[] }>;
runs?: Array<{
sessionIds?: string[];
skillsCreated?: string[];
candidateSessions?: SessionVersion[];
processedSessions?: SessionVersion[];
turnCount?: number;
durationMs?: number;
terminateReason?: string;
}>;
};
if (!Array.isArray(state.runs) || state.runs.length === 0) {
throw new Error('Skill extraction finished without writing any run state');
@@ -126,27 +356,292 @@ async function runExtractionAndReadState(config: Config): Promise<{
skillsCreated: Array.isArray(run.skillsCreated)
? run.skillsCreated
: [],
candidateSessions: Array.isArray(run.candidateSessions)
? run.candidateSessions
: [],
processedSessions: Array.isArray(run.processedSessions)
? run.processedSessions
: [],
turnCount:
typeof run.turnCount === 'number' ? run.turnCount : undefined,
durationMs:
typeof run.durationMs === 'number' ? run.durationMs : undefined,
terminateReason:
typeof run.terminateReason === 'string'
? run.terminateReason
: undefined,
})),
},
skillsDir,
skillBodies: await readSkillBodies(skillsDir),
};
}
/**
 * Condenses one extraction run into retrieval and skill-quality metrics.
 *
 * "Reads" are counted from `run.processedSessions`: a read is relevant when
 * its session id appears in `scenario.relevantSessionIds` and a distractor when
 * it appears in `scenario.distractorSessionIds`. Skill quality is scored
 * against `SETTINGS_SKILL_QUALITY_SIGNALS`.
 *
 * @param outcome Extraction outcome providing the generated skill bodies.
 * @param run The run snapshot to summarize.
 * @param scenario Fixture describing which session ids are relevant/distractors.
 * @returns Metrics for the run. Every ratio (`recall`, `precision`,
 *   `skillQualityRatio`) is 0 when its denominator is 0.
 */
async function summarizeScratchpadRun(
  outcome: ExtractionOutcome,
  run: ExtractionRunSnapshot,
  scenario: ReturnType<typeof createWorkflowComparisonSessions>,
): Promise<ScratchpadRunMetrics> {
  const relevantReads = countMatchingIds(
    run.processedSessions,
    scenario.relevantSessionIds,
  );
  const distractorReads = countMatchingIds(
    run.processedSessions,
    scenario.distractorSessionIds,
  );
  const totalReads = run.processedSessions.length;
  const relevantTotal = scenario.relevantSessionIds.length;
  const quality = scoreSkillQuality(
    outcome.skillBodies,
    SETTINGS_SKILL_QUALITY_SIGNALS,
  );

  return {
    turnCount: run.turnCount ?? null,
    durationMs: run.durationMs ?? null,
    terminateReason: run.terminateReason ?? null,
    skillsCreated: run.skillsCreated.length,
    candidateSessions: run.candidateSessions.length,
    processedSessions: totalReads,
    relevantReads,
    distractorReads,
    totalReads,
    // Guarded like precision and skillQualityRatio so a scenario without
    // relevant sessions yields 0 instead of NaN.
    recall: relevantTotal === 0 ? 0 : relevantReads / relevantTotal,
    precision: totalReads === 0 ? 0 : relevantReads / totalReads,
    signalScore: relevantReads - distractorReads,
    skillQualityScore: quality.score,
    skillQualityMax: quality.maxScore,
    skillQualityRatio:
      quality.maxScore === 0 ? 0 : quality.score / quality.maxScore,
    missingQualitySignals: quality.missing,
  };
}
/**
 * Averages per-run metrics across trials, rounding each value with
 * `roundStat`.
 *
 * `turnCount` and `durationMs` may be null on individual runs, so they go
 * through `averageNullable` and stay null when no run reported them. All other
 * fields fall back to 0 if rounding yields null.
 *
 * @param runs Metrics of the runs to aggregate.
 * @returns The aggregated averages.
 */
function averageScratchpadRuns(
  runs: ScratchpadRunMetrics[],
): ScratchpadStatsAggregate {
  const meanOf = (pick: (run: ScratchpadRunMetrics) => number): number =>
    roundStat(average(runs.map(pick))) ?? 0;
  const nullableMeanOf = (
    pick: (run: ScratchpadRunMetrics) => number | null,
  ): number | null => roundStat(averageNullable(runs.map(pick)));

  return {
    turnCountAvg: nullableMeanOf((run) => run.turnCount),
    durationMsAvg: nullableMeanOf((run) => run.durationMs),
    recallAvg: meanOf((run) => run.recall),
    precisionAvg: meanOf((run) => run.precision),
    signalScoreAvg: meanOf((run) => run.signalScore),
    relevantReadsAvg: meanOf((run) => run.relevantReads),
    distractorReadsAvg: meanOf((run) => run.distractorReads),
    skillsCreatedAvg: meanOf((run) => run.skillsCreated),
    skillQualityScoreAvg: meanOf((run) => run.skillQualityScore),
    skillQualityRatioAvg: meanOf((run) => run.skillQualityRatio),
  };
}
/**
 * Computes `enhanced - baseline` for every aggregate field, rounded with
 * `roundStat`.
 *
 * @param baseline Aggregate of the runs without scratchpads.
 * @param enhanced Aggregate of the runs with scratchpads.
 * @returns Field-wise deltas; `turnCountAvg` and `durationMsAvg` are null when
 *   either side is null.
 */
function diffScratchpadAggregates(
  baseline: ScratchpadStatsAggregate,
  enhanced: ScratchpadStatsAggregate,
): ScratchpadStatsAggregate {
  const delta = (after: number, before: number): number =>
    roundStat(after - before) ?? 0;
  const nullableDelta = (
    after: number | null,
    before: number | null,
  ): number | null =>
    before === null || after === null ? null : roundStat(after - before);

  return {
    turnCountAvg: nullableDelta(enhanced.turnCountAvg, baseline.turnCountAvg),
    durationMsAvg: nullableDelta(
      enhanced.durationMsAvg,
      baseline.durationMsAvg,
    ),
    recallAvg: delta(enhanced.recallAvg, baseline.recallAvg),
    precisionAvg: delta(enhanced.precisionAvg, baseline.precisionAvg),
    signalScoreAvg: delta(enhanced.signalScoreAvg, baseline.signalScoreAvg),
    relevantReadsAvg: delta(
      enhanced.relevantReadsAvg,
      baseline.relevantReadsAvg,
    ),
    distractorReadsAvg: delta(
      enhanced.distractorReadsAvg,
      baseline.distractorReadsAvg,
    ),
    skillsCreatedAvg: delta(
      enhanced.skillsCreatedAvg,
      baseline.skillsCreatedAvg,
    ),
    skillQualityScoreAvg: delta(
      enhanced.skillQualityScoreAvg,
      baseline.skillQualityScoreAvg,
    ),
    skillQualityRatioAvg: delta(
      enhanced.skillQualityRatioAvg,
      baseline.skillQualityRatioAvg,
    ),
  };
}
/**
 * Runs skill extraction for `sessions` inside a brand-new `ComponentRig`.
 *
 * The rig is initialized, the workspace files are written, the sessions are
 * seeded, and extraction is run. The rig is always cleaned up, including when
 * any of those steps throws.
 *
 * @param sessions Sessions to seed before extraction.
 * @returns The outcome reported by `runExtractionAndReadState`.
 */
async function runScenarioWithFreshRig(
  sessions: SeedSession[],
): Promise<ExtractionOutcome> {
  const rig = new ComponentRig({ configOverrides: EXTRACTION_CONFIG_OVERRIDES });

  try {
    await rig.initialize();
    await prepareWorkspace(rig.testDir, rig.testDir, WORKSPACE_FILES);
    await seedSessions(rig.config!, sessions);
    const outcome = await runExtractionAndReadState(rig.config!);
    return outcome;
  } finally {
    await rig.cleanup();
  }
}
/**
 * Executes one baseline-versus-scratchpad comparison.
 *
 * Each variant runs in its own fresh rig, baseline first. The last run of
 * each outcome must satisfy `expectSuccessfulExtractionRun` before metrics are
 * computed.
 *
 * @param trial Trial number recorded on the result.
 * @returns Metrics for the baseline and enhanced runs of this trial.
 * @throws Error when either outcome contains no runs.
 */
async function runScratchpadStatsTrial(
  trial: number,
): Promise<ScratchpadStatsTrial> {
  const withoutScratchpad = createWorkflowComparisonSessions(false);
  const withScratchpad = createWorkflowComparisonSessions(true);

  const withoutOutcome = await runScenarioWithFreshRig(
    withoutScratchpad.sessions,
  );
  const withOutcome = await runScenarioWithFreshRig(withScratchpad.sessions);

  const withoutRun = withoutOutcome.state.runs.at(-1);
  const withRun = withOutcome.state.runs.at(-1);
  if (!withoutRun || !withRun) {
    throw new Error('Expected both baseline and scratchpad runs to exist');
  }

  expectSuccessfulExtractionRun(withoutRun);
  expectSuccessfulExtractionRun(withRun);

  const baseline = await summarizeScratchpadRun(
    withoutOutcome,
    withoutRun,
    withoutScratchpad,
  );
  const enhanced = await summarizeScratchpadRun(
    withOutcome,
    withRun,
    withScratchpad,
  );
  return { trial, baseline, enhanced };
}
/**
 * Runs `trials` comparison trials one after another and assembles the report.
 *
 * @param trials Number of trials to run; trial numbers start at 1.
 * @returns Per-trial results, per-variant averages, and enhanced-minus-baseline
 *   deltas, stamped with the generation time.
 */
async function runScratchpadStatsReport(
  trials: number,
): Promise<ScratchpadStatsReport> {
  const results: ScratchpadStatsTrial[] = [];
  let trial = 1;
  while (trial <= trials) {
    results.push(await runScratchpadStatsTrial(trial));
    trial += 1;
  }

  const aggregateFor = (
    variant: 'baseline' | 'enhanced',
  ): ScratchpadStatsAggregate =>
    averageScratchpadRuns(results.map((result) => result[variant]));
  const baseline = aggregateFor('baseline');
  const enhanced = aggregateFor('enhanced');

  return {
    generatedAt: new Date().toISOString(),
    trials,
    aggregate: { baseline, enhanced },
    deltas: diffScratchpadAggregates(baseline, enhanced),
    results,
  };
}
/**
 * Writes the report as pretty-printed JSON (2-space indent, trailing newline)
 * to `evals/logs/skill_extraction_scratchpad_stats.json`, resolved against the
 * current working directory. The parent directory is created if needed.
 *
 * @param report Report to serialize.
 * @returns Absolute path of the written file.
 */
async function writeScratchpadStatsReport(
  report: ScratchpadStatsReport,
): Promise<string> {
  const outputPath = path.resolve(
    process.cwd(),
    'evals/logs/skill_extraction_scratchpad_stats.json',
  );
  const outputDir = path.dirname(outputPath);

  await fsp.mkdir(outputDir, { recursive: true });
  const serialized = `${JSON.stringify(report, null, 2)}\n`;
  await fsp.writeFile(outputPath, serialized);

  return outputPath;
}
/**
 * Reads the `SKILL.md` of every skill directory directly under `skillsDir`.
 *
 * Directories without a readable `SKILL.md` are skipped so one incomplete
 * artifact does not hide valid skills created in the same eval run. Each
 * skill body appears exactly once, in directory-listing order.
 *
 * @param skillsDir Directory whose immediate subdirectories are skills.
 * @returns The contents of every readable `SKILL.md`; an empty array when
 *   `skillsDir` cannot be listed.
 */
async function readSkillBodies(skillsDir: string): Promise<string[]> {
  try {
    const entries = await fsp.readdir(skillsDir, { withFileTypes: true });
    const skillDirs = entries.filter((entry) => entry.isDirectory());

    // Read in parallel, but convert each individual failure to null instead
    // of letting it reject the whole batch.
    const reads = await Promise.all(
      skillDirs.map(async (entry): Promise<string | null> => {
        try {
          return await fsp.readFile(
            path.join(skillsDir, entry.name, 'SKILL.md'),
            'utf-8',
          );
        } catch {
          return null;
        }
      }),
    );

    return reads.filter((body): body is string => body !== null);
  } catch {
    return [];
  }
}
/**
 * Asserts that an extraction run looks healthy: it used between 1 and 30
 * turns, reported a positive duration, and terminated with reason `'GOAL'`.
 *
 * @param run Run snapshot to check.
 */
function expectSuccessfulExtractionRun(run: ExtractionRunSnapshot): void {
  const { turnCount, durationMs, terminateReason } = run;

  expect(turnCount).toBeGreaterThan(0);
  expect(turnCount).toBeLessThanOrEqual(30);
  expect(durationMs).toBeGreaterThan(0);
  expect(terminateReason).toBe('GOAL');
}
/**
 * Scores generated skills against a list of expected quality signals.
 *
 * All skill bodies are joined with blank lines and each signal's pattern is
 * evaluated once against the combined text. `String.prototype.search` is used
 * instead of `RegExp.prototype.test` because it ignores the `g` flag and
 * `lastIndex`, so a global pattern cannot be reported as both matched and
 * missing, and repeated calls give the same answer.
 *
 * @param skillBodies Contents of the generated skill files.
 * @param signals Signals to look for.
 * @returns `score` (number of matched signals), `maxScore` (number of
 *   signals), and `missing` (labels of unmatched signals, in input order).
 */
function scoreSkillQuality(
  skillBodies: string[],
  signals: SkillQualitySignal[],
): { score: number; maxScore: number; missing: string[] } {
  const combined = skillBodies.join('\n\n');

  const missing: string[] = [];
  for (const signal of signals) {
    if (combined.search(signal.pattern) === -1) {
      missing.push(signal.label);
    }
  }

  return {
    score: signals.length - missing.length,
    maxScore: signals.length,
    missing,
  };
}
// Signals expected in a skill extracted from the settings-docs regeneration
// sessions: the three npm commands, some verification wording, and a warning
// about hand edits or command ordering. All patterns are case-insensitive.
const SETTINGS_SKILL_QUALITY_SIGNALS: SkillQualitySignal[] = [
{ label: 'predocs command', pattern: /npm run predocs:settings/i },
{ label: 'schema command', pattern: /npm run schema:settings/i },
{ label: 'docs command', pattern: /npm run docs:settings/i },
{ label: 'verification guidance', pattern: /verif(?:y|ication)/i },
{
label: 'generated docs warning or ordering constraint',
pattern:
/do not hand-edit|manual edits|exact command order|preserve.*order/i,
},
];
// Signals expected in a skill extracted from the database-migration sessions:
// the check/migrate/validate commands, rollback guidance, and an ordering
// constraint. All patterns are case-insensitive.
const DB_MIGRATION_SKILL_QUALITY_SIGNALS: SkillQualitySignal[] = [
{ label: 'db check command', pattern: /npm run db:check/i },
{ label: 'db migrate command', pattern: /npm run db:migrate/i },
{ label: 'db validate command', pattern: /npm run db:validate/i },
{ label: 'rollback guidance', pattern: /npm run db:rollback|rollback/i },
{
label: 'ordering constraint',
pattern: /check.*migrate.*validate|ordering is critical|mandatory/i,
},
];
/**
* Shared configOverrides for all skill extraction component evals.
* - experimentalAutoMemory: enables the Auto Memory skill extraction pipeline.
@@ -158,6 +653,16 @@ const EXTRACTION_CONFIG_OVERRIDES = {
approvalMode: ApprovalMode.YOLO,
};
/**
 * Reads the number of statistics trials from the `SCRATCHPAD_STATS_TRIALS`
 * environment variable.
 *
 * @returns The value parsed as a base-10 integer when it is a positive finite
 *   number; otherwise 8 (also used when the variable is unset).
 */
function parseScratchpadStatsTrials(): number {
  const raw = process.env['SCRATCHPAD_STATS_TRIALS'] ?? '8';
  const parsed = Number.parseInt(raw, 10);

  if (!Number.isFinite(parsed) || parsed <= 0) {
    return 8;
  }
  return parsed;
}

const SCRATCHPAD_STATS_TRIALS = parseScratchpadStatsTrials();
describe('Skill Extraction', () => {
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
@@ -264,15 +769,24 @@ describe('Skill Extraction', () => {
const { state, skillsDir } = await runExtractionAndReadState(config);
const skillBodies = await readSkillBodies(skillsDir);
const combinedSkills = skillBodies.join('\n\n');
const quality = scoreSkillQuality(
skillBodies,
SETTINGS_SKILL_QUALITY_SIGNALS,
);
expect(state.runs).toHaveLength(1);
expect(state.runs[0].sessionIds).toHaveLength(2);
expectSuccessfulExtractionRun(state.runs[0]);
expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1);
expect(skillBodies.length).toBeGreaterThanOrEqual(1);
expect(combinedSkills).toContain('npm run predocs:settings');
expect(combinedSkills).toContain('npm run schema:settings');
expect(combinedSkills).toContain('npm run docs:settings');
expect(combinedSkills).toMatch(/Verification/i);
expect(combinedSkills).toMatch(/verif(?:y|ication)/i);
expect(
quality.score,
`missing quality signals: ${quality.missing.join(', ')}`,
).toBeGreaterThanOrEqual(4);
// Verify the extraction agent activated skill-creator for design guidance.
expect(config.getSkillManager().isSkillActive('skill-creator')).toBe(
@@ -281,6 +795,96 @@ describe('Skill Extraction', () => {
},
});
// Component eval: runs the same six-session fixture twice, once without and
// once with memory scratchpads on the relevant sessions, and checks that the
// scratchpad variant reads relevant sessions at least as often, reads
// distractors no more often, and has a strictly higher signal score.
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
suiteType: 'component-level',
name: 'memory scratchpad improves repeated-workflow recall versus summary-only index',
files: WORKSPACE_FILES,
timeout: 360000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
assert: async () => {
const baselineScenario = createWorkflowComparisonSessions(false);
const enhancedScenario = createWorkflowComparisonSessions(true);
// Each variant gets its own fresh rig via runScenarioWithFreshRig.
const baselineOutcome = await runScenarioWithFreshRig(
baselineScenario.sessions,
);
const enhancedOutcome = await runScenarioWithFreshRig(
enhancedScenario.sessions,
);
// Only the most recent run of each outcome is compared.
const baselineRun = baselineOutcome.state.runs.at(-1);
const enhancedRun = enhancedOutcome.state.runs.at(-1);
if (!baselineRun || !enhancedRun) {
throw new Error('Expected both baseline and scratchpad runs to exist');
}
expectSuccessfulExtractionRun(baselineRun);
expectSuccessfulExtractionRun(enhancedRun);
// Count how many processed sessions were relevant vs. distractors.
const baselineRelevantReads = countMatchingIds(
baselineRun.processedSessions,
baselineScenario.relevantSessionIds,
);
const enhancedRelevantReads = countMatchingIds(
enhancedRun.processedSessions,
enhancedScenario.relevantSessionIds,
);
const baselineDistractorReads = countMatchingIds(
baselineRun.processedSessions,
baselineScenario.distractorSessionIds,
);
const enhancedDistractorReads = countMatchingIds(
enhancedRun.processedSessions,
enhancedScenario.distractorSessionIds,
);
// Signal score = relevant reads minus distractor reads.
const baselineSignalScore =
baselineRelevantReads - baselineDistractorReads;
const enhancedSignalScore =
enhancedRelevantReads - enhancedDistractorReads;
// Every seeded session should have been offered as a candidate.
expect(enhancedRun.candidateSessions).toHaveLength(
enhancedScenario.sessions.length,
);
expect(enhancedRelevantReads).toBeGreaterThanOrEqual(2);
expect(enhancedRelevantReads).toBeGreaterThanOrEqual(
baselineRelevantReads,
);
expect(enhancedDistractorReads).toBeLessThanOrEqual(
baselineDistractorReads,
);
expect(enhancedSignalScore).toBeGreaterThan(baselineSignalScore);
},
});
// Opt-in statistics run: only registered when RUN_SCRATCHPAD_STATS is exactly
// '1'; otherwise a skipped placeholder with the same name is registered.
if (process.env['RUN_SCRATCHPAD_STATS'] === '1') {
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
suiteType: 'component-level',
name: 'reports memory scratchpad retrieval statistics',
// Timeout grows with the trial count, with a floor of 360000 ms.
timeout: Math.max(360000, SCRATCHPAD_STATS_TRIALS * 150000),
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
assert: async () => {
const report = await runScratchpadStatsReport(SCRATCHPAD_STATS_TRIALS);
const outputPath = await writeScratchpadStatsReport(report);
// Log where the report was written along with the aggregate numbers.
console.info(
`Wrote scratchpad stats report to ${outputPath}\n${JSON.stringify(
report.aggregate,
null,
2,
)}`,
);
// Sanity checks only: every trial produced a result and both variants read
// at least some relevant sessions on average.
expect(report.results).toHaveLength(SCRATCHPAD_STATS_TRIALS);
expect(report.aggregate.baseline.recallAvg).toBeGreaterThan(0);
expect(report.aggregate.enhanced.recallAvg).toBeGreaterThan(0);
},
});
} else {
it.skip('reports memory scratchpad retrieval statistics', () => {});
}
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
suiteType: 'component-level',
@@ -330,15 +934,24 @@ describe('Skill Extraction', () => {
const { state, skillsDir } = await runExtractionAndReadState(config);
const skillBodies = await readSkillBodies(skillsDir);
const combinedSkills = skillBodies.join('\n\n');
const quality = scoreSkillQuality(
skillBodies,
DB_MIGRATION_SKILL_QUALITY_SIGNALS,
);
expect(state.runs).toHaveLength(1);
expect(state.runs[0].sessionIds).toHaveLength(2);
expectSuccessfulExtractionRun(state.runs[0]);
expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1);
expect(skillBodies.length).toBeGreaterThanOrEqual(1);
expect(combinedSkills).toContain('npm run db:check');
expect(combinedSkills).toContain('npm run db:migrate');
expect(combinedSkills).toContain('npm run db:validate');
expect(combinedSkills).toMatch(/rollback/i);
expect(
quality.score,
`missing quality signals: ${quality.missing.join(', ')}`,
).toBeGreaterThanOrEqual(4);
// Verify the extraction agent activated skill-creator for design guidance.
expect(config.getSkillManager().isSkillActive('skill-creator')).toBe(
+26
View File
@@ -0,0 +1,26 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Counts the items whose `sessionId` is one of `expectedIds`.
 *
 * Every matching item is counted, so duplicates in `items` count separately.
 *
 * @param items Items carrying a `sessionId`.
 * @param expectedIds Session ids to match against.
 * @returns Number of items with a matching `sessionId`.
 */
export function countMatchingIds<T extends { sessionId: string }>(
  items: T[],
  expectedIds: string[],
): number {
  const wanted = new Set(expectedIds);
  let hits = 0;
  for (const { sessionId } of items) {
    if (wanted.has(sessionId)) {
      hits += 1;
    }
  }
  return hits;
}
/**
 * Rounds a statistic to four decimal places.
 *
 * @param value Number to round, or null.
 * @returns null when `value` is null; otherwise `value` formatted with
 *   `toFixed(4)` and converted back to a number.
 */
export function roundStat(value: number | null): number | null {
  if (value === null) {
    return null;
  }
  const fixed = value.toFixed(4);
  return Number(fixed);
}
/**
 * Arithmetic mean of `values`.
 *
 * @param values Numbers to average.
 * @returns The mean, or 0 for an empty array. Without that guard the result
 *   would be NaN (0 / 0), which `??` fallbacks in callers do not catch.
 */
export function average(values: number[]): number {
  if (values.length === 0) {
    return 0;
  }
  return values.reduce((sum, value) => sum + value, 0) / values.length;
}
/**
 * Averages only the non-null entries of `values`.
 *
 * @param values Numbers, some of which may be null.
 * @returns null when every entry is null (or the array is empty); otherwise
 *   the result of `average` over the non-null entries.
 */
export function averageNullable(values: Array<number | null>): number | null {
  const present = values.filter(
    (value): value is number => value !== null,
  );
  if (present.length === 0) {
    return null;
  }
  return average(present);
}
@@ -779,6 +779,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
return {
result: finalResult || 'Task completed.',
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
}
@@ -786,6 +788,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
result:
finalResult || 'Agent execution was terminated before completion.',
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
} catch (error) {
// Check if the error is an AbortError caused by our internal timeout.
@@ -826,6 +830,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
return {
result: finalResult,
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
}
}
@@ -840,6 +846,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
return {
result: finalResult,
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
}
@@ -74,12 +74,14 @@ describe('SkillExtractionAgent', () => {
expect(query).toContain(existingSkillsSummary);
expect(query).toContain(sessionIndex);
expect(query).toContain('optional workflow hint');
expect(query).toContain(
'The summary is a user-intent summary, not a workflow summary.',
'workflow hints alone is never enough evidence for a reusable skill.',
);
expect(query).toContain(
'The session summaries describe user intent, not workflow details.',
'Session summaries describe user intent; optional workflow hints describe likely procedural traces.',
);
expect(query).toContain('Use workflow hints for routing');
expect(query).toContain(
'Only write a skill if the evidence shows a durable, recurring workflow',
);
@@ -303,10 +303,11 @@ export const SkillExtractionAgent = (
'# Session Index',
'',
'Below is an index of past conversation sessions. Each line shows:',
'[NEW] or [old] status, a 1-line summary, message count, and the file path.',
'[NEW] or [old] status, a 1-line user-intent summary, optional workflow hint, message count, and the file path.',
'',
'The summary is a user-intent summary, not a workflow summary.',
'Matching summary text alone is never enough evidence for a reusable skill.',
'Some lines may include "| workflow: ..."; this is a compact workflow hint from session metadata.',
'Use workflow hints to prioritize which sessions to read and to group likely recurring workflows.',
'Matching summary text or workflow hints alone is never enough evidence for a reusable skill.',
'',
'[NEW] = not yet processed for skill extraction (focus on these)',
'[old] = previously processed (read only if a [NEW] session hints at a repeated pattern)',
@@ -326,7 +327,7 @@ export const SkillExtractionAgent = (
return {
systemPrompt: buildSystemPrompt(skillsDir),
query: `${initialContext}\n\nAnalyze the session index above. The session summaries describe user intent, not workflow details. Read sessions that suggest repeated workflows using read_file. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
query: `${initialContext}\n\nAnalyze the session index above. Session summaries describe user intent; optional workflow hints describe likely procedural traces. Use workflow hints for routing, then read sessions that suggest repeated workflows using read_file to verify recurrence from transcript evidence. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
};
},
runConfig: {
+2
View File
@@ -36,6 +36,8 @@ export enum AgentTerminateMode {
/**
 * Result object returned by an agent run.
 */
export interface OutputObject {
// Final result text produced by the run.
result: string;
// Why the run stopped.
terminate_reason: AgentTerminateMode;
// Value of the executor's turn counter when the run returned.
turn_count?: number;
// Wall-clock milliseconds elapsed between the run's start time and its return.
duration_ms?: number;
}
/**
@@ -112,6 +112,7 @@ export async function loadConversationRecord(
userMessageCount?: number;
firstUserMessage?: string;
hasUserOrAssistantMessage?: boolean;
memoryScratchpadIsStale?: boolean;
})
| null
> {
@@ -133,6 +134,8 @@ export async function loadConversationRecord(
string,
{ isUser: boolean; isUserOrAssistant: boolean }
>();
let isTrackingMemoryScratchpadFreshness = false;
let memoryScratchpadIsStale = false;
let firstUserMessageStr: string | undefined;
for await (const line of rl) {
@@ -140,6 +143,9 @@ export async function loadConversationRecord(
try {
const record = JSON.parse(line) as unknown;
if (isRewindRecord(record)) {
if (isTrackingMemoryScratchpadFreshness) {
memoryScratchpadIsStale = true;
}
const rewindId = record.$rewindTo;
if (options?.metadataOnly) {
const idx = messageIds.indexOf(rewindId);
@@ -168,6 +174,9 @@ export async function loadConversationRecord(
}
}
} else if (isMessageRecord(record)) {
if (isTrackingMemoryScratchpadFreshness) {
memoryScratchpadIsStale = true;
}
const id = record.id;
const isUser = hasProperty(record, 'type') && record.type === 'user';
const isUserOrAssistant =
@@ -206,6 +215,12 @@ export async function loadConversationRecord(
}
}
} else if (isMetadataUpdateRecord(record)) {
if (hasProperty(record.$set, 'memoryScratchpad')) {
isTrackingMemoryScratchpadFreshness = Boolean(
record.$set.memoryScratchpad,
);
memoryScratchpadIsStale = false;
}
// Metadata update
metadata = {
...metadata,
@@ -257,6 +272,7 @@ export async function loadConversationRecord(
startTime: metadata.startTime || new Date().toISOString(),
lastUpdated: metadata.lastUpdated || new Date().toISOString(),
summary: metadata.summary,
memoryScratchpad: metadata.memoryScratchpad,
directories: metadata.directories,
kind: metadata.kind,
messages: options?.metadataOnly ? [] : loadedMessages,
@@ -267,6 +283,9 @@ export async function loadConversationRecord(
options?.metadataOnly && metadataMessages.length > 0
? metadataMessages.filter((m) => m.type === 'user').length
: userMessageCount,
memoryScratchpadIsStale: isTrackingMemoryScratchpadFreshness
? memoryScratchpadIsStale
: undefined,
firstUserMessage: fallbackFirstUserMessage,
hasUserOrAssistantMessage:
options?.metadataOnly && metadataMessages.length > 0
@@ -332,6 +351,13 @@ export class ChatRecordingService {
for (const msg of this.cachedConversation.messages) {
this.appendRecord(msg);
}
if (this.cachedConversation.memoryScratchpad) {
this.appendRecord({
$set: {
memoryScratchpad: this.cachedConversation.memoryScratchpad,
},
});
}
}
// Update the session ID in the existing file
@@ -25,6 +25,19 @@ export interface TokensSummary {
total: number; // totalTokenCount
}
// Validation outcome recorded in a scratchpad.
export type MemoryValidationStatus = 'passed' | 'failed' | 'unknown';
/**
 * Lightweight workflow metadata attached to a session for memory extraction.
 */
export interface MemoryScratchpad {
// Schema version of the scratchpad payload; currently always 1.
version: 1;
// Compact one-line workflow description, e.g.
// "read_file -> edit | paths <file> | validated".
workflowSummary?: string;
// Presumably the ordered tool names used in the session — TODO confirm against the writer.
toolSequence?: string[];
// Presumably the file paths the session touched — TODO confirm against the writer.
touchedPaths?: string[];
validationStatus?: MemoryValidationStatus;
}
/**
* Base fields common to all messages.
*/
@@ -83,6 +96,7 @@ export interface ConversationRecord {
lastUpdated: string;
messages: MessageRecord[];
summary?: string;
memoryScratchpad?: MemoryScratchpad;
/** Workspace directories added during the session via /dir add */
directories?: string[];
/** The kind of conversation (main agent or subagent) */
@@ -120,6 +134,7 @@ export interface PartialMetadataRecord {
startTime?: string;
lastUpdated?: string;
summary?: string;
memoryScratchpad?: MemoryScratchpad;
directories?: string[];
kind?: 'main' | 'subagent';
}
+343 -13
View File
@@ -127,6 +127,7 @@ async function writeConversationJsonl(
startTime: conversation.startTime,
lastUpdated: conversation.lastUpdated,
summary: conversation.summary,
memoryScratchpad: conversation.memoryScratchpad,
directories: conversation.directories,
kind: conversation.kind,
};
@@ -565,7 +566,7 @@ describe('memoryService', () => {
);
});
it('records only sessions whose read_file calls succeed as processed', async () => {
it('records only sessions whose read_file completed successfully as processed', async () => {
const { startMemoryService, readExtractionState } = await import(
'./memoryService.js'
);
@@ -595,17 +596,69 @@ describe('memoryService', () => {
messageCount: 20,
lastUpdated: '2025-01-01T01:00:00Z',
});
const failedConversation = createConversation({
sessionId: 'failed-session',
summary: 'read_file errors on this one',
messageCount: 20,
lastUpdated: '2025-01-03T01:00:00Z',
});
const rejectedConversation = createConversation({
sessionId: 'rejected-session',
summary: 'read_file was rejected for this one',
messageCount: 20,
lastUpdated: '2025-01-02T02:00:00Z',
});
const mismatchedEndConversation = createConversation({
sessionId: 'mismatched-end-session',
summary: 'read_file start with a mismatched tool end',
messageCount: 20,
lastUpdated: '2025-01-02T03:00:00Z',
});
const mismatchedErrorConversation = createConversation({
sessionId: 'mismatched-error-session',
summary: 'read_file recovers after a mismatched tool error',
messageCount: 20,
lastUpdated: '2025-01-02T04:00:00Z',
});
const openedPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-opened.jsonl`,
);
const skippedPath = path.join(
const failedPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-skipped.jsonl`,
`${SESSION_FILE_PREFIX}2025-01-03T00-00-failed.jsonl`,
);
const rejectedPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-rejected.jsonl`,
);
const mismatchedEndPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-mismatched-end.jsonl`,
);
const mismatchedErrorPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-mismatched-error.jsonl`,
);
await writeConversationJsonl(openedPath, openedConversation);
await writeConversationJsonl(skippedPath, skippedConversation);
await writeConversationJsonl(failedPath, failedConversation);
await writeConversationJsonl(rejectedPath, rejectedConversation);
await writeConversationJsonl(
mismatchedEndPath,
mismatchedEndConversation,
);
await writeConversationJsonl(
mismatchedErrorPath,
mismatchedErrorConversation,
);
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-skipped.jsonl`,
),
skippedConversation,
);
vi.mocked(LocalAgentExecutor.create).mockImplementationOnce(
async (_definition, _context, onActivity) =>
@@ -624,21 +677,21 @@ describe('memoryService', () => {
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
type: 'TOOL_CALL_END',
data: {
name: 'read_file',
args: { file_path: skippedPath },
callId: 'call-skipped',
id: 'call-opened',
data: {},
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'ERROR',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
callId: 'call-skipped',
error: 'access denied',
args: { file_path: failedPath },
callId: 'call-failed',
},
});
onActivity?.({
@@ -647,8 +700,28 @@ describe('memoryService', () => {
type: 'TOOL_CALL_END',
data: {
name: 'read_file',
id: 'call-opened',
data: { content: 'Read this one' },
id: 'call-failed',
data: { isError: true },
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
args: { file_path: rejectedPath },
callId: 'call-rejected',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'ERROR',
data: {
name: 'read_file',
callId: 'call-rejected',
error: 'User rejected this operation.',
},
});
onActivity?.({
@@ -661,6 +734,56 @@ describe('memoryService', () => {
callId: 'call-unrelated',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
args: { file_path: mismatchedEndPath },
callId: 'call-mismatched-end',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_END',
data: {
name: 'write_file',
id: 'call-mismatched-end',
data: {},
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
args: { file_path: mismatchedErrorPath },
callId: 'call-mismatched-error',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'ERROR',
data: {
name: 'write_file',
callId: 'call-mismatched-error',
error: 'Different tool failed.',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_END',
data: {
name: 'read_file',
id: 'call-mismatched-error',
data: {},
},
});
return undefined;
}),
}) as never,
@@ -691,6 +814,22 @@ describe('memoryService', () => {
);
expect(state.runs).toHaveLength(1);
expect(state.runs[0].candidateSessions).toEqual([
{
sessionId: 'failed-session',
lastUpdated: '2025-01-03T01:00:00Z',
},
{
sessionId: 'mismatched-error-session',
lastUpdated: '2025-01-02T04:00:00Z',
},
{
sessionId: 'mismatched-end-session',
lastUpdated: '2025-01-02T03:00:00Z',
},
{
sessionId: 'rejected-session',
lastUpdated: '2025-01-02T02:00:00Z',
},
{
sessionId: 'opened-session',
lastUpdated: '2025-01-02T01:00:00Z',
@@ -701,12 +840,19 @@ describe('memoryService', () => {
},
]);
expect(state.runs[0].processedSessions).toEqual([
{
sessionId: 'mismatched-error-session',
lastUpdated: '2025-01-02T04:00:00Z',
},
{
sessionId: 'opened-session',
lastUpdated: '2025-01-02T01:00:00Z',
},
]);
expect(state.runs[0].sessionIds).toEqual(['opened-session']);
expect(state.runs[0].sessionIds).toEqual([
'mismatched-error-session',
'opened-session',
]);
});
});
@@ -902,6 +1048,178 @@ describe('memoryService', () => {
expect(result.sessionIndex).toContain(path.join(chatsDir, fileName));
});
// With no session summary, the index headline should be the scratchpad's
// workflow summary rather than the "(no summary)" placeholder.
it('falls back to scratchpad workflow summary when summary is missing', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'scratchpad-only',
summary: undefined,
memoryScratchpad: {
version: 1,
workflowSummary:
'read_file -> edit | paths packages/core/src/services/memoryService.ts | validated',
},
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-scratch01.jsonl`,
),
conversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('read_file -> edit');
expect(result.sessionIndex).not.toContain('(no summary)');
});
// A non-string workflowSummary must be ignored (not rendered) and must not
// prevent other sessions in the same directory from being indexed.
it('ignores malformed scratchpad workflow summaries while indexing sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const malformedConversation = createConversation({
sessionId: 'malformed-scratchpad',
summary: undefined,
memoryScratchpad: {
version: 1,
// Deliberately the wrong type; the cast below bypasses the compiler.
workflowSummary: 123,
} as unknown as ConversationRecord['memoryScratchpad'],
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-badpad.jsonl`,
),
malformedConversation,
);
const validConversation = createConversation({
sessionId: 'valid-session',
summary: 'Still indexes other sessions',
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-valid.jsonl`,
),
validConversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('(no summary)');
expect(result.sessionIndex).toContain('Still indexes other sessions');
expect(result.sessionIndex).not.toContain('123');
});
// When both a summary and a workflow summary exist, the headline is the
// summary followed by " | workflow: <workflow summary>".
it('appends workflow summary when both summary and scratchpad are present', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'summary-and-scratchpad',
summary: 'Fix session scanning',
memoryScratchpad: {
version: 1,
workflowSummary:
'read_file -> edit | paths packages/core/src/services/sessionSummaryUtils.ts',
},
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-scratch02.jsonl`,
),
conversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('Fix session scanning | workflow:');
expect(result.sessionIndex).toContain('sessionSummaryUtils.ts');
});
// A scratchpad followed by a later message record in the JSONL file is stale
// and must not appear in the session index.
it('omits stale scratchpad workflow summaries from resumed JSONL sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'stale-scratchpad',
summary: 'Resume memory work',
messageCount: 20,
lastUpdated: '2025-01-01T01:00:00Z',
});
const filePath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-stale001.jsonl`,
);
await writeConversationJsonl(filePath, conversation);
// First append the scratchpad as a metadata update record...
await fs.appendFile(
filePath,
`${JSON.stringify({
$set: {
memoryScratchpad: {
version: 1,
workflowSummary: 'stale_workflow | paths stale.ts',
},
},
})}\n`,
);
// ...then append a newer user message and metadata update after it.
await fs.appendFile(
filePath,
[
JSON.stringify({
id: 'resumed-user-message',
timestamp: '2025-01-02T01:00:00Z',
type: 'user',
content: [{ text: 'Continue after the scratchpad was written' }],
}),
JSON.stringify({
$set: { lastUpdated: '2025-01-02T01:00:01Z' },
}),
].join('\n') + '\n',
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('Resume memory work');
expect(result.sessionIndex).not.toContain('stale_workflow');
expect(result.sessionIndex).not.toContain('stale.ts');
});
// A persisted workflow summary containing a raw shell command must be reduced
// to the command name only, so URLs, headers and tokens never reach the index.
it('sanitizes shell command workflow summaries before indexing sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'raw-shell-scratchpad',
summary: 'Investigate API migration',
memoryScratchpad: {
version: 1,
workflowSummary:
'run_shell_command: curl https://api.example.com -H "Authorization: Bearer sk-secret-token" -> read_file | paths package.json',
},
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-shellraw.jsonl`,
),
conversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain(
'workflow: run_shell_command: curl -> read_file | paths package.json',
);
expect(result.sessionIndex).not.toContain('Authorization');
expect(result.sessionIndex).not.toContain('sk-secret-token');
expect(result.sessionIndex).not.toContain('https://api.example.com');
});
it('filters out subagent sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
@@ -1176,6 +1494,9 @@ describe('memoryService', () => {
},
],
skillsCreated: ['debug-helper', 'test-gen'],
turnCount: 4,
durationMs: 1875,
terminateReason: 'GOAL',
},
],
};
@@ -1202,6 +1523,9 @@ describe('memoryService', () => {
]);
expect(result.runs[0].sessionIds).toEqual(['s1']);
expect(result.runs[0].runAt).toBe('2025-06-01T00:00:00Z');
expect(result.runs[0].turnCount).toBe(4);
expect(result.runs[0].durationMs).toBe(1875);
expect(result.runs[0].terminateReason).toBe('GOAL');
});
it('writeExtractionState + readExtractionState roundtrips runs correctly', async () => {
@@ -1235,11 +1559,17 @@ describe('memoryService', () => {
},
],
skillsCreated: ['skill-x'],
turnCount: 3,
durationMs: 2400,
terminateReason: 'GOAL',
},
{
runAt: '2025-01-02T00:00:00Z',
sessionIds: ['c'],
skillsCreated: [],
turnCount: 1,
durationMs: 900,
terminateReason: 'GOAL',
},
];
const state: ExtractionState = { runs };
+112 -86
View File
@@ -14,6 +14,7 @@ import {
SESSION_FILE_PREFIX,
loadConversationRecord,
type ConversationRecord,
type MemoryScratchpad,
} from './chatRecordingService.js';
import { debugLogger } from '../utils/debugLogger.js';
import { coreEvents } from '../utils/events.js';
@@ -22,7 +23,10 @@ import { FRONTMATTER_REGEX, parseFrontmatter } from '../skills/skillLoader.js';
import { LocalAgentExecutor } from '../agents/local-executor.js';
import { SkillExtractionAgent } from '../agents/skill-extraction-agent.js';
import { getModelConfigAlias } from '../agents/registry.js';
import type { SubagentActivityEvent } from '../agents/types.js';
import {
isToolActivityError,
type SubagentActivityEvent,
} from '../agents/types.js';
import { ExecutionLifecycleService } from './executionLifecycleService.js';
import { PromptRegistry } from '../prompts/prompt-registry.js';
import { ResourceRegistry } from '../resources/resource-registry.js';
@@ -36,6 +40,7 @@ import {
applyParsedSkillPatches,
hasParsedPatchHunks,
} from './memoryPatchUtils.js';
import { sanitizeWorkflowSummaryForScratchpad } from './sessionScratchpadUtils.js';
const LOCK_FILENAME = '.extraction.lock';
const STATE_FILENAME = '.extraction-state.json';
@@ -53,20 +58,6 @@ interface LockInfo {
startedAt: string;
}
function hasProperty<T extends string>(
obj: unknown,
prop: T,
): obj is { [key in T]: unknown } {
return obj !== null && typeof obj === 'object' && prop in obj;
}
function isStringProperty<T extends string>(
obj: unknown,
prop: T,
): obj is { [key in T]: string } {
return hasProperty(obj, prop) && typeof obj[prop] === 'string';
}
interface SessionVersion {
sessionId: string;
lastUpdated: string;
@@ -75,6 +66,7 @@ interface SessionVersion {
/**
 * A session version plus the data needed to render its session-index line.
 */
interface IndexedSession extends SessionVersion {
// Path of the session file on disk.
filePath: string;
// Session summary from the conversation metadata, if any.
summary?: string;
// Scratchpad from the conversation metadata; left undefined by the scanner
// when the loader reports the scratchpad as stale.
memoryScratchpad?: MemoryScratchpad;
// Number of user messages in the session.
userMessageCount: number;
}
@@ -87,6 +79,9 @@ export interface ExtractionRun {
candidateSessions?: SessionVersion[];
processedSessions?: SessionVersion[];
skillsCreated: string[];
turnCount?: number;
durationMs?: number;
terminateReason?: string;
}
/**
@@ -153,12 +148,25 @@ function normalizeStringArray(value: unknown): string[] {
return value.filter((item): item is string => typeof item === 'string');
}
/**
 * Returns `value` when it is a finite number; otherwise `undefined`.
 *
 * Non-numbers, `NaN` and the infinities are all rejected.
 */
function normalizeOptionalNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  if (!Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
/**
 * Returns `value` unchanged when it is a string (including the empty
 * string); otherwise `undefined`.
 */
function normalizeOptionalString(value: unknown): string | undefined {
  if (typeof value === 'string') {
    return value;
  }
  return undefined;
}
function isExtractionRunLike(value: unknown): value is {
runAt: string;
sessionIds?: unknown;
candidateSessions?: unknown;
processedSessions?: unknown;
skillsCreated: unknown;
turnCount?: unknown;
durationMs?: unknown;
terminateReason?: unknown;
} {
return (
typeof value === 'object' &&
@@ -198,6 +206,9 @@ function buildExtractionRun(value: unknown): ExtractionRun | null {
processedSessions:
processedSessions.length > 0 ? processedSessions : undefined,
skillsCreated: normalizeStringArray(value.skillsCreated),
turnCount: normalizeOptionalNumber(value.turnCount),
durationMs: normalizeOptionalNumber(value.durationMs),
terminateReason: normalizeOptionalString(value.terminateReason),
};
}
@@ -291,7 +302,7 @@ function shouldReplaceIndexedSession(
return compareIndexedSessions(candidate, existing) < 0;
}
function isReadFileStartActivity(
function isReadFileActivity(
activity: SubagentActivityEvent,
): activity is SubagentActivityEvent & {
data: { name: string; args?: { file_path?: unknown }; callId?: unknown };
@@ -302,11 +313,36 @@ function isReadFileStartActivity(
);
}
function getResolvedReadFilePath(
/**
 * Extracts the tool-call identifier from a read_file-related activity event.
 *
 * The identifier is read from a different field depending on the event:
 * - events accepted by `isReadFileActivity`: `data.callId`
 * - `TOOL_CALL_END` events whose `data.name` is the read_file tool: `data.id`
 * - `ERROR` events whose `data.name` is the read_file tool: `data.callId`
 *
 * @returns The identifier when it is a string; `null` for any other event or
 *   when the identifier is missing or not a string.
 */
function getReadFileCallId(activity: SubagentActivityEvent): string | null {
  const asCallId = (candidate: unknown): string | null =>
    typeof candidate === 'string' ? candidate : null;

  if (isReadFileActivity(activity)) {
    return asCallId(activity.data.callId);
  }

  switch (activity.type) {
    case 'TOOL_CALL_END':
      return activity.data['name'] === READ_FILE_TOOL_NAME
        ? asCallId(activity.data['id'])
        : null;
    case 'ERROR':
      return activity.data['name'] === READ_FILE_TOOL_NAME
        ? asCallId(activity.data['callId'])
        : null;
    default:
      return null;
  }
}
function getResolvedActivityFilePath(
config: Config,
activity: SubagentActivityEvent,
): string | null {
if (!isReadFileStartActivity(activity)) {
if (!isReadFileActivity(activity)) {
return null;
}
@@ -320,48 +356,11 @@ function getResolvedReadFilePath(
return null;
}
return path.resolve(config.getTargetDir(), args.file_path);
}
function getReadFileStartCallId(
activity: SubagentActivityEvent,
): string | null {
if (
!isReadFileStartActivity(activity) ||
!isStringProperty(activity.data, 'callId')
) {
return null;
}
return activity.data.callId;
}
function getCompletedReadFileCallId(
activity: SubagentActivityEvent,
): string | null {
if (
activity.type !== 'TOOL_CALL_END' ||
activity.data['name'] !== READ_FILE_TOOL_NAME ||
!isStringProperty(activity.data, 'id')
) {
return null;
}
return activity.data['id'];
}
function getFailedReadFileCallId(
activity: SubagentActivityEvent,
): string | null {
if (
activity.type !== 'ERROR' ||
activity.data['name'] !== READ_FILE_TOOL_NAME ||
!isStringProperty(activity.data, 'callId')
) {
return null;
}
return activity.data['callId'];
const targetDir =
'getTargetDir' in config && typeof config.getTargetDir === 'function'
? config.getTargetDir()
: process.cwd();
return path.resolve(targetDir, args.file_path);
}
function getUserMessageCount(
@@ -580,6 +579,10 @@ async function scanEligibleSessions(
lastUpdated: conversation.lastUpdated,
filePath,
summary: conversation.summary,
memoryScratchpad:
conversation.memoryScratchpadIsStale === true
? undefined
: conversation.memoryScratchpad,
userMessageCount: getUserMessageCount(conversation),
};
@@ -595,6 +598,28 @@ async function scanEligibleSessions(
return Array.from(latestBySessionId.values()).sort(compareIndexedSessions);
}
/**
 * Builds the headline text shown for one session in the session index.
 *
 * The scratchpad workflow summary is used only when it is a string and is
 * still non-blank after sanitization.
 *
 * - No summary: the workflow summary, or "(no summary)" if there is none.
 * - Summary plus a different workflow summary:
 *   "<summary> | workflow: <workflow summary>".
 * - Otherwise: the summary on its own.
 */
function formatSessionHeadline(session: IndexedSession): string {
  const scratchpadSummary = session.memoryScratchpad?.workflowSummary;

  let workflowHint: string | undefined;
  if (typeof scratchpadSummary === 'string') {
    const sanitized = sanitizeWorkflowSummaryForScratchpad(scratchpadSummary);
    if (sanitized.trim()) {
      workflowHint = sanitized;
    }
  }

  const userSummary = session.summary;
  if (userSummary == null) {
    return workflowHint ?? '(no summary)';
  }

  const hasDistinctHint =
    Boolean(userSummary) &&
    workflowHint !== undefined &&
    workflowHint !== userSummary;

  return hasDistinctHint
    ? `${userSummary} | workflow: ${workflowHint}`
    : userSummary;
}
/**
* Builds a session index for the extraction agent: a compact listing of all
* eligible sessions with their summary, file path, and new/previously-processed status.
@@ -651,8 +676,7 @@ export async function buildSessionIndex(
const status = candidateSessionIds.has(getSessionVersionKey(session))
? '[NEW]'
: '[old]';
const summary = session.summary ?? '(no summary)';
return `${status} ${summary} (${session.userMessageCount} user msgs) — ${session.filePath}`;
return `${status} ${formatSessionHeadline(session)} (${session.userMessageCount} user msgs) — ${session.filePath}`;
},
);
@@ -999,18 +1023,19 @@ export async function startMemoryService(config: Config): Promise<void> {
session,
]),
);
const pendingReadFileSessions = new Map<string, SessionVersion>();
const processedSessionKeys = new Set<string>();
const pendingReadFileSessions = new Map<string, string>();
// Create and run the extraction agent
const executor = await LocalAgentExecutor.create(
agentDefinition,
context,
(activity) => {
const readFileCallId = getReadFileStartCallId(activity);
if (readFileCallId) {
const resolvedPath = getResolvedReadFilePath(config, activity);
if (!resolvedPath) {
const readFileCallId = getReadFileCallId(activity);
if (activity.type === 'TOOL_CALL_START') {
const resolvedPath = getResolvedActivityFilePath(config, activity);
if (!resolvedPath || !readFileCallId) {
return;
}
@@ -1019,35 +1044,31 @@ export async function startMemoryService(config: Config): Promise<void> {
return;
}
pendingReadFileSessions.set(
readFileCallId,
getSessionVersionKey(session),
);
pendingReadFileSessions.set(readFileCallId, session);
return;
}
const completedReadFileCallId = getCompletedReadFileCallId(activity);
if (completedReadFileCallId) {
const sessionKey = pendingReadFileSessions.get(
completedReadFileCallId,
);
if (!sessionKey) {
return;
}
processedSessionKeys.add(sessionKey);
pendingReadFileSessions.delete(completedReadFileCallId);
if (!readFileCallId) {
return;
}
const failedReadFileCallId = getFailedReadFileCallId(activity);
if (failedReadFileCallId) {
pendingReadFileSessions.delete(failedReadFileCallId);
const session = pendingReadFileSessions.get(readFileCallId);
if (!session) {
return;
}
pendingReadFileSessions.delete(readFileCallId);
if (
activity.type === 'TOOL_CALL_END' &&
!isToolActivityError(activity.data['data'])
) {
processedSessionKeys.add(getSessionVersionKey(session));
}
},
);
await executor.run(
const executorResult = await executor.run(
{ request: 'Extract skills from the provided sessions.' },
abortController.signal,
);
@@ -1107,6 +1128,11 @@ export async function startMemoryService(config: Config): Promise<void> {
})),
processedSessions,
skillsCreated,
turnCount: normalizeOptionalNumber(executorResult?.turn_count),
durationMs: normalizeOptionalNumber(executorResult?.duration_ms),
terminateReason: normalizeOptionalString(
executorResult?.terminate_reason,
),
};
const updatedState: ExtractionState = {
runs: [...state.runs, run],
@@ -0,0 +1,45 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
import {
sanitizeWorkflowSummaryForScratchpad,
summarizeShellCommandForScratchpad,
} from './sessionScratchpadUtils.js';
// Unit tests for the scratchpad sanitization helpers: they check that only a
// safe command name survives and that hostile input does not leak arguments.
describe('sessionScratchpadUtils', () => {
describe('summarizeShellCommandForScratchpad', () => {
it('summarizes quoted and assignment-prefixed shell commands', () => {
expect(summarizeShellCommandForScratchpad('"npm" run test')).toBe('npm');
// A leading NAME=value assignment (here holding a credential) is skipped.
expect(
summarizeShellCommandForScratchpad(
'DATABASE_URL=postgres://user:password@example/db pnpm test',
),
).toBe('pnpm');
});
it('handles adversarial unterminated quoted input without exposing arguments', () => {
// An opening double quote followed by 10,000 repetitions of \"! and no
// closing quote.
const adversarialCommand = `"${'\\"!'.repeat(10_000)}`;
expect(summarizeShellCommandForScratchpad(adversarialCommand)).toBe(
'shell',
);
});
});
describe('sanitizeWorkflowSummaryForScratchpad', () => {
it('sanitizes adversarial shell commands in workflow summaries', () => {
// Same unterminated-quote payload, embedded in a workflow summary.
const adversarialCommand = `"${'\\"!'.repeat(10_000)}`;
expect(
sanitizeWorkflowSummaryForScratchpad(
`${SHELL_TOOL_NAME}: ${adversarialCommand} -> read_file`,
),
).toBe(`${SHELL_TOOL_NAME}: shell -> read_file`);
});
});
});
@@ -0,0 +1,155 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
// Separates the top-level parts of a workflow summary (tool sequence, paths, validation).
const WORKFLOW_PART_SEPARATOR = ' | ';
// Separates consecutive tool entries inside the tool-sequence part.
const TOOL_SEQUENCE_SEPARATOR = ' -> ';
// Matches a token that begins with a shell variable assignment (NAME=...).
const SHELL_ASSIGNMENT_REGEX = /^[A-Za-z_][A-Za-z0-9_]*=/;
// Characters allowed in a command name that may be recorded verbatim.
const SAFE_COMMAND_NAME_REGEX = /^[A-Za-z0-9_.@+-]+$/;
// Shape of a tool-sequence entry that may be kept as-is (identifier plus ':' and '.').
const SAFE_TOOL_SEQUENCE_ENTRY_REGEX = /^[A-Za-z_][A-Za-z0-9_:.]*$/;
/**
 * Splits a shell command line into whitespace-separated tokens with a
 * minimal, non-executing notion of quoting.
 *
 * - Space, tab, newline and carriage return separate tokens outside quotes.
 * - Double quotes, single quotes and backticks group text into the current
 *   token; the quote characters themselves are dropped.
 * - Inside double quotes only, a backslash makes the next character literal.
 * - An unterminated quote runs to the end of the input.
 * - Empty tokens (for example `""`) are never emitted.
 */
function tokenizeShellCommand(command: string): string[] {
  const tokens: string[] = [];
  let pending = '';
  let openQuote: '"' | "'" | '`' | undefined;

  const flush = (): void => {
    if (pending) {
      tokens.push(pending);
      pending = '';
    }
  };

  let pos = 0;
  while (pos < command.length) {
    const ch = command[pos];
    pos += 1;

    if (openQuote === undefined) {
      switch (ch) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
          flush();
          break;
        case '"':
        case "'":
        case '`':
          openQuote = ch;
          break;
        default:
          pending += ch;
      }
      continue;
    }

    if (ch === openQuote) {
      // Closing quote: leave quoted mode but keep building the same token.
      openQuote = undefined;
    } else if (openQuote === '"' && ch === '\\' && pos < command.length) {
      // Backslash escape inside double quotes: take the next character as-is.
      pending += command[pos];
      pos += 1;
    } else {
      pending += ch;
    }
  }

  flush();
  return tokens;
}
/**
 * Reduces a single shell token to a command name that is safe to persist.
 *
 * @param token One token of a shell command line, with quotes already removed.
 * @returns `undefined` when the token is empty or is a leading variable
 *   assignment (`NAME=value`), so the caller moves on to the next token.
 *   Otherwise the basename of the token when it consists only of safe
 *   command-name characters, or the generic placeholder `'shell'`.
 */
function getSafeCommandName(token: string): string | undefined {
  if (!token || SHELL_ASSIGNMENT_REGEX.test(token)) {
    return undefined;
  }

  // URLs must be rejected on the whole token: once the token is split on
  // '/', no segment can contain '://' any more, and the last path segment
  // (hostname, or a secret embedded in the path) would be reported as the
  // command name.
  if (token.includes('://')) {
    return 'shell';
  }

  // Keep only the last path component so directories are not recorded.
  const pathParts = token.split(/[/\\]/).filter(Boolean);
  const basename = pathParts[pathParts.length - 1] ?? token;

  return SAFE_COMMAND_NAME_REGEX.test(basename) ? basename : 'shell';
}
/**
 * Summarizes a raw shell command as the name of its first command token.
 *
 * Whitespace is collapsed first; the command is then tokenized and the
 * tokens are examined in order. Tokens for which `getSafeCommandName` yields
 * nothing are skipped, and the first name it does yield is returned.
 *
 * @returns The safe command name, or `undefined` when the command is blank
 *   or no token produces a name.
 */
export function summarizeShellCommandForScratchpad(
  command: string,
): string | undefined {
  const collapsed = command.replace(/\s+/g, ' ').trim();
  if (collapsed.length === 0) {
    return undefined;
  }

  let firstCommand: string | undefined;
  tokenizeShellCommand(collapsed).some((token) => {
    const candidate = getSafeCommandName(token);
    if (!candidate) {
      return false;
    }
    firstCommand = candidate;
    return true; // stop at the first token that yields a name
  });
  return firstCommand;
}
/**
 * Sanitizes one entry of a workflow tool sequence.
 *
 * - Blank entries are dropped.
 * - "<shell tool>: <command>" keeps only the summarized command name, or
 *   falls back to the bare shell tool name when no name can be derived.
 * - Any other entry is kept only if it is the shell tool name or matches the
 *   safe tool-name pattern.
 *
 * @returns The sanitized entry, or `undefined` when it must be dropped.
 */
function sanitizeWorkflowToolSequenceEntry(entry: string): string | undefined {
  const candidate = entry.trim();
  if (candidate.length === 0) {
    return undefined;
  }

  const shellPrefix = `${SHELL_TOOL_NAME}:`;
  if (!candidate.startsWith(shellPrefix)) {
    const isPlainToolName =
      candidate === SHELL_TOOL_NAME ||
      SAFE_TOOL_SEQUENCE_ENTRY_REGEX.test(candidate);
    return isPlainToolName ? candidate : undefined;
  }

  const rawCommand = candidate.slice(shellPrefix.length).trim();
  const commandName = summarizeShellCommandForScratchpad(rawCommand);
  if (!commandName) {
    return SHELL_TOOL_NAME;
  }
  return `${SHELL_TOOL_NAME}: ${commandName}`;
}
/**
 * Sanitizes a compact workflow summary so raw shell command text is not kept.
 *
 * Whitespace is collapsed first. A summary that contains no
 * "<shell tool>:" marker is returned as-is after that normalization.
 * Otherwise the summary is split into its top-level parts and rebuilt:
 * - a part containing the shell marker is treated as a tool sequence and
 *   each entry is sanitized individually (unsafe entries are dropped);
 * - "paths ...", "validated" and "validation failed" parts are kept;
 * - every other part is dropped.
 *
 * @returns The rebuilt summary; may be an empty string when nothing survives.
 */
export function sanitizeWorkflowSummaryForScratchpad(summary: string): string {
  const shellMarker = `${SHELL_TOOL_NAME}:`;
  const collapsed = summary.replace(/\s+/g, ' ').trim();
  if (!collapsed.includes(shellMarker)) {
    return collapsed;
  }

  const keptParts = collapsed
    .split(WORKFLOW_PART_SEPARATOR)
    .flatMap((rawPart): string[] => {
      const part = rawPart.trim();
      if (!part) {
        return [];
      }

      if (part.includes(shellMarker)) {
        const safeEntries: string[] = [];
        for (const entry of part.split(TOOL_SEQUENCE_SEPARATOR)) {
          const safeEntry = sanitizeWorkflowToolSequenceEntry(entry);
          if (safeEntry) {
            safeEntries.push(safeEntry);
          }
        }
        return safeEntries.length > 0
          ? [safeEntries.join(TOOL_SEQUENCE_SEPARATOR)]
          : [];
      }

      const isKnownPart =
        part === 'validated' ||
        part === 'validation failed' ||
        part.startsWith('paths ');
      return isKnownPart ? [part] : [];
    });

  return keptParts.join(WORKFLOW_PART_SEPARATOR);
}
@@ -9,6 +9,8 @@ import { generateSummary, getPreviousSession } from './sessionSummaryUtils.js';
import type { Config } from '../config/config.js';
import type { ContentGenerator } from '../core/contentGenerator.js';
import * as chatRecordingService from './chatRecordingService.js';
import type { ConversationRecord } from './chatRecordingService.js';
import { CoreToolCallStatus } from '../scheduler/types.js';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as os from 'node:os';
@@ -37,25 +39,33 @@ vi.mock('./chatRecordingService.js', async () => {
/**
 * Input for the helpers that build legacy-JSON and JSONL session fixtures.
 */
interface SessionFixture {
summary?: string;
// Typed as unknown so tests can supply malformed scratchpads.
memoryScratchpad?: unknown;
sessionId?: string;
startTime?: string;
lastUpdated?: string;
kind?: ConversationRecord['kind'];
// Explicit messages; when omitted, userMessageCount user messages are generated.
messages?: ConversationRecord['messages'];
// Number of user messages to generate when `messages` is not provided.
userMessageCount: number;
}
/**
 * Serializes a fixture as a legacy single-document `.json` session file.
 *
 * @param fixture Session fields to write; unset ids/timestamps fall back to
 *   fixed defaults so tests stay deterministic.
 * @returns The JSON text of the session record.
 */
function buildLegacySessionJson(fixture: SessionFixture): string {
  // Explicit messages win; otherwise synthesize `userMessageCount` user turns.
  const messages =
    fixture.messages ??
    Array.from({ length: fixture.userMessageCount }, (_, i) => ({
      id: String(i + 1),
      timestamp: '2024-01-01T00:00:00Z',
      type: 'user',
      content: [{ text: `Message ${i + 1}` }],
    }));
  // `messages` is listed exactly once: a second, inline `messages` property
  // would be a duplicate key (rejected by the TypeScript compiler) and would
  // ignore `fixture.messages`.
  return JSON.stringify({
    sessionId: fixture.sessionId ?? 'session-id',
    projectHash: 'abc123',
    startTime: fixture.startTime ?? '2024-01-01T00:00:00Z',
    lastUpdated: fixture.lastUpdated ?? '2024-01-01T00:00:00Z',
    summary: fixture.summary,
    memoryScratchpad: fixture.memoryScratchpad,
    ...(fixture.kind ? { kind: fixture.kind } : {}),
    messages,
  });
}
@@ -66,17 +76,22 @@ function buildJsonlSession(fixture: SessionFixture): string {
startTime: fixture.startTime ?? '2024-01-01T00:00:00Z',
lastUpdated: fixture.lastUpdated ?? '2024-01-01T00:00:00Z',
...(fixture.summary !== undefined ? { summary: fixture.summary } : {}),
...(fixture.memoryScratchpad !== undefined
? { memoryScratchpad: fixture.memoryScratchpad }
: {}),
...(fixture.kind ? { kind: fixture.kind } : {}),
};
const messages =
fixture.messages ??
Array.from({ length: fixture.userMessageCount }, (_, i) => ({
id: String(i + 1),
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: `Message ${i + 1}` }],
}));
const lines: string[] = [JSON.stringify(metadata)];
for (let i = 0; i < fixture.userMessageCount; i++) {
lines.push(
JSON.stringify({
id: String(i + 1),
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: `Message ${i + 1}` }],
}),
);
for (const message of messages) {
lines.push(JSON.stringify(message));
}
return lines.join('\n') + '\n';
}
@@ -119,6 +134,7 @@ describe('sessionSummaryUtils', () => {
mockConfig = {
getContentGenerator: vi.fn().mockReturnValue(mockContentGenerator),
getProjectRoot: vi.fn().mockReturnValue(projectTempDir),
getSessionId: vi.fn().mockReturnValue('current-session'),
storage: {
getProjectTempDir: vi.fn().mockReturnValue(projectTempDir),
@@ -157,13 +173,50 @@ describe('sessionSummaryUtils', () => {
expect(result).toBeNull();
});
it('should return null if most recent session already has summary', async () => {
// A session that already carries both a summary and a scratchpad needs no
// further work, so no previous-session path is returned.
it('should return null if most recent session already has summary metadata', async () => {
await writeSession(
chatsDir,
'session-2024-01-01T10-00-abc12345.json',
buildLegacySessionJson({
userMessageCount: 5,
summary: 'Existing summary',
memoryScratchpad: {
version: 1,
workflowSummary: 'read_file -> edit',
},
}),
);
const result = await getPreviousSession(mockConfig);
expect(result).toBeNull();
});
// A summary alone is not enough: a session without a scratchpad is still
// reported so the scratchpad can be backfilled.
it('should return path if most recent session has summary but no scratchpad', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-abc12345.json',
buildLegacySessionJson({
userMessageCount: 5,
summary: 'Existing summary',
}),
);
const result = await getPreviousSession(mockConfig);
expect(result).toBe(filePath);
});
it('should return null if most recent session has scratchpad but no summary', async () => {
await writeSession(
chatsDir,
'session-2024-01-01T10-00-abc12345.json',
buildLegacySessionJson({
userMessageCount: 5,
memoryScratchpad: {
version: 1,
workflowSummary: 'read_file -> edit',
},
}),
);
@@ -302,6 +355,36 @@ describe('sessionSummaryUtils', () => {
metadataOnly: true,
});
});
// The subagent session has the later `lastUpdated` (2024-01-02), yet the
// older main session must be the one returned.
it('should skip subagent sessions when backfilling scratchpads', async () => {
const mainPath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-main0001.jsonl',
buildJsonlSession({
sessionId: 'main-session',
userMessageCount: 2,
lastUpdated: '2024-01-01T10:00:00Z',
summary: 'Main session summary',
}),
);
await setSessionMtime(mainPath, '2024-01-01T10:00:00Z');
await writeSession(
chatsDir,
'session-2024-01-02T10-00-sub00001.jsonl',
buildJsonlSession({
sessionId: 'subagent-session',
userMessageCount: 2,
lastUpdated: '2024-01-02T10:00:00Z',
summary: 'Subagent summary',
kind: 'subagent',
}),
);
const result = await getPreviousSession(mockConfig);
expect(result).toBe(mainPath);
});
});
describe('generateSummary', () => {
@@ -324,6 +407,7 @@ describe('sessionSummaryUtils', () => {
expect(mockGenerateSummary).toHaveBeenCalledTimes(1);
const written = JSON.parse(await fs.readFile(filePath, 'utf-8'));
expect(written.summary).toBe('Add dark mode to the app');
expect(written.memoryScratchpad).toEqual({ version: 1 });
expect(written.lastUpdated).toBe(lastUpdated);
});
@@ -356,10 +440,160 @@ describe('sessionSummaryUtils', () => {
expect(lastRecord).toEqual({
$set: {
summary: 'Add dark mode to the app',
memoryScratchpad: {
version: 1,
},
},
});
});
// With a summary already stored, only the scratchpad is appended and the
// summary generator is never invoked.
it('should backfill scratchpad without regenerating summary', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-backfill.jsonl',
buildJsonlSession({
userMessageCount: 2,
summary: 'Existing summary',
}),
);
await generateSummary(mockConfig);
expect(mockGenerateSummary).not.toHaveBeenCalled();
const lines = (await fs.readFile(filePath, 'utf-8'))
.split('\n')
.filter(Boolean);
// The last JSONL record is the appended `$set` metadata update.
const lastRecord = JSON.parse(lines[lines.length - 1]);
expect(lastRecord).toEqual({
$set: {
memoryScratchpad: {
version: 1,
},
},
});
});
// Summary generation yields nothing here, but the scratchpad is still
// written; the second run must treat the session as done and not call the
// summary generator again.
it('should not retry summary generation after writing a scratchpad fallback', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-summary-fallback.jsonl',
buildJsonlSession({
sessionId: 'summary-fallback-session',
userMessageCount: 2,
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Read package metadata' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Reading package.json' }],
toolCalls: [
{
id: 'tool-1',
name: 'read_file',
args: { file_path: 'package.json' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
// Simulate the summary service failing to produce a summary.
mockGenerateSummary.mockResolvedValue(undefined);
await generateSummary(mockConfig);
await generateSummary(mockConfig);
expect(mockGenerateSummary).toHaveBeenCalledTimes(1);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.summary).toBeUndefined();
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'read_file | paths package.json',
toolSequence: ['read_file'],
touchedPaths: ['package.json'],
});
});
// File layout built below: base session -> `$set` scratchpad -> two more
// messages -> `$set` lastUpdated. The stored scratchpad predates the later
// messages, so it must be rebuilt from the full message list.
it('should refresh stale scratchpads when messages were appended after metadata', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-resumed1.jsonl',
buildJsonlSession({
sessionId: 'resumed-session',
userMessageCount: 2,
summary: 'Existing summary',
lastUpdated: '2024-01-01T10:00:00Z',
}),
);
// Scratchpad recorded before the session was resumed.
await fs.appendFile(
filePath,
`${JSON.stringify({
$set: {
memoryScratchpad: {
version: 1,
workflowSummary: 'read_file',
toolSequence: ['read_file'],
},
},
})}\n`,
);
// Messages appended after that scratchpad was written.
await fs.appendFile(
filePath,
[
JSON.stringify({
id: 'u-resumed',
timestamp: '2024-01-02T00:00:00Z',
type: 'user',
content: [{ text: 'Update src/app.ts' }],
}),
JSON.stringify({
id: 'g-resumed',
timestamp: '2024-01-02T00:00:01Z',
type: 'gemini',
content: [{ text: 'Editing file' }],
toolCalls: [
{
id: 'tool-resumed',
name: 'replace',
args: { file_path: 'src/app.ts' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-02T00:00:01Z',
},
],
}),
JSON.stringify({
$set: { lastUpdated: '2024-01-02T00:00:02Z' },
}),
].join('\n') + '\n',
);
await generateSummary(mockConfig);
expect(mockGenerateSummary).not.toHaveBeenCalled();
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'replace | paths src/app.ts',
toolSequence: ['replace'],
touchedPaths: ['src/app.ts'],
});
});
it('should preserve a newer JSONL lastUpdated written concurrently', async () => {
const initialLastUpdated = '2024-01-01T10:00:00Z';
const newerLastUpdated = '2024-01-02T12:34:56Z';
@@ -411,6 +645,7 @@ describe('sessionSummaryUtils', () => {
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.summary).toBe('Add dark mode to the app');
expect(savedConversation?.memoryScratchpad).toEqual({ version: 1 });
expect(savedConversation?.lastUpdated).toBe(newerLastUpdated);
const lines = (await fs.readFile(filePath, 'utf-8'))
@@ -420,6 +655,9 @@ describe('sessionSummaryUtils', () => {
expect(lastRecord).toEqual({
$set: {
summary: 'Add dark mode to the app',
memoryScratchpad: {
version: 1,
},
},
});
});
@@ -454,6 +692,9 @@ describe('sessionSummaryUtils', () => {
expect(JSON.parse(previousLines[previousLines.length - 1])).toEqual({
$set: {
summary: 'Add dark mode to the app',
memoryScratchpad: {
version: 1,
},
},
});
@@ -462,5 +703,312 @@ describe('sessionSummaryUtils', () => {
.filter(Boolean);
expect(currentLines).toHaveLength(2);
});
// `package.json` has no directory separator; it must still be recognised as
// a path (via its extension) and recorded in `touchedPaths`.
it('should preserve repo-root file names in scratchpad touched paths', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-rootpath.jsonl',
buildJsonlSession({
sessionId: 'root-path-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Inspect package.json' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Reading files' }],
toolCalls: [
{
id: 'tool-1',
name: 'read_file',
args: { file_path: 'package.json' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'read_file | paths package.json',
toolSequence: ['read_file'],
touchedPaths: ['package.json'],
});
});
// Both shell commands embed secrets (a bearer token, a database password).
// The scratchpad may only keep the program name of each command.
it('should summarize shell commands without raw arguments in scratchpad tool sequence', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-shellcmd.jsonl',
buildJsonlSession({
sessionId: 'shell-command-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Run the migration and regenerate docs' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Running commands' }],
toolCalls: [
{
id: 'tool-1',
name: 'run_shell_command',
args: {
command:
'curl https://api.example.com -H "Authorization: Bearer sk-secret-token"',
},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
{
id: 'tool-2',
name: 'run_shell_command',
args: {
command:
'DATABASE_URL=postgresql://user:password@localhost/db npm run migrate -- --name add-users',
},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:02Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:03Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'run_shell_command: curl -> run_shell_command: npm',
toolSequence: ['run_shell_command: curl', 'run_shell_command: npm'],
});
// Belt-and-braces: none of the sensitive fragments may leak into the summary.
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('Authorization');
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('sk-secret-token');
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('password');
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('add-users');
});
// The tool name `contest_runner` contains the letters "test" but is not a
// validation tool, so no `validationStatus` may be recorded.
it('should not classify validation substrings as validation tools', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-validation-substring.jsonl',
buildJsonlSession({
sessionId: 'validation-substring-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Run the contest helper' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Running helper' }],
toolCalls: [
{
id: 'tool-1',
name: 'contest_runner',
args: {},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'contest_runner',
toolSequence: ['contest_runner'],
});
});
// `args` holds one shallow `file_path` and one buried seven objects deep.
// Only the shallow one may be collected; the deep one lies beyond the
// extraction depth limit.
it('should cap nested path extraction depth', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-deep-paths.jsonl',
buildJsonlSession({
sessionId: 'deep-paths-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Edit shallow and deeply nested files' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Editing files' }],
toolCalls: [
{
id: 'tool-1',
name: 'replace',
args: {
file_path: 'src/shallow.ts',
level1: {
level2: {
level3: {
level4: {
level5: {
level6: {
level7: {
file_path: 'src/deep.ts',
},
},
},
},
},
},
},
},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'replace | paths src/shallow.ts',
toolSequence: ['replace'],
touchedPaths: ['src/shallow.ts'],
});
});
// `npm test` fails first and then succeeds. The later result wins
// (`passed`), and the repeated command appears only once in `toolSequence`.
it('should use the latest validation result in scratchpad metadata', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-validation.jsonl',
buildJsonlSession({
sessionId: 'validation-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Fix the tests' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Running tests' }],
toolCalls: [
{
id: 'tool-1',
name: 'run_shell_command',
args: { command: 'npm test' },
status: CoreToolCallStatus.Error,
timestamp: '2024-01-01T00:00:01Z',
},
{
id: 'tool-2',
name: 'run_shell_command',
args: { command: 'npm test' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:02Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:03Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'run_shell_command: npm | validated',
toolSequence: ['run_shell_command: npm'],
validationStatus: 'passed',
});
});
});
});
+318 -33
View File
@@ -12,15 +12,29 @@ import {
SESSION_FILE_PREFIX,
loadConversationRecord,
type ConversationRecord,
type MemoryScratchpad,
type ToolCallRecord,
} from './chatRecordingService.js';
import { CoreToolCallStatus } from '../scheduler/types.js';
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
import { summarizeShellCommandForScratchpad } from './sessionScratchpadUtils.js';
import fs from 'node:fs/promises';
import path from 'node:path';
// NOTE(review): presumably the minimum message count a session needs before
// it is summarized — its use site is outside this view; confirm.
const MIN_MESSAGES_FOR_SUMMARY = 1;
// Upper bound on distinct entries kept in a scratchpad's `toolSequence`.
const MAX_SCRATCHPAD_TOOLS = 6;
// Upper bound on distinct entries kept in a scratchpad's `touchedPaths`.
const MAX_SCRATCHPAD_PATHS = 4;
// Deepest nesting level of tool-call args that is searched for paths.
const MAX_SCRATCHPAD_PATH_DEPTH = 6;
// Longest workflow summary emitted, including the trailing '...' on truncation.
const MAX_WORKFLOW_SUMMARY_LENGTH = 160;
// Matched against a tool call's command string to spot test/lint/build runs.
const VALIDATION_COMMAND_REGEX =
/\b(test|tests|vitest|jest|pytest|cargo test|npm test|pnpm test|yarn test|bun test|lint|build|check|typecheck)\b/i;
// Matched against an argument KEY; only string values under such keys are
// considered path candidates.
const PATH_KEY_REGEX = /(path|file|dir|directory|cwd|root)/i;
// Matched against a tool's name; `\b` keeps names such as `contest_runner`
// from matching on the embedded "test".
const VALIDATION_TOOL_REGEX = /\b(test|lint|build|check|typecheck)\b/i;
/**
 * A conversation record plus optional bookkeeping fields that may accompany
 * a loaded session.
 */
type LoadedSession = ConversationRecord & {
// Total message count; when absent, callers fall back to `messages.length`.
messageCount?: number;
// NOTE(review): presumably filled in by metadata-only loads — confirm
// against `loadConversationRecord`.
userMessageCount?: number;
// When `true`, an existing `memoryScratchpad` is treated as out of date.
// NOTE(review): where this flag gets set is not visible here — confirm.
memoryScratchpadIsStale?: boolean;
};
interface SessionFileCandidate {
@@ -72,6 +86,238 @@ function getSessionTimestampMs(session: LoadedSession): number {
return Number.isNaN(parsed) ? 0 : parsed;
}
/**
 * Returns the tool name with surrounding whitespace removed, or the
 * placeholder `unknown_tool` when nothing is left after trimming.
 */
function normalizeToolName(name: string): string {
  const cleaned = name.trim();
  if (cleaned === '') {
    return 'unknown_tool';
  }
  return cleaned;
}
/**
 * Appends `value` to `target` in place, but only when the value is non-empty,
 * not already present, and `target` still holds fewer than `limit` entries.
 *
 * @param target Array to append to (mutated).
 * @param value Candidate entry.
 * @param limit Maximum number of entries `target` may hold.
 */
function pushUniqueLimited(
  target: string[],
  value: string,
  limit: number,
): void {
  const hasRoom = target.length < limit;
  const isNewValue = Boolean(value) && !target.includes(value);
  if (hasRoom && isNewValue) {
    target.push(value);
  }
}
/**
 * Turns a tool-argument string into a short path suitable for the scratchpad.
 *
 * @param candidate Raw string value taken from a tool call's arguments.
 * @param projectRoot Project root used to relativize absolute paths.
 * @returns The normalized, forward-slash path, or `null` when the candidate
 *   is blank, longer than 240 characters, spans multiple lines, or does not
 *   look like a path (no separator, no leading dot and no file extension).
 */
function normalizePathCandidate(
  candidate: string,
  projectRoot: string,
): string | null {
  const trimmed = candidate.trim();
  if (
    trimmed.length === 0 ||
    trimmed.length > 240 ||
    trimmed.includes('\n')
  ) {
    return null;
  }

  const looksLikePath =
    trimmed.includes('/') ||
    trimmed.includes('\\') ||
    trimmed.startsWith('.') ||
    path.extname(trimmed).length > 0;
  if (!looksLikePath) {
    return null;
  }

  let normalized = trimmed.replace(/\\/g, '/');
  if (path.isAbsolute(trimmed)) {
    const relative = path.relative(projectRoot, trimmed);
    // Compare `..` as a whole segment: a plain `startsWith('..')` would also
    // reject in-project entries such as `..cache/file.ts`.
    const escapesRoot =
      relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);
    // Inside the project: keep the root-relative path. Outside it (or the
    // root itself, where `relative` is empty): keep only the base name so no
    // foreign directory layout is recorded.
    normalized =
      relative && !escapesRoot
        ? relative.replace(/\\/g, '/')
        : path.basename(trimmed);
  }

  // Very long paths are reduced to their last three segments.
  if (normalized.length > 120) {
    normalized = normalized.split('/').slice(-3).join('/');
  }
  return normalized.length > 0 ? normalized : null;
}
/**
 * Recursively scans an arbitrary tool-argument value and appends path-like
 * strings to `paths`.
 *
 * A string is considered only when the key it was found under (`keyHint`)
 * matches `PATH_KEY_REGEX`; array items inherit the key of the array itself.
 * Scanning stops once `paths` holds `MAX_SCRATCHPAD_PATHS` entries or the
 * nesting level exceeds `MAX_SCRATCHPAD_PATH_DEPTH`.
 *
 * @param value Value to inspect (string, array, object or anything else).
 * @param projectRoot Project root forwarded to `normalizePathCandidate`.
 * @param paths Accumulator, mutated in place.
 * @param keyHint Key under which `value` was found, if any.
 * @param depth Current nesting level; callers normally omit it.
 */
function collectPathsFromValue(
  value: unknown,
  projectRoot: string,
  paths: string[],
  keyHint?: string,
  depth = 0,
): void {
  const tooDeep = depth > MAX_SCRATCHPAD_PATH_DEPTH;
  if (tooDeep || paths.length >= MAX_SCRATCHPAD_PATHS) {
    return;
  }

  if (typeof value === 'string') {
    if (keyHint && PATH_KEY_REGEX.test(keyHint)) {
      const normalized = normalizePathCandidate(value, projectRoot);
      if (normalized) {
        pushUniqueLimited(paths, normalized, MAX_SCRATCHPAD_PATHS);
      }
    }
    return;
  }

  // Visits one child and reports whether the accumulator is now full.
  const descend = (child: unknown, childKey: string | undefined): boolean => {
    collectPathsFromValue(child, projectRoot, paths, childKey, depth + 1);
    return paths.length >= MAX_SCRATCHPAD_PATHS;
  };

  if (Array.isArray(value)) {
    for (const item of value) {
      if (descend(item, keyHint)) {
        break;
      }
    }
    return;
  }

  if (value !== null && typeof value === 'object') {
    for (const [key, nestedValue] of Object.entries(value)) {
      if (descend(nestedValue, key)) {
        break;
      }
    }
  }
}
/**
 * Returns the command text of a tool call: the first of the `command`, `cmd`
 * or `script` arguments (checked in that order) that is a non-blank string.
 * The value is returned untrimmed; `undefined` when none qualifies.
 */
function getToolCallCommand(toolCall: ToolCallRecord): string | undefined {
  const isNonBlankString = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.trim().length > 0;

  for (const key of ['command', 'cmd', 'script']) {
    const argValue = toolCall.args[key];
    if (isNonBlankString(argValue)) {
      return argValue;
    }
  }
  return undefined;
}
/**
 * Builds the `toolSequence` entry for a tool call.
 *
 * Non-shell tools are represented by their normalized name. Shell calls are
 * rendered as `<tool>: <summary>`, where the summary comes from
 * `summarizeShellCommandForScratchpad`; if there is no command or no summary,
 * the bare tool name is used.
 */
function getToolSequenceEntry(toolCall: ToolCallRecord): string {
  const toolName = normalizeToolName(toolCall.name);
  if (toolName === SHELL_TOOL_NAME) {
    const command = getToolCallCommand(toolCall);
    if (command) {
      const commandSummary = summarizeShellCommandForScratchpad(command);
      if (commandSummary) {
        return `${toolName}: ${commandSummary}`;
      }
    }
  }
  return toolName;
}
/**
 * Classifies a tool call as a validation step and reports its outcome.
 *
 * A call counts as validation when its name matches `VALIDATION_TOOL_REGEX`
 * or its command text matches `VALIDATION_COMMAND_REGEX`.
 *
 * @returns `undefined` for non-validation calls; otherwise `'passed'` on
 *   success, `'failed'` on error or cancellation, `'unknown'` for any other
 *   status.
 */
function getValidationStatusForToolCall(
  toolCall: ToolCallRecord,
): MemoryScratchpad['validationStatus'] | undefined {
  const command = getToolCallCommand(toolCall);
  const looksLikeValidation =
    VALIDATION_TOOL_REGEX.test(toolCall.name) ||
    (!!command && VALIDATION_COMMAND_REGEX.test(command));
  if (!looksLikeValidation) {
    return undefined;
  }

  switch (toolCall.status) {
    case CoreToolCallStatus.Success:
      return 'passed';
    case CoreToolCallStatus.Error:
    case CoreToolCallStatus.Cancelled:
      return 'failed';
    default:
      return 'unknown';
  }
}
/**
 * Renders the one-line workflow summary stored in the scratchpad.
 *
 * Layout: `<tool> -> <tool> | paths <p1>, <p2> | validated`, where each of
 * the three sections is optional. The validation section reads `validated`
 * for `'passed'`, `validation failed` for `'failed'`, and is omitted
 * otherwise. Text longer than `MAX_WORKFLOW_SUMMARY_LENGTH` is cut and ends
 * in `...`.
 *
 * @returns The summary, or `undefined` when there is nothing to report.
 */
function buildWorkflowSummary(
  toolSequence: string[],
  touchedPaths: string[],
  validationStatus?: MemoryScratchpad['validationStatus'],
): string | undefined {
  let validationLabel: string | undefined;
  if (validationStatus === 'passed') {
    validationLabel = 'validated';
  } else if (validationStatus === 'failed') {
    validationLabel = 'validation failed';
  }

  const sections = [
    toolSequence.length > 0 ? toolSequence.join(' -> ') : undefined,
    touchedPaths.length > 0 ? `paths ${touchedPaths.join(', ')}` : undefined,
    validationLabel,
  ].filter((section): section is string => section !== undefined);
  if (sections.length === 0) {
    return undefined;
  }

  const joined = sections.join(' | ');
  if (joined.length === 0) {
    return undefined;
  }
  if (joined.length <= MAX_WORKFLOW_SUMMARY_LENGTH) {
    return joined;
  }
  // Reserve three characters for the ellipsis.
  return `${joined.slice(0, MAX_WORKFLOW_SUMMARY_LENGTH - 3)}...`;
}
/**
 * Derives a version-1 memory scratchpad from a session's messages.
 *
 * Only tool calls attached to `gemini` messages are considered. For each one
 * the tool-sequence entry, any path-like arguments and the validation outcome
 * are collected; a later validation outcome overrides an earlier one.
 *
 * @param messages Conversation messages in chronological order.
 * @param projectRoot Project root used to relativize touched paths.
 * @returns The scratchpad; optional fields are present only when non-empty.
 */
function buildMemoryScratchpad(
  messages: ConversationRecord['messages'],
  projectRoot: string,
): MemoryScratchpad {
  const toolSequence: string[] = [];
  const touchedPaths: string[] = [];
  let validationStatus: MemoryScratchpad['validationStatus'];

  const toolCalls = messages.flatMap((message) =>
    message.type === 'gemini' && message.toolCalls ? message.toolCalls : [],
  );
  for (const toolCall of toolCalls) {
    pushUniqueLimited(
      toolSequence,
      getToolSequenceEntry(toolCall),
      MAX_SCRATCHPAD_TOOLS,
    );
    collectPathsFromValue(toolCall.args, projectRoot, touchedPaths);
    // Keep the previous status when this call is not a validation step.
    validationStatus =
      getValidationStatusForToolCall(toolCall) ?? validationStatus;
  }

  const workflowSummary = buildWorkflowSummary(
    toolSequence,
    touchedPaths,
    validationStatus,
  );
  const hasTools = toolSequence.length > 0;
  const hasPaths = touchedPaths.length > 0;
  return {
    version: 1,
    ...(workflowSummary ? { workflowSummary } : {}),
    ...(hasTools ? { toolSequence } : {}),
    ...(hasPaths ? { touchedPaths } : {}),
    ...(validationStatus ? { validationStatus } : {}),
  };
}
/**
 * Reports whether the session carries a memory scratchpad that has not been
 * flagged as stale.
 */
function hasCurrentMemoryScratchpad(session: LoadedSession): boolean {
  if (!session.memoryScratchpad) {
    return false;
  }
  return session.memoryScratchpadIsStale !== true;
}
/**
 * Reports whether the session already has the metadata this module produces.
 * Only a current memory scratchpad counts; the presence or absence of a
 * summary is not checked.
 */
function hasSessionSummaryMetadata(session: LoadedSession): boolean {
  const scratchpadIsCurrent = hasCurrentMemoryScratchpad(session);
  return scratchpadIsCurrent;
}
/**
 * Returns the session's message count: the recorded `messageCount` when it is
 * set, otherwise the length of the loaded `messages` array.
 */
function getLoadedMessageCount(session: LoadedSession): number {
  const { messageCount } = session;
  if (messageCount === undefined || messageCount === null) {
    return session.messages.length;
  }
  return messageCount;
}
/**
* Generates and saves a summary for a session file.
*/
@@ -85,10 +331,11 @@ async function generateAndSaveSummary(
return;
}
// Skip if summary already exists
if (conversation.summary) {
// Skip if workflow metadata already exists; memory extraction can use the
// scratchpad even when summary generation was unavailable.
if (hasSessionSummaryMetadata(conversation)) {
debugLogger.debug(
`[SessionSummary] Summary already exists for ${sessionPath}, skipping`,
`[SessionSummary] Summary metadata already exists for ${sessionPath}, skipping`,
);
return;
}
@@ -101,29 +348,31 @@ async function generateAndSaveSummary(
return;
}
// Create summary service
const contentGenerator = config.getContentGenerator();
if (!contentGenerator) {
debugLogger.debug(
'[SessionSummary] Content generator not available, skipping summary generation',
);
return;
}
const baseLlmClient = new BaseLlmClient(contentGenerator, config);
const summaryService = new SessionSummaryService(baseLlmClient);
// Generate summary
const summary = await summaryService.generateSummary({
messages: conversation.messages,
});
let summary = conversation.summary;
if (!summary) {
debugLogger.warn(
`[SessionSummary] Failed to generate summary for ${sessionPath}`,
);
return;
const contentGenerator = config.getContentGenerator();
if (!contentGenerator) {
debugLogger.debug(
'[SessionSummary] Content generator not available, skipping summary generation',
);
} else {
const baseLlmClient = new BaseLlmClient(contentGenerator, config);
const summaryService = new SessionSummaryService(baseLlmClient);
summary =
(await summaryService.generateSummary({
messages: conversation.messages,
})) ?? undefined;
if (!summary) {
debugLogger.warn(
`[SessionSummary] Failed to generate summary for ${sessionPath}`,
);
}
}
}
let scratchpadSourceConversation = conversation;
// Re-read the file before writing to handle race conditions. For JSONL we
// only need the metadata; for legacy JSON we need the full record so we can
// round-trip the messages back to disk.
@@ -136,18 +385,53 @@ async function generateAndSaveSummary(
return;
}
// Check if summary was added by another process
if (freshConversation.summary) {
// Check if summary metadata was added by another process
if (hasSessionSummaryMetadata(freshConversation)) {
debugLogger.debug(
`[SessionSummary] Summary was added by another process for ${sessionPath}`,
`[SessionSummary] Summary metadata was added by another process for ${sessionPath}`,
);
return;
}
if (
!hasCurrentMemoryScratchpad(freshConversation) &&
(getLoadedMessageCount(freshConversation) !==
getLoadedMessageCount(conversation) ||
freshConversation.lastUpdated !== conversation.lastUpdated)
) {
const latestConversation = await loadConversationRecord(sessionPath);
if (!latestConversation) {
debugLogger.debug(`[SessionSummary] Could not re-read ${sessionPath}`);
return;
}
if (hasSessionSummaryMetadata(latestConversation)) {
debugLogger.debug(
`[SessionSummary] Summary metadata was added by another process for ${sessionPath}`,
);
return;
}
scratchpadSourceConversation = latestConversation;
}
const metadataUpdate: Partial<ConversationRecord> = {};
if (!freshConversation.summary && summary) {
metadataUpdate.summary = summary;
}
if (!hasCurrentMemoryScratchpad(freshConversation)) {
metadataUpdate.memoryScratchpad = buildMemoryScratchpad(
scratchpadSourceConversation.messages,
config.getProjectRoot(),
);
}
if (Object.keys(metadataUpdate).length === 0) {
return;
}
if (isJsonl) {
await fs.appendFile(
sessionPath,
`${JSON.stringify({ $set: { summary } })}\n`,
`${JSON.stringify({ $set: metadataUpdate })}\n`,
);
} else {
const lastUpdated = freshConversation.lastUpdated;
@@ -156,7 +440,7 @@ async function generateAndSaveSummary(
JSON.stringify(
{
...freshConversation,
summary,
...metadataUpdate,
lastUpdated,
},
null,
@@ -165,13 +449,13 @@ async function generateAndSaveSummary(
);
}
debugLogger.debug(
`[SessionSummary] Saved summary for ${sessionPath}: "${summary}"`,
`[SessionSummary] Saved summary metadata for ${sessionPath}${summary ? `: "${summary}"` : ''}`,
);
}
/**
* Finds the most recently updated previous session that still needs workflow
* metadata, i.e. one without a current memory scratchpad (a missing summary
* alone does not qualify).
* Returns the path if it needs a scratchpad, null otherwise.
*/
export async function getPreviousSession(
config: Config,
@@ -217,7 +501,8 @@ export async function getPreviousSession(
});
if (!conversation) continue;
if (conversation.sessionId === config.getSessionId()) continue;
if (conversation.summary) continue;
if (conversation.kind === 'subagent') continue;
if (hasSessionSummaryMetadata(conversation)) continue;
// Only generate summaries for sessions with more than 1 user message.
// `loadConversationRecord` populates `userMessageCount` in metadataOnly
@@ -264,7 +549,7 @@ export async function getPreviousSession(
}
/**
* Generates summary metadata (summary and memory scratchpad) for the previous
* session if it lacks a scratchpad.
* This is designed to be called fire-and-forget on startup.
*/
export async function generateSummary(config: Config): Promise<void> {