feat(memory): persist auto-memory scratchpad for skill extraction (#25873)

This commit is contained in:
Sandy Tao
2026-04-24 17:21:12 -07:00
committed by GitHub
parent a5b030b424
commit 42587de733
17 changed files with 2418 additions and 171 deletions
@@ -779,6 +779,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
return {
result: finalResult || 'Task completed.',
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
}
@@ -786,6 +788,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
result:
finalResult || 'Agent execution was terminated before completion.',
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
} catch (error) {
// Check if the error is an AbortError caused by our internal timeout.
@@ -826,6 +830,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
return {
result: finalResult,
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
}
}
@@ -840,6 +846,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
return {
result: finalResult,
terminate_reason: terminateReason,
turn_count: turnCounter,
duration_ms: Date.now() - startTime,
};
}
@@ -74,12 +74,14 @@ describe('SkillExtractionAgent', () => {
expect(query).toContain(existingSkillsSummary);
expect(query).toContain(sessionIndex);
expect(query).toContain('optional workflow hint');
expect(query).toContain(
'The summary is a user-intent summary, not a workflow summary.',
'workflow hints alone is never enough evidence for a reusable skill.',
);
expect(query).toContain(
'The session summaries describe user intent, not workflow details.',
'Session summaries describe user intent; optional workflow hints describe likely procedural traces.',
);
expect(query).toContain('Use workflow hints for routing');
expect(query).toContain(
'Only write a skill if the evidence shows a durable, recurring workflow',
);
@@ -303,10 +303,11 @@ export const SkillExtractionAgent = (
'# Session Index',
'',
'Below is an index of past conversation sessions. Each line shows:',
'[NEW] or [old] status, a 1-line summary, message count, and the file path.',
'[NEW] or [old] status, a 1-line user-intent summary, optional workflow hint, message count, and the file path.',
'',
'The summary is a user-intent summary, not a workflow summary.',
'Matching summary text alone is never enough evidence for a reusable skill.',
'Some lines may include "| workflow: ..."; this is a compact workflow hint from session metadata.',
'Use workflow hints to prioritize which sessions to read and to group likely recurring workflows.',
'Matching summary text or workflow hints alone is never enough evidence for a reusable skill.',
'',
'[NEW] = not yet processed for skill extraction (focus on these)',
'[old] = previously processed (read only if a [NEW] session hints at a repeated pattern)',
@@ -326,7 +327,7 @@ export const SkillExtractionAgent = (
return {
systemPrompt: buildSystemPrompt(skillsDir),
query: `${initialContext}\n\nAnalyze the session index above. The session summaries describe user intent, not workflow details. Read sessions that suggest repeated workflows using read_file. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
query: `${initialContext}\n\nAnalyze the session index above. Session summaries describe user intent; optional workflow hints describe likely procedural traces. Use workflow hints for routing, then read sessions that suggest repeated workflows using read_file to verify recurrence from transcript evidence. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
};
},
runConfig: {
+2
View File
@@ -36,6 +36,8 @@ export enum AgentTerminateMode {
/**
 * Result object returned by an agent run.
 */
export interface OutputObject {
// Final textual result of the run.
result: string;
// Why the run stopped.
terminate_reason: AgentTerminateMode;
// Optional turn counter for the run; the local executor fills it from its
// `turnCounter` when it builds the return value.
turn_count?: number;
// Optional elapsed wall-clock time in milliseconds; the local executor
// computes it as `Date.now() - startTime`.
duration_ms?: number;
}
/**
@@ -112,6 +112,7 @@ export async function loadConversationRecord(
userMessageCount?: number;
firstUserMessage?: string;
hasUserOrAssistantMessage?: boolean;
memoryScratchpadIsStale?: boolean;
})
| null
> {
@@ -133,6 +134,8 @@ export async function loadConversationRecord(
string,
{ isUser: boolean; isUserOrAssistant: boolean }
>();
let isTrackingMemoryScratchpadFreshness = false;
let memoryScratchpadIsStale = false;
let firstUserMessageStr: string | undefined;
for await (const line of rl) {
@@ -140,6 +143,9 @@ export async function loadConversationRecord(
try {
const record = JSON.parse(line) as unknown;
if (isRewindRecord(record)) {
if (isTrackingMemoryScratchpadFreshness) {
memoryScratchpadIsStale = true;
}
const rewindId = record.$rewindTo;
if (options?.metadataOnly) {
const idx = messageIds.indexOf(rewindId);
@@ -168,6 +174,9 @@ export async function loadConversationRecord(
}
}
} else if (isMessageRecord(record)) {
if (isTrackingMemoryScratchpadFreshness) {
memoryScratchpadIsStale = true;
}
const id = record.id;
const isUser = hasProperty(record, 'type') && record.type === 'user';
const isUserOrAssistant =
@@ -206,6 +215,12 @@ export async function loadConversationRecord(
}
}
} else if (isMetadataUpdateRecord(record)) {
if (hasProperty(record.$set, 'memoryScratchpad')) {
isTrackingMemoryScratchpadFreshness = Boolean(
record.$set.memoryScratchpad,
);
memoryScratchpadIsStale = false;
}
// Metadata update
metadata = {
...metadata,
@@ -257,6 +272,7 @@ export async function loadConversationRecord(
startTime: metadata.startTime || new Date().toISOString(),
lastUpdated: metadata.lastUpdated || new Date().toISOString(),
summary: metadata.summary,
memoryScratchpad: metadata.memoryScratchpad,
directories: metadata.directories,
kind: metadata.kind,
messages: options?.metadataOnly ? [] : loadedMessages,
@@ -267,6 +283,9 @@ export async function loadConversationRecord(
options?.metadataOnly && metadataMessages.length > 0
? metadataMessages.filter((m) => m.type === 'user').length
: userMessageCount,
memoryScratchpadIsStale: isTrackingMemoryScratchpadFreshness
? memoryScratchpadIsStale
: undefined,
firstUserMessage: fallbackFirstUserMessage,
hasUserOrAssistantMessage:
options?.metadataOnly && metadataMessages.length > 0
@@ -332,6 +351,13 @@ export class ChatRecordingService {
for (const msg of this.cachedConversation.messages) {
this.appendRecord(msg);
}
if (this.cachedConversation.memoryScratchpad) {
this.appendRecord({
$set: {
memoryScratchpad: this.cachedConversation.memoryScratchpad,
},
});
}
}
// Update the session ID in the existing file
@@ -25,6 +25,19 @@ export interface TokensSummary {
total: number; // totalTokenCount
}
// Validation outcome recorded in a session's memory scratchpad.
// NOTE(review): what counts as "validation" is decided by the producer of the
// scratchpad, which is not visible here — confirm before relying on it.
export type MemoryValidationStatus = 'passed' | 'failed' | 'unknown';
/**
 * Lightweight workflow metadata attached to a session for memory extraction.
 *
 * Every field except `version` is optional. Values read back from disk should
 * not be trusted to match these types without a runtime check.
 */
export interface MemoryScratchpad {
// Schema version; this type only admits the literal 1.
version: 1;
// Compact one-line workflow description, e.g.
// "read_file -> edit | paths <files> | validated".
workflowSummary?: string;
// presumably the ordered tool names used in the session — TODO confirm against the writer.
toolSequence?: string[];
// presumably file paths the session touched — TODO confirm against the writer.
touchedPaths?: string[];
// Validation outcome for the session, when known.
validationStatus?: MemoryValidationStatus;
}
/**
* Base fields common to all messages.
*/
@@ -83,6 +96,7 @@ export interface ConversationRecord {
lastUpdated: string;
messages: MessageRecord[];
summary?: string;
memoryScratchpad?: MemoryScratchpad;
/** Workspace directories added during the session via /dir add */
directories?: string[];
/** The kind of conversation (main agent or subagent) */
@@ -120,6 +134,7 @@ export interface PartialMetadataRecord {
startTime?: string;
lastUpdated?: string;
summary?: string;
memoryScratchpad?: MemoryScratchpad;
directories?: string[];
kind?: 'main' | 'subagent';
}
+343 -13
View File
@@ -127,6 +127,7 @@ async function writeConversationJsonl(
startTime: conversation.startTime,
lastUpdated: conversation.lastUpdated,
summary: conversation.summary,
memoryScratchpad: conversation.memoryScratchpad,
directories: conversation.directories,
kind: conversation.kind,
};
@@ -565,7 +566,7 @@ describe('memoryService', () => {
);
});
it('records only sessions whose read_file calls succeed as processed', async () => {
it('records only sessions whose read_file completed successfully as processed', async () => {
const { startMemoryService, readExtractionState } = await import(
'./memoryService.js'
);
@@ -595,17 +596,69 @@ describe('memoryService', () => {
messageCount: 20,
lastUpdated: '2025-01-01T01:00:00Z',
});
const failedConversation = createConversation({
sessionId: 'failed-session',
summary: 'read_file errors on this one',
messageCount: 20,
lastUpdated: '2025-01-03T01:00:00Z',
});
const rejectedConversation = createConversation({
sessionId: 'rejected-session',
summary: 'read_file was rejected for this one',
messageCount: 20,
lastUpdated: '2025-01-02T02:00:00Z',
});
const mismatchedEndConversation = createConversation({
sessionId: 'mismatched-end-session',
summary: 'read_file start with a mismatched tool end',
messageCount: 20,
lastUpdated: '2025-01-02T03:00:00Z',
});
const mismatchedErrorConversation = createConversation({
sessionId: 'mismatched-error-session',
summary: 'read_file recovers after a mismatched tool error',
messageCount: 20,
lastUpdated: '2025-01-02T04:00:00Z',
});
const openedPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-opened.jsonl`,
);
const skippedPath = path.join(
const failedPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-skipped.jsonl`,
`${SESSION_FILE_PREFIX}2025-01-03T00-00-failed.jsonl`,
);
const rejectedPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-rejected.jsonl`,
);
const mismatchedEndPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-mismatched-end.jsonl`,
);
const mismatchedErrorPath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-02T00-00-mismatched-error.jsonl`,
);
await writeConversationJsonl(openedPath, openedConversation);
await writeConversationJsonl(skippedPath, skippedConversation);
await writeConversationJsonl(failedPath, failedConversation);
await writeConversationJsonl(rejectedPath, rejectedConversation);
await writeConversationJsonl(
mismatchedEndPath,
mismatchedEndConversation,
);
await writeConversationJsonl(
mismatchedErrorPath,
mismatchedErrorConversation,
);
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-skipped.jsonl`,
),
skippedConversation,
);
vi.mocked(LocalAgentExecutor.create).mockImplementationOnce(
async (_definition, _context, onActivity) =>
@@ -624,21 +677,21 @@ describe('memoryService', () => {
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
type: 'TOOL_CALL_END',
data: {
name: 'read_file',
args: { file_path: skippedPath },
callId: 'call-skipped',
id: 'call-opened',
data: {},
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'ERROR',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
callId: 'call-skipped',
error: 'access denied',
args: { file_path: failedPath },
callId: 'call-failed',
},
});
onActivity?.({
@@ -647,8 +700,28 @@ describe('memoryService', () => {
type: 'TOOL_CALL_END',
data: {
name: 'read_file',
id: 'call-opened',
data: { content: 'Read this one' },
id: 'call-failed',
data: { isError: true },
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
args: { file_path: rejectedPath },
callId: 'call-rejected',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'ERROR',
data: {
name: 'read_file',
callId: 'call-rejected',
error: 'User rejected this operation.',
},
});
onActivity?.({
@@ -661,6 +734,56 @@ describe('memoryService', () => {
callId: 'call-unrelated',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
args: { file_path: mismatchedEndPath },
callId: 'call-mismatched-end',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_END',
data: {
name: 'write_file',
id: 'call-mismatched-end',
data: {},
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_START',
data: {
name: 'read_file',
args: { file_path: mismatchedErrorPath },
callId: 'call-mismatched-error',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'ERROR',
data: {
name: 'write_file',
callId: 'call-mismatched-error',
error: 'Different tool failed.',
},
});
onActivity?.({
isSubagentActivityEvent: true,
agentName: 'Skill Extractor',
type: 'TOOL_CALL_END',
data: {
name: 'read_file',
id: 'call-mismatched-error',
data: {},
},
});
return undefined;
}),
}) as never,
@@ -691,6 +814,22 @@ describe('memoryService', () => {
);
expect(state.runs).toHaveLength(1);
expect(state.runs[0].candidateSessions).toEqual([
{
sessionId: 'failed-session',
lastUpdated: '2025-01-03T01:00:00Z',
},
{
sessionId: 'mismatched-error-session',
lastUpdated: '2025-01-02T04:00:00Z',
},
{
sessionId: 'mismatched-end-session',
lastUpdated: '2025-01-02T03:00:00Z',
},
{
sessionId: 'rejected-session',
lastUpdated: '2025-01-02T02:00:00Z',
},
{
sessionId: 'opened-session',
lastUpdated: '2025-01-02T01:00:00Z',
@@ -701,12 +840,19 @@ describe('memoryService', () => {
},
]);
expect(state.runs[0].processedSessions).toEqual([
{
sessionId: 'mismatched-error-session',
lastUpdated: '2025-01-02T04:00:00Z',
},
{
sessionId: 'opened-session',
lastUpdated: '2025-01-02T01:00:00Z',
},
]);
expect(state.runs[0].sessionIds).toEqual(['opened-session']);
expect(state.runs[0].sessionIds).toEqual([
'mismatched-error-session',
'opened-session',
]);
});
});
@@ -902,6 +1048,178 @@ describe('memoryService', () => {
expect(result.sessionIndex).toContain(path.join(chatsDir, fileName));
});
it('falls back to scratchpad workflow summary when summary is missing', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'scratchpad-only',
summary: undefined,
memoryScratchpad: {
version: 1,
workflowSummary:
'read_file -> edit | paths packages/core/src/services/memoryService.ts | validated',
},
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-scratch01.jsonl`,
),
conversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('read_file -> edit');
expect(result.sessionIndex).not.toContain('(no summary)');
});
it('ignores malformed scratchpad workflow summaries while indexing sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const malformedConversation = createConversation({
sessionId: 'malformed-scratchpad',
summary: undefined,
memoryScratchpad: {
version: 1,
workflowSummary: 123,
} as unknown as ConversationRecord['memoryScratchpad'],
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-badpad.jsonl`,
),
malformedConversation,
);
const validConversation = createConversation({
sessionId: 'valid-session',
summary: 'Still indexes other sessions',
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-valid.jsonl`,
),
validConversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('(no summary)');
expect(result.sessionIndex).toContain('Still indexes other sessions');
expect(result.sessionIndex).not.toContain('123');
});
it('appends workflow summary when both summary and scratchpad are present', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'summary-and-scratchpad',
summary: 'Fix session scanning',
memoryScratchpad: {
version: 1,
workflowSummary:
'read_file -> edit | paths packages/core/src/services/sessionSummaryUtils.ts',
},
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-scratch02.jsonl`,
),
conversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('Fix session scanning | workflow:');
expect(result.sessionIndex).toContain('sessionSummaryUtils.ts');
});
it('omits stale scratchpad workflow summaries from resumed JSONL sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'stale-scratchpad',
summary: 'Resume memory work',
messageCount: 20,
lastUpdated: '2025-01-01T01:00:00Z',
});
const filePath = path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-stale001.jsonl`,
);
await writeConversationJsonl(filePath, conversation);
await fs.appendFile(
filePath,
`${JSON.stringify({
$set: {
memoryScratchpad: {
version: 1,
workflowSummary: 'stale_workflow | paths stale.ts',
},
},
})}\n`,
);
await fs.appendFile(
filePath,
[
JSON.stringify({
id: 'resumed-user-message',
timestamp: '2025-01-02T01:00:00Z',
type: 'user',
content: [{ text: 'Continue after the scratchpad was written' }],
}),
JSON.stringify({
$set: { lastUpdated: '2025-01-02T01:00:01Z' },
}),
].join('\n') + '\n',
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain('Resume memory work');
expect(result.sessionIndex).not.toContain('stale_workflow');
expect(result.sessionIndex).not.toContain('stale.ts');
});
it('sanitizes shell command workflow summaries before indexing sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
const conversation = createConversation({
sessionId: 'raw-shell-scratchpad',
summary: 'Investigate API migration',
memoryScratchpad: {
version: 1,
workflowSummary:
'run_shell_command: curl https://api.example.com -H "Authorization: Bearer sk-secret-token" -> read_file | paths package.json',
},
messageCount: 20,
});
await writeConversationJsonl(
path.join(
chatsDir,
`${SESSION_FILE_PREFIX}2025-01-01T00-00-shellraw.jsonl`,
),
conversation,
);
const result = await buildSessionIndex(chatsDir, { runs: [] });
expect(result.sessionIndex).toContain(
'workflow: run_shell_command: curl -> read_file | paths package.json',
);
expect(result.sessionIndex).not.toContain('Authorization');
expect(result.sessionIndex).not.toContain('sk-secret-token');
expect(result.sessionIndex).not.toContain('https://api.example.com');
});
it('filters out subagent sessions', async () => {
const { buildSessionIndex } = await import('./memoryService.js');
@@ -1176,6 +1494,9 @@ describe('memoryService', () => {
},
],
skillsCreated: ['debug-helper', 'test-gen'],
turnCount: 4,
durationMs: 1875,
terminateReason: 'GOAL',
},
],
};
@@ -1202,6 +1523,9 @@ describe('memoryService', () => {
]);
expect(result.runs[0].sessionIds).toEqual(['s1']);
expect(result.runs[0].runAt).toBe('2025-06-01T00:00:00Z');
expect(result.runs[0].turnCount).toBe(4);
expect(result.runs[0].durationMs).toBe(1875);
expect(result.runs[0].terminateReason).toBe('GOAL');
});
it('writeExtractionState + readExtractionState roundtrips runs correctly', async () => {
@@ -1235,11 +1559,17 @@ describe('memoryService', () => {
},
],
skillsCreated: ['skill-x'],
turnCount: 3,
durationMs: 2400,
terminateReason: 'GOAL',
},
{
runAt: '2025-01-02T00:00:00Z',
sessionIds: ['c'],
skillsCreated: [],
turnCount: 1,
durationMs: 900,
terminateReason: 'GOAL',
},
];
const state: ExtractionState = { runs };
+112 -86
View File
@@ -14,6 +14,7 @@ import {
SESSION_FILE_PREFIX,
loadConversationRecord,
type ConversationRecord,
type MemoryScratchpad,
} from './chatRecordingService.js';
import { debugLogger } from '../utils/debugLogger.js';
import { coreEvents } from '../utils/events.js';
@@ -22,7 +23,10 @@ import { FRONTMATTER_REGEX, parseFrontmatter } from '../skills/skillLoader.js';
import { LocalAgentExecutor } from '../agents/local-executor.js';
import { SkillExtractionAgent } from '../agents/skill-extraction-agent.js';
import { getModelConfigAlias } from '../agents/registry.js';
import type { SubagentActivityEvent } from '../agents/types.js';
import {
isToolActivityError,
type SubagentActivityEvent,
} from '../agents/types.js';
import { ExecutionLifecycleService } from './executionLifecycleService.js';
import { PromptRegistry } from '../prompts/prompt-registry.js';
import { ResourceRegistry } from '../resources/resource-registry.js';
@@ -36,6 +40,7 @@ import {
applyParsedSkillPatches,
hasParsedPatchHunks,
} from './memoryPatchUtils.js';
import { sanitizeWorkflowSummaryForScratchpad } from './sessionScratchpadUtils.js';
const LOCK_FILENAME = '.extraction.lock';
const STATE_FILENAME = '.extraction-state.json';
@@ -53,20 +58,6 @@ interface LockInfo {
startedAt: string;
}
function hasProperty<T extends string>(
obj: unknown,
prop: T,
): obj is { [key in T]: unknown } {
return obj !== null && typeof obj === 'object' && prop in obj;
}
function isStringProperty<T extends string>(
obj: unknown,
prop: T,
): obj is { [key in T]: string } {
return hasProperty(obj, prop) && typeof obj[prop] === 'string';
}
interface SessionVersion {
sessionId: string;
lastUpdated: string;
@@ -75,6 +66,7 @@ interface SessionVersion {
interface IndexedSession extends SessionVersion {
filePath: string;
summary?: string;
memoryScratchpad?: MemoryScratchpad;
userMessageCount: number;
}
@@ -87,6 +79,9 @@ export interface ExtractionRun {
candidateSessions?: SessionVersion[];
processedSessions?: SessionVersion[];
skillsCreated: string[];
turnCount?: number;
durationMs?: number;
terminateReason?: string;
}
/**
@@ -153,12 +148,25 @@ function normalizeStringArray(value: unknown): string[] {
return value.filter((item): item is string => typeof item === 'string');
}
/**
 * Narrows an untrusted value to a finite number.
 *
 * @param value - Arbitrary value, typically read from persisted JSON.
 * @returns The value itself when it is a finite number; `undefined` for
 *   non-numbers, `NaN`, and positive or negative infinity.
 */
function normalizeOptionalNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  if (!Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
/**
 * Narrows an untrusted value to a string.
 *
 * @param value - Arbitrary value, typically read from persisted JSON.
 * @returns The value unchanged (including the empty string) when it is a
 *   string; otherwise `undefined`.
 */
function normalizeOptionalString(value: unknown): string | undefined {
  if (typeof value === 'string') {
    return value;
  }
  return undefined;
}
function isExtractionRunLike(value: unknown): value is {
runAt: string;
sessionIds?: unknown;
candidateSessions?: unknown;
processedSessions?: unknown;
skillsCreated: unknown;
turnCount?: unknown;
durationMs?: unknown;
terminateReason?: unknown;
} {
return (
typeof value === 'object' &&
@@ -198,6 +206,9 @@ function buildExtractionRun(value: unknown): ExtractionRun | null {
processedSessions:
processedSessions.length > 0 ? processedSessions : undefined,
skillsCreated: normalizeStringArray(value.skillsCreated),
turnCount: normalizeOptionalNumber(value.turnCount),
durationMs: normalizeOptionalNumber(value.durationMs),
terminateReason: normalizeOptionalString(value.terminateReason),
};
}
@@ -291,7 +302,7 @@ function shouldReplaceIndexedSession(
return compareIndexedSessions(candidate, existing) < 0;
}
function isReadFileStartActivity(
function isReadFileActivity(
activity: SubagentActivityEvent,
): activity is SubagentActivityEvent & {
data: { name: string; args?: { file_path?: unknown }; callId?: unknown };
@@ -302,11 +313,36 @@ function isReadFileStartActivity(
);
}
function getResolvedReadFilePath(
/**
 * Extracts the call identifier that ties a read_file activity event to its
 * tool call.
 *
 * The identifier lives under a different key depending on the event:
 * - events accepted by `isReadFileActivity`: `data.callId`
 * - `TOOL_CALL_END` for read_file: `data.id`
 * - `ERROR` for read_file: `data.callId`
 *
 * @param activity - Activity event emitted by the subagent.
 * @returns The identifier when it is present and a string; otherwise `null`
 *   (including for events that belong to other tools).
 */
function getReadFileCallId(activity: SubagentActivityEvent): string | null {
  const asCallId = (candidate: unknown): string | null =>
    typeof candidate === 'string' ? candidate : null;

  if (isReadFileActivity(activity)) {
    return asCallId(activity.data.callId);
  }

  // Pick the key by event type first; `data` is only inspected for the two
  // event types that can carry a read_file identifier.
  let idKey: 'id' | 'callId';
  switch (activity.type) {
    case 'TOOL_CALL_END':
      idKey = 'id';
      break;
    case 'ERROR':
      idKey = 'callId';
      break;
    default:
      return null;
  }

  if (activity.data['name'] !== READ_FILE_TOOL_NAME) {
    return null;
  }
  return asCallId(activity.data[idKey]);
}
function getResolvedActivityFilePath(
config: Config,
activity: SubagentActivityEvent,
): string | null {
if (!isReadFileStartActivity(activity)) {
if (!isReadFileActivity(activity)) {
return null;
}
@@ -320,48 +356,11 @@ function getResolvedReadFilePath(
return null;
}
return path.resolve(config.getTargetDir(), args.file_path);
}
function getReadFileStartCallId(
activity: SubagentActivityEvent,
): string | null {
if (
!isReadFileStartActivity(activity) ||
!isStringProperty(activity.data, 'callId')
) {
return null;
}
return activity.data.callId;
}
function getCompletedReadFileCallId(
activity: SubagentActivityEvent,
): string | null {
if (
activity.type !== 'TOOL_CALL_END' ||
activity.data['name'] !== READ_FILE_TOOL_NAME ||
!isStringProperty(activity.data, 'id')
) {
return null;
}
return activity.data['id'];
}
function getFailedReadFileCallId(
activity: SubagentActivityEvent,
): string | null {
if (
activity.type !== 'ERROR' ||
activity.data['name'] !== READ_FILE_TOOL_NAME ||
!isStringProperty(activity.data, 'callId')
) {
return null;
}
return activity.data['callId'];
const targetDir =
'getTargetDir' in config && typeof config.getTargetDir === 'function'
? config.getTargetDir()
: process.cwd();
return path.resolve(targetDir, args.file_path);
}
function getUserMessageCount(
@@ -580,6 +579,10 @@ async function scanEligibleSessions(
lastUpdated: conversation.lastUpdated,
filePath,
summary: conversation.summary,
memoryScratchpad:
conversation.memoryScratchpadIsStale === true
? undefined
: conversation.memoryScratchpad,
userMessageCount: getUserMessageCount(conversation),
};
@@ -595,6 +598,28 @@ async function scanEligibleSessions(
return Array.from(latestBySessionId.values()).sort(compareIndexedSessions);
}
/**
 * Builds the headline text shown for one session in the extraction index.
 *
 * The headline combines two optional sources:
 * - `session.summary`, the user-intent summary, and
 * - the scratchpad's `workflowSummary`, passed through
 *   `sanitizeWorkflowSummaryForScratchpad` before use.
 *
 * Either source is treated as missing when it is not a string or is blank, so
 * an empty summary can no longer produce an empty headline or hide a usable
 * workflow hint.
 *
 * @param session - Indexed session whose headline is being rendered.
 * @returns `"<summary> | workflow: <hint>"` when both are present and differ,
 *   the summary alone when there is no distinct hint, the hint alone when the
 *   summary is missing, or `"(no summary)"` when neither is available.
 */
function formatSessionHeadline(session: IndexedSession): string {
  // Scratchpad metadata is read back from disk, so the declared type is not
  // trusted: a non-string workflow summary is ignored rather than rendered.
  const rawWorkflowSummary = session.memoryScratchpad?.workflowSummary;
  const sanitizedWorkflowSummary =
    typeof rawWorkflowSummary === 'string'
      ? sanitizeWorkflowSummaryForScratchpad(rawWorkflowSummary)
      : '';
  const workflowSummary = sanitizedWorkflowSummary.trim()
    ? sanitizedWorkflowSummary
    : undefined;

  // Apply the same blank check to the user-intent summary; `??` alone would
  // let '' through and yield an empty headline.
  const rawSummary: unknown = session.summary;
  const userSummary =
    typeof rawSummary === 'string' && rawSummary.trim()
      ? rawSummary
      : undefined;

  if (userSummary === undefined) {
    return workflowSummary ?? '(no summary)';
  }

  if (workflowSummary !== undefined && workflowSummary !== userSummary) {
    return `${userSummary} | workflow: ${workflowSummary}`;
  }

  return userSummary;
}
/**
* Builds a session index for the extraction agent: a compact listing of all
* eligible sessions with their summary, file path, and new/previously-processed status.
@@ -651,8 +676,7 @@ export async function buildSessionIndex(
const status = candidateSessionIds.has(getSessionVersionKey(session))
? '[NEW]'
: '[old]';
const summary = session.summary ?? '(no summary)';
return `${status} ${summary} (${session.userMessageCount} user msgs) — ${session.filePath}`;
return `${status} ${formatSessionHeadline(session)} (${session.userMessageCount} user msgs) — ${session.filePath}`;
},
);
@@ -999,18 +1023,19 @@ export async function startMemoryService(config: Config): Promise<void> {
session,
]),
);
const pendingReadFileSessions = new Map<string, SessionVersion>();
const processedSessionKeys = new Set<string>();
const pendingReadFileSessions = new Map<string, string>();
// Create and run the extraction agent
const executor = await LocalAgentExecutor.create(
agentDefinition,
context,
(activity) => {
const readFileCallId = getReadFileStartCallId(activity);
if (readFileCallId) {
const resolvedPath = getResolvedReadFilePath(config, activity);
if (!resolvedPath) {
const readFileCallId = getReadFileCallId(activity);
if (activity.type === 'TOOL_CALL_START') {
const resolvedPath = getResolvedActivityFilePath(config, activity);
if (!resolvedPath || !readFileCallId) {
return;
}
@@ -1019,35 +1044,31 @@ export async function startMemoryService(config: Config): Promise<void> {
return;
}
pendingReadFileSessions.set(
readFileCallId,
getSessionVersionKey(session),
);
pendingReadFileSessions.set(readFileCallId, session);
return;
}
const completedReadFileCallId = getCompletedReadFileCallId(activity);
if (completedReadFileCallId) {
const sessionKey = pendingReadFileSessions.get(
completedReadFileCallId,
);
if (!sessionKey) {
return;
}
processedSessionKeys.add(sessionKey);
pendingReadFileSessions.delete(completedReadFileCallId);
if (!readFileCallId) {
return;
}
const failedReadFileCallId = getFailedReadFileCallId(activity);
if (failedReadFileCallId) {
pendingReadFileSessions.delete(failedReadFileCallId);
const session = pendingReadFileSessions.get(readFileCallId);
if (!session) {
return;
}
pendingReadFileSessions.delete(readFileCallId);
if (
activity.type === 'TOOL_CALL_END' &&
!isToolActivityError(activity.data['data'])
) {
processedSessionKeys.add(getSessionVersionKey(session));
}
},
);
await executor.run(
const executorResult = await executor.run(
{ request: 'Extract skills from the provided sessions.' },
abortController.signal,
);
@@ -1107,6 +1128,11 @@ export async function startMemoryService(config: Config): Promise<void> {
})),
processedSessions,
skillsCreated,
turnCount: normalizeOptionalNumber(executorResult?.turn_count),
durationMs: normalizeOptionalNumber(executorResult?.duration_ms),
terminateReason: normalizeOptionalString(
executorResult?.terminate_reason,
),
};
const updatedState: ExtractionState = {
runs: [...state.runs, run],
@@ -0,0 +1,45 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
import {
sanitizeWorkflowSummaryForScratchpad,
summarizeShellCommandForScratchpad,
} from './sessionScratchpadUtils.js';
// Unit tests for the scratchpad sanitizers: only a safe command name may
// survive from a raw shell command, never its arguments.
describe('sessionScratchpadUtils', () => {
describe('summarizeShellCommandForScratchpad', () => {
it('summarizes quoted and assignment-prefixed shell commands', () => {
// A quoted command name is unwrapped.
expect(summarizeShellCommandForScratchpad('"npm" run test')).toBe('npm');
// A leading NAME=value assignment (here carrying credentials) is skipped
// and the following token is reported as the command.
expect(
summarizeShellCommandForScratchpad(
'DATABASE_URL=postgres://user:password@example/db pnpm test',
),
).toBe('pnpm');
});
it('handles adversarial unterminated quoted input without exposing arguments', () => {
// An opening double quote followed by 10,000 repetitions of `\"!` and no
// closing quote: one very long token that is not a safe command name.
const adversarialCommand = `"${'\\"!'.repeat(10_000)}`;
expect(summarizeShellCommandForScratchpad(adversarialCommand)).toBe(
'shell',
);
});
});
describe('sanitizeWorkflowSummaryForScratchpad', () => {
it('sanitizes adversarial shell commands in workflow summaries', () => {
// Same unterminated-quote payload, embedded as a shell entry of a tool sequence.
const adversarialCommand = `"${'\\"!'.repeat(10_000)}`;
expect(
sanitizeWorkflowSummaryForScratchpad(
`${SHELL_TOOL_NAME}: ${adversarialCommand} -> read_file`,
),
).toBe(`${SHELL_TOOL_NAME}: shell -> read_file`);
});
});
});
@@ -0,0 +1,155 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
// Separator between the top-level parts of a workflow summary
// (tool sequence, "paths ...", validation marker).
const WORKFLOW_PART_SEPARATOR = ' | ';
// Separator between consecutive entries inside the tool-sequence part.
const TOOL_SEQUENCE_SEPARATOR = ' -> ';
// Matches a token that starts with a shell-style `NAME=` assignment.
const SHELL_ASSIGNMENT_REGEX = /^[A-Za-z_][A-Za-z0-9_]*=/;
// Allow-list for a command basename: letters, digits and `_ . @ + -` only.
const SAFE_COMMAND_NAME_REGEX = /^[A-Za-z0-9_.@+-]+$/;
// Allow-list for a non-shell tool-sequence entry: identifier-like, may contain `:` and `.`.
const SAFE_TOOL_SEQUENCE_ENTRY_REGEX = /^[A-Za-z_][A-Za-z0-9_:.]*$/;
/**
 * Splits a shell command line into whitespace-separated tokens with a small,
 * linear-time quote-aware scanner.
 *
 * Rules implemented here:
 * - Space, tab, newline and carriage return end the current token when they
 *   appear outside quotes.
 * - `"`, `'` and `` ` `` open a quoted span that runs to the matching
 *   character; the quote characters themselves are not part of the token.
 * - Inside double quotes only, a backslash makes the next character literal.
 *   Outside quotes a backslash is an ordinary character.
 * - An unterminated quote is tolerated: whatever was collected becomes the
 *   final token.
 *
 * @param command - Raw command line.
 * @returns The non-empty tokens in order of appearance.
 */
function tokenizeShellCommand(command: string): string[] {
  const tokens: string[] = [];
  let pending = '';
  let activeQuote: '"' | "'" | '`' | undefined;

  const flushPending = (): void => {
    if (pending) {
      tokens.push(pending);
      pending = '';
    }
  };

  let index = 0;
  while (index < command.length) {
    const char = command[index];
    index++;

    if (activeQuote !== undefined) {
      if (char === activeQuote) {
        activeQuote = undefined;
      } else if (
        activeQuote === '"' &&
        char === '\\' &&
        index < command.length
      ) {
        // `index` already points at the escaped character.
        pending += command[index];
        index++;
      } else {
        pending += char;
      }
      continue;
    }

    switch (char) {
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        flushPending();
        break;
      case '"':
      case "'":
      case '`':
        activeQuote = char;
        break;
      default:
        pending += char;
    }
  }

  flushPending();
  return tokens;
}
/**
 * Reduces one shell token to a name that is safe to record.
 *
 * @param token - A single token produced by the shell tokenizer.
 * @returns `undefined` when the token is empty or starts with a `NAME=`
 *   assignment (the caller should look at the next token); otherwise the
 *   token's last path segment if it matches the safe-name allow-list, or the
 *   placeholder `'shell'` when it does not.
 */
function getSafeCommandName(token: string): string | undefined {
  if (token.length === 0) {
    return undefined;
  }
  if (SHELL_ASSIGNMENT_REGEX.test(token)) {
    return undefined;
  }

  // Keep only the final segment of a `/`- or `\`-separated path; a token made
  // purely of separators falls back to the token itself.
  const segments = token.split(/[/\\]/).filter((segment) => segment.length > 0);
  const basename = segments.pop() ?? token;

  const isSafe =
    basename.length > 0 &&
    !basename.includes('://') &&
    SAFE_COMMAND_NAME_REGEX.test(basename);
  return isSafe ? basename : 'shell';
}
/**
 * Summarizes a raw shell command as just its command name so that arguments
 * (URLs, headers, credentials, ...) are never recorded.
 *
 * Whitespace is collapsed, the command is tokenized, and the first token that
 * yields a name from `getSafeCommandName` wins; tokens for which it returns
 * `undefined` are skipped.
 *
 * @param command - Raw shell command text.
 * @returns The command name, or the placeholder that `getSafeCommandName`
 *   substitutes for an unsafe name, or `undefined` when the command is blank
 *   or no token yields a name.
 */
export function summarizeShellCommandForScratchpad(
  command: string,
): string | undefined {
  const collapsed = command.replace(/\s+/g, ' ').trim();
  if (!collapsed) {
    return undefined;
  }

  const firstCommandToken = tokenizeShellCommand(collapsed).find((token) =>
    Boolean(getSafeCommandName(token)),
  );
  return firstCommandToken === undefined
    ? undefined
    : getSafeCommandName(firstCommandToken);
}
/**
 * Sanitizes one entry of a workflow tool sequence.
 *
 * - Entries of the form `<shell tool>: <command>` are rewritten to
 *   `<shell tool>: <safe command name>`, or to the bare shell tool name when
 *   no safe name can be derived.
 * - Any other entry is kept only if it equals the shell tool name or matches
 *   the tool-name allow-list pattern.
 *
 * @param entry A single tool-sequence entry, possibly padded with whitespace.
 * @returns The sanitized entry, or `undefined` when it must be dropped.
 */
function sanitizeWorkflowToolSequenceEntry(entry: string): string | undefined {
  const candidate = entry.trim();
  if (candidate.length === 0) {
    return undefined;
  }

  const shellPrefix = `${SHELL_TOOL_NAME}:`;
  if (!candidate.startsWith(shellPrefix)) {
    const isAllowed =
      candidate === SHELL_TOOL_NAME ||
      SAFE_TOOL_SEQUENCE_ENTRY_REGEX.test(candidate);
    return isAllowed ? candidate : undefined;
  }

  const rawCommand = candidate.slice(shellPrefix.length).trim();
  const safeName = summarizeShellCommandForScratchpad(rawCommand);
  if (!safeName) {
    return SHELL_TOOL_NAME;
  }
  return `${SHELL_TOOL_NAME}: ${safeName}`;
}
/**
 * Removes raw shell command text from a workflow summary string.
 *
 * The summary is whitespace-normalized first. If it contains no
 * `<shell tool>:` marker it is returned as-is. Otherwise it is split into
 * parts on the part separator and each part is handled as follows:
 * - a part containing the shell marker is split into tool-sequence entries,
 *   each entry is sanitized, and the surviving entries are re-joined;
 * - a part that starts with `paths ` or equals `validated` /
 *   `validation failed` is kept verbatim;
 * - every other part is dropped.
 *
 * @param summary Workflow summary text.
 * @returns The sanitized summary (possibly an empty string).
 */
export function sanitizeWorkflowSummaryForScratchpad(summary: string): string {
  const shellMarker = `${SHELL_TOOL_NAME}:`;
  const collapsed = summary.replace(/\s+/g, ' ').trim();
  if (!collapsed.includes(shellMarker)) {
    return collapsed;
  }

  const keptParts = collapsed
    .split(WORKFLOW_PART_SEPARATOR)
    .flatMap((rawPart): string[] => {
      const part = rawPart.trim();
      if (part.length === 0) {
        return [];
      }

      if (part.includes(shellMarker)) {
        const safeEntries = part
          .split(TOOL_SEQUENCE_SEPARATOR)
          .map((entry) => sanitizeWorkflowToolSequenceEntry(entry))
          .filter((entry): entry is string => Boolean(entry));
        return safeEntries.length > 0
          ? [safeEntries.join(TOOL_SEQUENCE_SEPARATOR)]
          : [];
      }

      const isKnownPart =
        part === 'validated' ||
        part === 'validation failed' ||
        part.startsWith('paths ');
      return isKnownPart ? [part] : [];
    });

  return keptParts.join(WORKFLOW_PART_SEPARATOR);
}
@@ -9,6 +9,8 @@ import { generateSummary, getPreviousSession } from './sessionSummaryUtils.js';
import type { Config } from '../config/config.js';
import type { ContentGenerator } from '../core/contentGenerator.js';
import * as chatRecordingService from './chatRecordingService.js';
import type { ConversationRecord } from './chatRecordingService.js';
import { CoreToolCallStatus } from '../scheduler/types.js';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as os from 'node:os';
@@ -37,25 +39,33 @@ vi.mock('./chatRecordingService.js', async () => {
/**
 * Input for the session-file builders used by these tests. Every field except
 * `userMessageCount` is optional; the builders substitute fixed defaults for
 * `sessionId`, `startTime` and `lastUpdated` when they are omitted.
 */
interface SessionFixture {
summary?: string;
// Typed as `unknown` so tests can store arbitrary scratchpad payloads.
memoryScratchpad?: unknown;
sessionId?: string;
startTime?: string;
lastUpdated?: string;
kind?: ConversationRecord['kind'];
// Explicit message list; when absent the builders generate
// `userMessageCount` synthetic user messages instead.
messages?: ConversationRecord['messages'];
userMessageCount: number;
}
/**
 * Serializes a fixture as a single JSON document (the `.json` session files
 * written by these tests). Uses `fixture.messages` when provided, otherwise
 * synthesizes `userMessageCount` user messages with a fixed timestamp.
 */
function buildLegacySessionJson(fixture: SessionFixture): string {
const messages =
fixture.messages ??
Array.from({ length: fixture.userMessageCount }, (_, i) => ({
id: String(i + 1),
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: `Message ${i + 1}` }],
}));
return JSON.stringify({
sessionId: fixture.sessionId ?? 'session-id',
projectHash: 'abc123',
startTime: fixture.startTime ?? '2024-01-01T00:00:00Z',
lastUpdated: fixture.lastUpdated ?? '2024-01-01T00:00:00Z',
summary: fixture.summary,
messages: Array.from({ length: fixture.userMessageCount }, (_, i) => ({
id: String(i + 1),
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: `Message ${i + 1}` }],
})),
memoryScratchpad: fixture.memoryScratchpad,
// `kind` is only emitted when the fixture sets it.
...(fixture.kind ? { kind: fixture.kind } : {}),
messages,
});
}
@@ -66,17 +76,22 @@ function buildJsonlSession(fixture: SessionFixture): string {
startTime: fixture.startTime ?? '2024-01-01T00:00:00Z',
lastUpdated: fixture.lastUpdated ?? '2024-01-01T00:00:00Z',
...(fixture.summary !== undefined ? { summary: fixture.summary } : {}),
...(fixture.memoryScratchpad !== undefined
? { memoryScratchpad: fixture.memoryScratchpad }
: {}),
...(fixture.kind ? { kind: fixture.kind } : {}),
};
const messages =
fixture.messages ??
Array.from({ length: fixture.userMessageCount }, (_, i) => ({
id: String(i + 1),
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: `Message ${i + 1}` }],
}));
const lines: string[] = [JSON.stringify(metadata)];
for (let i = 0; i < fixture.userMessageCount; i++) {
lines.push(
JSON.stringify({
id: String(i + 1),
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: `Message ${i + 1}` }],
}),
);
for (const message of messages) {
lines.push(JSON.stringify(message));
}
return lines.join('\n') + '\n';
}
@@ -119,6 +134,7 @@ describe('sessionSummaryUtils', () => {
mockConfig = {
getContentGenerator: vi.fn().mockReturnValue(mockContentGenerator),
getProjectRoot: vi.fn().mockReturnValue(projectTempDir),
getSessionId: vi.fn().mockReturnValue('current-session'),
storage: {
getProjectTempDir: vi.fn().mockReturnValue(projectTempDir),
@@ -157,13 +173,50 @@ describe('sessionSummaryUtils', () => {
expect(result).toBeNull();
});
it('should return null if most recent session already has summary', async () => {
it('should return null if most recent session already has summary metadata', async () => {
await writeSession(
chatsDir,
'session-2024-01-01T10-00-abc12345.json',
buildLegacySessionJson({
userMessageCount: 5,
summary: 'Existing summary',
memoryScratchpad: {
version: 1,
workflowSummary: 'read_file -> edit',
},
}),
);
const result = await getPreviousSession(mockConfig);
expect(result).toBeNull();
});
it('should return path if most recent session has summary but no scratchpad', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-abc12345.json',
buildLegacySessionJson({
userMessageCount: 5,
summary: 'Existing summary',
}),
);
const result = await getPreviousSession(mockConfig);
expect(result).toBe(filePath);
});
it('should return null if most recent session has scratchpad but no summary', async () => {
await writeSession(
chatsDir,
'session-2024-01-01T10-00-abc12345.json',
buildLegacySessionJson({
userMessageCount: 5,
memoryScratchpad: {
version: 1,
workflowSummary: 'read_file -> edit',
},
}),
);
@@ -302,6 +355,36 @@ describe('sessionSummaryUtils', () => {
metadataOnly: true,
});
});
it('should skip subagent sessions when backfilling scratchpads', async () => {
const mainPath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-main0001.jsonl',
buildJsonlSession({
sessionId: 'main-session',
userMessageCount: 2,
lastUpdated: '2024-01-01T10:00:00Z',
summary: 'Main session summary',
}),
);
await setSessionMtime(mainPath, '2024-01-01T10:00:00Z');
await writeSession(
chatsDir,
'session-2024-01-02T10-00-sub00001.jsonl',
buildJsonlSession({
sessionId: 'subagent-session',
userMessageCount: 2,
lastUpdated: '2024-01-02T10:00:00Z',
summary: 'Subagent summary',
kind: 'subagent',
}),
);
const result = await getPreviousSession(mockConfig);
expect(result).toBe(mainPath);
});
});
describe('generateSummary', () => {
@@ -324,6 +407,7 @@ describe('sessionSummaryUtils', () => {
expect(mockGenerateSummary).toHaveBeenCalledTimes(1);
const written = JSON.parse(await fs.readFile(filePath, 'utf-8'));
expect(written.summary).toBe('Add dark mode to the app');
expect(written.memoryScratchpad).toEqual({ version: 1 });
expect(written.lastUpdated).toBe(lastUpdated);
});
@@ -356,10 +440,160 @@ describe('sessionSummaryUtils', () => {
expect(lastRecord).toEqual({
$set: {
summary: 'Add dark mode to the app',
memoryScratchpad: {
version: 1,
},
},
});
});
it('should backfill scratchpad without regenerating summary', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-backfill.jsonl',
buildJsonlSession({
userMessageCount: 2,
summary: 'Existing summary',
}),
);
await generateSummary(mockConfig);
expect(mockGenerateSummary).not.toHaveBeenCalled();
const lines = (await fs.readFile(filePath, 'utf-8'))
.split('\n')
.filter(Boolean);
const lastRecord = JSON.parse(lines[lines.length - 1]);
expect(lastRecord).toEqual({
$set: {
memoryScratchpad: {
version: 1,
},
},
});
});
it('should not retry summary generation after writing a scratchpad fallback', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-summary-fallback.jsonl',
buildJsonlSession({
sessionId: 'summary-fallback-session',
userMessageCount: 2,
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Read package metadata' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Reading package.json' }],
toolCalls: [
{
id: 'tool-1',
name: 'read_file',
args: { file_path: 'package.json' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
mockGenerateSummary.mockResolvedValue(undefined);
await generateSummary(mockConfig);
await generateSummary(mockConfig);
expect(mockGenerateSummary).toHaveBeenCalledTimes(1);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.summary).toBeUndefined();
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'read_file | paths package.json',
toolSequence: ['read_file'],
touchedPaths: ['package.json'],
});
});
it('should refresh stale scratchpads when messages were appended after metadata', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-resumed1.jsonl',
buildJsonlSession({
sessionId: 'resumed-session',
userMessageCount: 2,
summary: 'Existing summary',
lastUpdated: '2024-01-01T10:00:00Z',
}),
);
await fs.appendFile(
filePath,
`${JSON.stringify({
$set: {
memoryScratchpad: {
version: 1,
workflowSummary: 'read_file',
toolSequence: ['read_file'],
},
},
})}\n`,
);
await fs.appendFile(
filePath,
[
JSON.stringify({
id: 'u-resumed',
timestamp: '2024-01-02T00:00:00Z',
type: 'user',
content: [{ text: 'Update src/app.ts' }],
}),
JSON.stringify({
id: 'g-resumed',
timestamp: '2024-01-02T00:00:01Z',
type: 'gemini',
content: [{ text: 'Editing file' }],
toolCalls: [
{
id: 'tool-resumed',
name: 'replace',
args: { file_path: 'src/app.ts' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-02T00:00:01Z',
},
],
}),
JSON.stringify({
$set: { lastUpdated: '2024-01-02T00:00:02Z' },
}),
].join('\n') + '\n',
);
await generateSummary(mockConfig);
expect(mockGenerateSummary).not.toHaveBeenCalled();
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'replace | paths src/app.ts',
toolSequence: ['replace'],
touchedPaths: ['src/app.ts'],
});
});
it('should preserve a newer JSONL lastUpdated written concurrently', async () => {
const initialLastUpdated = '2024-01-01T10:00:00Z';
const newerLastUpdated = '2024-01-02T12:34:56Z';
@@ -411,6 +645,7 @@ describe('sessionSummaryUtils', () => {
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.summary).toBe('Add dark mode to the app');
expect(savedConversation?.memoryScratchpad).toEqual({ version: 1 });
expect(savedConversation?.lastUpdated).toBe(newerLastUpdated);
const lines = (await fs.readFile(filePath, 'utf-8'))
@@ -420,6 +655,9 @@ describe('sessionSummaryUtils', () => {
expect(lastRecord).toEqual({
$set: {
summary: 'Add dark mode to the app',
memoryScratchpad: {
version: 1,
},
},
});
});
@@ -454,6 +692,9 @@ describe('sessionSummaryUtils', () => {
expect(JSON.parse(previousLines[previousLines.length - 1])).toEqual({
$set: {
summary: 'Add dark mode to the app',
memoryScratchpad: {
version: 1,
},
},
});
@@ -462,5 +703,312 @@ describe('sessionSummaryUtils', () => {
.filter(Boolean);
expect(currentLines).toHaveLength(2);
});
it('should preserve repo-root file names in scratchpad touched paths', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-rootpath.jsonl',
buildJsonlSession({
sessionId: 'root-path-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Inspect package.json' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Reading files' }],
toolCalls: [
{
id: 'tool-1',
name: 'read_file',
args: { file_path: 'package.json' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'read_file | paths package.json',
toolSequence: ['read_file'],
touchedPaths: ['package.json'],
});
});
it('should summarize shell commands without raw arguments in scratchpad tool sequence', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-shellcmd.jsonl',
buildJsonlSession({
sessionId: 'shell-command-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Run the migration and regenerate docs' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Running commands' }],
toolCalls: [
{
id: 'tool-1',
name: 'run_shell_command',
args: {
command:
'curl https://api.example.com -H "Authorization: Bearer sk-secret-token"',
},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
{
id: 'tool-2',
name: 'run_shell_command',
args: {
command:
'DATABASE_URL=postgresql://user:password@localhost/db npm run migrate -- --name add-users',
},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:02Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:03Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'run_shell_command: curl -> run_shell_command: npm',
toolSequence: ['run_shell_command: curl', 'run_shell_command: npm'],
});
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('Authorization');
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('sk-secret-token');
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('password');
expect(
savedConversation?.memoryScratchpad?.workflowSummary,
).not.toContain('add-users');
});
it('should not classify validation substrings as validation tools', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-validation-substring.jsonl',
buildJsonlSession({
sessionId: 'validation-substring-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Run the contest helper' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Running helper' }],
toolCalls: [
{
id: 'tool-1',
name: 'contest_runner',
args: {},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'contest_runner',
toolSequence: ['contest_runner'],
});
});
it('should cap nested path extraction depth', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-deep-paths.jsonl',
buildJsonlSession({
sessionId: 'deep-paths-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Edit shallow and deeply nested files' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Editing files' }],
toolCalls: [
{
id: 'tool-1',
name: 'replace',
args: {
file_path: 'src/shallow.ts',
level1: {
level2: {
level3: {
level4: {
level5: {
level6: {
level7: {
file_path: 'src/deep.ts',
},
},
},
},
},
},
},
},
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:01Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:02Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'replace | paths src/shallow.ts',
toolSequence: ['replace'],
touchedPaths: ['src/shallow.ts'],
});
});
it('should use the latest validation result in scratchpad metadata', async () => {
const filePath = await writeSession(
chatsDir,
'session-2024-01-01T10-00-validation.jsonl',
buildJsonlSession({
sessionId: 'validation-session',
userMessageCount: 2,
summary: 'Existing summary',
messages: [
{
id: 'u1',
timestamp: '2024-01-01T00:00:00Z',
type: 'user',
content: [{ text: 'Fix the tests' }],
},
{
id: 'g1',
timestamp: '2024-01-01T00:00:01Z',
type: 'gemini',
content: [{ text: 'Running tests' }],
toolCalls: [
{
id: 'tool-1',
name: 'run_shell_command',
args: { command: 'npm test' },
status: CoreToolCallStatus.Error,
timestamp: '2024-01-01T00:00:01Z',
},
{
id: 'tool-2',
name: 'run_shell_command',
args: { command: 'npm test' },
status: CoreToolCallStatus.Success,
timestamp: '2024-01-01T00:00:02Z',
},
],
},
{
id: 'u2',
timestamp: '2024-01-01T00:00:03Z',
type: 'user',
content: [{ text: 'Done' }],
},
],
}),
);
await generateSummary(mockConfig);
const savedConversation =
await chatRecordingService.loadConversationRecord(filePath);
expect(savedConversation?.memoryScratchpad).toEqual({
version: 1,
workflowSummary: 'run_shell_command: npm | validated',
toolSequence: ['run_shell_command: npm'],
validationStatus: 'passed',
});
});
});
});
+318 -33
View File
@@ -12,15 +12,29 @@ import {
SESSION_FILE_PREFIX,
loadConversationRecord,
type ConversationRecord,
type MemoryScratchpad,
type ToolCallRecord,
} from './chatRecordingService.js';
import { CoreToolCallStatus } from '../scheduler/types.js';
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
import { summarizeShellCommandForScratchpad } from './sessionScratchpadUtils.js';
import fs from 'node:fs/promises';
import path from 'node:path';
const MIN_MESSAGES_FOR_SUMMARY = 1;
// Maximum number of distinct entries kept in a scratchpad's tool sequence.
const MAX_SCRATCHPAD_TOOLS = 6;
// Maximum number of distinct touched paths kept in a scratchpad.
const MAX_SCRATCHPAD_PATHS = 4;
// Nesting depth beyond which tool-call argument values are no longer scanned
// for paths.
const MAX_SCRATCHPAD_PATH_DEPTH = 6;
// Workflow summaries longer than this are cut and suffixed with `...`.
const MAX_WORKFLOW_SUMMARY_LENGTH = 160;
// Tested against a tool call's command string to decide whether the call
// counts as a validation step (tests, lint, build, type checks).
const VALIDATION_COMMAND_REGEX =
/\b(test|tests|vitest|jest|pytest|cargo test|npm test|pnpm test|yarn test|bun test|lint|build|check|typecheck)\b/i;
// Tested against argument key names; only string values stored under a
// matching key are treated as path candidates. This is a substring match.
const PATH_KEY_REGEX = /(path|file|dir|directory|cwd|root)/i;
// Tested against the tool name itself. `\b` does not fire next to `_`, so a
// name such as `contest_runner` does not match.
const VALIDATION_TOOL_REGEX = /\b(test|lint|build|check|typecheck)\b/i;
// A conversation record extended with optional bookkeeping fields.
type LoadedSession = ConversationRecord & {
// When present, used instead of `messages.length` as the message count.
messageCount?: number;
// NOTE(review): appears to be filled in by `loadConversationRecord` for
// metadata-only loads — confirm against that function.
userMessageCount?: number;
// When `true`, an existing `memoryScratchpad` is not considered current.
memoryScratchpadIsStale?: boolean;
};
interface SessionFileCandidate {
@@ -72,6 +86,238 @@ function getSessionTimestampMs(session: LoadedSession): number {
return Number.isNaN(parsed) ? 0 : parsed;
}
/**
 * Trims a tool name, substituting `'unknown_tool'` when nothing is left.
 *
 * @param name Tool name as recorded on the tool call.
 * @returns The trimmed name, or `'unknown_tool'` for blank input.
 */
function normalizeToolName(name: string): string {
  const cleaned = name.trim();
  if (cleaned.length === 0) {
    return 'unknown_tool';
  }
  return cleaned;
}
/**
 * Appends `value` to `target` in place, unless the value is empty, already
 * present, or the array has reached `limit` entries.
 *
 * @param target Array to append to (mutated).
 * @param value Candidate value.
 * @param limit Maximum length `target` may reach.
 */
function pushUniqueLimited(
  target: string[],
  value: string,
  limit: number,
): void {
  const hasRoom = target.length < limit;
  const isNewValue = Boolean(value) && !target.includes(value);
  if (hasRoom && isNewValue) {
    target.push(value);
  }
}
/**
 * Turns a raw tool-call argument value into a compact, forward-slash path
 * suitable for the scratchpad, or rejects it.
 *
 * A candidate is rejected (`null`) when, after trimming, it is empty, longer
 * than 240 characters, contains a newline, or does not look like a path (no
 * `/` or `\`, no leading `.`, and no file extension).
 *
 * Absolute paths are rewritten relative to `projectRoot` when they lie inside
 * it; absolute paths outside the root (or equal to it) are reduced to their
 * basename. Results longer than 120 characters keep only their last three
 * segments.
 *
 * @param candidate Raw string value taken from tool-call arguments.
 * @param projectRoot Project root used to relativize absolute paths.
 * @returns The normalized path, or `null` when the value is rejected.
 */
function normalizePathCandidate(
  candidate: string,
  projectRoot: string,
): string | null {
  const trimmed = candidate.trim();
  if (
    trimmed.length === 0 ||
    trimmed.length > 240 ||
    trimmed.includes('\n')
  ) {
    return null;
  }

  const looksLikePath =
    trimmed.includes('/') ||
    trimmed.includes('\\') ||
    trimmed.startsWith('.') ||
    path.extname(trimmed).length > 0;
  if (!looksLikePath) {
    return null;
  }

  let normalized = trimmed.replace(/\\/g, '/');
  if (path.isAbsolute(trimmed)) {
    const relative = path.relative(projectRoot, trimmed);
    // Only a leading `..` *segment* means the path leaves the root. A plain
    // prefix test would also reject in-root names such as `..cache/file.ts`.
    const escapesRoot =
      relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);
    normalized =
      relative && !escapesRoot
        ? relative.replace(/\\/g, '/')
        : path.basename(trimmed);
  }

  if (normalized.length > 120) {
    normalized = normalized.split('/').slice(-3).join('/');
  }
  return normalized.length > 0 ? normalized : null;
}
/**
 * Walks an arbitrary argument value and collects path-like strings into
 * `paths` (mutated in place).
 *
 * A string is considered only when the key it was found under matches the
 * path-key pattern; array items inherit the key of the array itself. The walk
 * stops as soon as the path limit is reached and ignores anything nested
 * deeper than the configured maximum depth.
 *
 * @param value Value to inspect (string, array, object, or anything else).
 * @param projectRoot Project root used when normalizing candidates.
 * @param paths Accumulator of unique normalized paths.
 * @param keyHint Key under which `value` was found, if any.
 * @param depth Current nesting depth; callers normally omit it.
 */
function collectPathsFromValue(
  value: unknown,
  projectRoot: string,
  paths: string[],
  keyHint?: string,
  depth = 0,
): void {
  const isFull = (): boolean => paths.length >= MAX_SCRATCHPAD_PATHS;
  if (isFull() || depth > MAX_SCRATCHPAD_PATH_DEPTH) {
    return;
  }

  if (typeof value === 'string') {
    if (keyHint && PATH_KEY_REGEX.test(keyHint)) {
      const candidate = normalizePathCandidate(value, projectRoot);
      if (candidate) {
        pushUniqueLimited(paths, candidate, MAX_SCRATCHPAD_PATHS);
      }
    }
    return;
  }

  if (value === null || typeof value !== 'object') {
    return;
  }

  // Recurses one level down, carrying the key the child should be judged by.
  const descend = (child: unknown, hint: string | undefined): void => {
    collectPathsFromValue(child, projectRoot, paths, hint, depth + 1);
  };

  if (Array.isArray(value)) {
    for (const element of value) {
      descend(element, keyHint);
      if (isFull()) {
        break;
      }
    }
    return;
  }

  for (const [childKey, childValue] of Object.entries(value)) {
    descend(childValue, childKey);
    if (isFull()) {
      break;
    }
  }
}
/**
 * Looks up the command text of a tool call.
 *
 * The argument keys `command`, `cmd` and `script` are checked in that order;
 * the first one holding a non-blank string wins.
 *
 * @param toolCall Recorded tool call.
 * @returns The untrimmed command string, or `undefined` when none is found.
 */
function getToolCallCommand(toolCall: ToolCallRecord): string | undefined {
  const commandKeys = ['command', 'cmd', 'script'];
  return commandKeys
    .map((key) => toolCall.args[key])
    .find(
      (value: unknown): value is string =>
        typeof value === 'string' && value.trim().length > 0,
    );
}
/**
 * Builds the tool-sequence label for a tool call.
 *
 * Non-shell tools are labelled with their normalized name. Shell tool calls
 * are labelled `<tool>: <command summary>` when a command is present and can
 * be summarized, and with the bare tool name otherwise.
 *
 * @param toolCall Recorded tool call.
 * @returns The label to store in the scratchpad tool sequence.
 */
function getToolSequenceEntry(toolCall: ToolCallRecord): string {
  const toolName = normalizeToolName(toolCall.name);
  if (toolName === SHELL_TOOL_NAME) {
    const command = getToolCallCommand(toolCall);
    if (command) {
      const commandSummary = summarizeShellCommandForScratchpad(command);
      if (commandSummary) {
        return `${toolName}: ${commandSummary}`;
      }
    }
  }
  return toolName;
}
/**
 * Classifies a tool call as a validation step and reports its outcome.
 *
 * A call counts as validation when its tool name matches the validation-tool
 * pattern or its command matches the validation-command pattern.
 *
 * @param toolCall Recorded tool call.
 * @returns `undefined` for non-validation calls; otherwise `'passed'` for a
 *   successful call, `'failed'` for an errored or cancelled call, and
 *   `'unknown'` for any other status.
 */
function getValidationStatusForToolCall(
  toolCall: ToolCallRecord,
): MemoryScratchpad['validationStatus'] | undefined {
  const command = getToolCallCommand(toolCall);
  const looksLikeValidation =
    VALIDATION_TOOL_REGEX.test(toolCall.name) ||
    (command ? VALIDATION_COMMAND_REGEX.test(command) : false);
  if (!looksLikeValidation) {
    return undefined;
  }

  switch (toolCall.status) {
    case CoreToolCallStatus.Success:
      return 'passed';
    case CoreToolCallStatus.Error:
    case CoreToolCallStatus.Cancelled:
      return 'failed';
    default:
      return 'unknown';
  }
}
/**
 * Renders the one-line workflow summary stored in the scratchpad.
 *
 * Up to three parts are joined with ` | `, in this order: the tool sequence
 * (joined with ` -> `), `paths <comma-separated paths>`, and a validation
 * marker (`validated` or `validation failed`; other statuses add nothing).
 * Summaries longer than the maximum length are cut and end with `...`.
 *
 * @param toolSequence Ordered tool labels.
 * @param touchedPaths Normalized paths touched during the session.
 * @param validationStatus Latest validation outcome, if any.
 * @returns The summary, or `undefined` when there is nothing to report.
 */
function buildWorkflowSummary(
  toolSequence: string[],
  touchedPaths: string[],
  validationStatus?: MemoryScratchpad['validationStatus'],
): string | undefined {
  let validationLabel: string | undefined;
  if (validationStatus === 'passed') {
    validationLabel = 'validated';
  } else if (validationStatus === 'failed') {
    validationLabel = 'validation failed';
  }

  const parts = [
    toolSequence.length > 0 ? toolSequence.join(' -> ') : undefined,
    touchedPaths.length > 0 ? `paths ${touchedPaths.join(', ')}` : undefined,
    validationLabel,
  ].filter((part): part is string => part !== undefined);

  if (parts.length === 0) {
    return undefined;
  }

  const summary = parts.join(' | ');
  if (!summary) {
    return undefined;
  }
  if (summary.length <= MAX_WORKFLOW_SUMMARY_LENGTH) {
    return summary;
  }
  return `${summary.slice(0, MAX_WORKFLOW_SUMMARY_LENGTH - 3)}...`;
}
/**
 * Derives the memory scratchpad for a conversation from its tool calls.
 *
 * Only tool calls attached to `gemini` messages are considered. For each call
 * the tool label and any path-like arguments are collected (deduplicated and
 * capped), and the most recent validation outcome replaces earlier ones.
 *
 * @param messages Conversation messages in chronological order.
 * @param projectRoot Project root used to relativize touched paths.
 * @returns A version-1 scratchpad; optional fields are omitted when empty.
 */
function buildMemoryScratchpad(
  messages: ConversationRecord['messages'],
  projectRoot: string,
): MemoryScratchpad {
  const toolSequence: string[] = [];
  const touchedPaths: string[] = [];
  let validationStatus: MemoryScratchpad['validationStatus'];

  const recordedToolCalls = messages.flatMap((message) =>
    message.type === 'gemini' && message.toolCalls ? message.toolCalls : [],
  );

  for (const call of recordedToolCalls) {
    pushUniqueLimited(
      toolSequence,
      getToolSequenceEntry(call),
      MAX_SCRATCHPAD_TOOLS,
    );
    collectPathsFromValue(call.args, projectRoot, touchedPaths);
    // A later validation result overrides an earlier one; calls that are not
    // validation steps leave the current status untouched.
    validationStatus = getValidationStatusForToolCall(call) || validationStatus;
  }

  const workflowSummary = buildWorkflowSummary(
    toolSequence,
    touchedPaths,
    validationStatus,
  );

  return {
    version: 1,
    ...(workflowSummary ? { workflowSummary } : {}),
    ...(toolSequence.length > 0 ? { toolSequence } : {}),
    ...(touchedPaths.length > 0 ? { touchedPaths } : {}),
    ...(validationStatus ? { validationStatus } : {}),
  };
}
/**
 * Reports whether the session carries a scratchpad that is not flagged stale.
 *
 * @param session Loaded session record.
 * @returns `true` only when a scratchpad exists and
 *   `memoryScratchpadIsStale` is not `true`.
 */
function hasCurrentMemoryScratchpad(session: LoadedSession): boolean {
  if (!session.memoryScratchpad) {
    return false;
  }
  return session.memoryScratchpadIsStale !== true;
}
/**
 * Reports whether the session already has its summary metadata. This is
 * decided solely by the presence of a current memory scratchpad; the
 * `summary` field is not consulted.
 *
 * @param session Loaded session record.
 * @returns `true` when a current scratchpad exists.
 */
function hasSessionSummaryMetadata(session: LoadedSession): boolean {
  const scratchpadIsCurrent = hasCurrentMemoryScratchpad(session);
  return scratchpadIsCurrent;
}
/**
 * Returns the session's message count, preferring the explicit
 * `messageCount` field over the length of the `messages` array.
 *
 * @param session Loaded session record.
 * @returns The number of messages.
 */
function getLoadedMessageCount(session: LoadedSession): number {
  const { messageCount, messages } = session;
  return messageCount ?? messages.length;
}
/**
* Generates and saves a summary for a session file.
*/
@@ -85,10 +331,11 @@ async function generateAndSaveSummary(
return;
}
// Skip if summary already exists
if (conversation.summary) {
// Skip if workflow metadata already exists; memory extraction can use the
// scratchpad even when summary generation was unavailable.
if (hasSessionSummaryMetadata(conversation)) {
debugLogger.debug(
`[SessionSummary] Summary already exists for ${sessionPath}, skipping`,
`[SessionSummary] Summary metadata already exists for ${sessionPath}, skipping`,
);
return;
}
@@ -101,29 +348,31 @@ async function generateAndSaveSummary(
return;
}
// Create summary service
const contentGenerator = config.getContentGenerator();
if (!contentGenerator) {
debugLogger.debug(
'[SessionSummary] Content generator not available, skipping summary generation',
);
return;
}
const baseLlmClient = new BaseLlmClient(contentGenerator, config);
const summaryService = new SessionSummaryService(baseLlmClient);
// Generate summary
const summary = await summaryService.generateSummary({
messages: conversation.messages,
});
let summary = conversation.summary;
if (!summary) {
debugLogger.warn(
`[SessionSummary] Failed to generate summary for ${sessionPath}`,
);
return;
const contentGenerator = config.getContentGenerator();
if (!contentGenerator) {
debugLogger.debug(
'[SessionSummary] Content generator not available, skipping summary generation',
);
} else {
const baseLlmClient = new BaseLlmClient(contentGenerator, config);
const summaryService = new SessionSummaryService(baseLlmClient);
summary =
(await summaryService.generateSummary({
messages: conversation.messages,
})) ?? undefined;
if (!summary) {
debugLogger.warn(
`[SessionSummary] Failed to generate summary for ${sessionPath}`,
);
}
}
}
let scratchpadSourceConversation = conversation;
// Re-read the file before writing to handle race conditions. For JSONL we
// only need the metadata; for legacy JSON we need the full record so we can
// round-trip the messages back to disk.
@@ -136,18 +385,53 @@ async function generateAndSaveSummary(
return;
}
// Check if summary was added by another process
if (freshConversation.summary) {
// Check if summary metadata was added by another process
if (hasSessionSummaryMetadata(freshConversation)) {
debugLogger.debug(
`[SessionSummary] Summary was added by another process for ${sessionPath}`,
`[SessionSummary] Summary metadata was added by another process for ${sessionPath}`,
);
return;
}
if (
!hasCurrentMemoryScratchpad(freshConversation) &&
(getLoadedMessageCount(freshConversation) !==
getLoadedMessageCount(conversation) ||
freshConversation.lastUpdated !== conversation.lastUpdated)
) {
const latestConversation = await loadConversationRecord(sessionPath);
if (!latestConversation) {
debugLogger.debug(`[SessionSummary] Could not re-read ${sessionPath}`);
return;
}
if (hasSessionSummaryMetadata(latestConversation)) {
debugLogger.debug(
`[SessionSummary] Summary metadata was added by another process for ${sessionPath}`,
);
return;
}
scratchpadSourceConversation = latestConversation;
}
const metadataUpdate: Partial<ConversationRecord> = {};
if (!freshConversation.summary && summary) {
metadataUpdate.summary = summary;
}
if (!hasCurrentMemoryScratchpad(freshConversation)) {
metadataUpdate.memoryScratchpad = buildMemoryScratchpad(
scratchpadSourceConversation.messages,
config.getProjectRoot(),
);
}
if (Object.keys(metadataUpdate).length === 0) {
return;
}
if (isJsonl) {
await fs.appendFile(
sessionPath,
`${JSON.stringify({ $set: { summary } })}\n`,
`${JSON.stringify({ $set: metadataUpdate })}\n`,
);
} else {
const lastUpdated = freshConversation.lastUpdated;
@@ -156,7 +440,7 @@ async function generateAndSaveSummary(
JSON.stringify(
{
...freshConversation,
summary,
...metadataUpdate,
lastUpdated,
},
null,
@@ -165,13 +449,13 @@ async function generateAndSaveSummary(
);
}
debugLogger.debug(
`[SessionSummary] Saved summary for ${sessionPath}: "${summary}"`,
`[SessionSummary] Saved summary metadata for ${sessionPath}${summary ? `: "${summary}"` : ''}`,
);
}
/**
* Finds the most recently updated previous session that still needs a summary.
* Returns the path if it needs a summary, null otherwise.
* Finds the most recently updated previous session that still needs workflow metadata.
* Returns the path if it needs a scratchpad, null otherwise.
*/
export async function getPreviousSession(
config: Config,
@@ -217,7 +501,8 @@ export async function getPreviousSession(
});
if (!conversation) continue;
if (conversation.sessionId === config.getSessionId()) continue;
if (conversation.summary) continue;
if (conversation.kind === 'subagent') continue;
if (hasSessionSummaryMetadata(conversation)) continue;
// Only generate summaries for sessions with more than 1 user message.
// `loadConversationRecord` populates `userMessageCount` in metadataOnly
@@ -264,7 +549,7 @@ export async function getPreviousSession(
}
/**
* Generates summary for the previous session if it lacks one.
* Generates summary metadata for the previous session if it lacks a scratchpad.
* This is designed to be called fire-and-forget on startup.
*/
export async function generateSummary(config: Config): Promise<void> {