mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-27 05:24:34 -07:00
feat(memory): persist auto-memory scratchpad for skill extraction (#25873)
This commit is contained in:
@@ -337,6 +337,7 @@ jobs:
|
||||
if: "${{ steps.check_evals.outputs.should_run == 'true' }}"
|
||||
env:
|
||||
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
|
||||
GEMINI_CLI_TRUST_WORKSPACE: true
|
||||
GEMINI_MODEL: 'gemini-3-pro-preview'
|
||||
# Only run always passes behavioral tests.
|
||||
EVAL_SUITE_TYPE: 'behavioral'
|
||||
|
||||
@@ -66,6 +66,7 @@ jobs:
|
||||
continue-on-error: true
|
||||
env:
|
||||
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
|
||||
GEMINI_CLI_TRUST_WORKSPACE: true
|
||||
GEMINI_MODEL: '${{ matrix.model }}'
|
||||
RUN_EVALS: 'true'
|
||||
EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}"
|
||||
|
||||
@@ -5,12 +5,78 @@
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import {
|
||||
loadConversationRecord,
|
||||
SESSION_FILE_PREFIX,
|
||||
} from '@google/gemini-cli-core';
|
||||
import {
|
||||
evalTest,
|
||||
assertModelHasOutput,
|
||||
checkModelOutputContent,
|
||||
} from './test-helper.js';
|
||||
|
||||
function findDir(base: string, name: string): string | null {
|
||||
if (!fs.existsSync(base)) return null;
|
||||
const files = fs.readdirSync(base);
|
||||
for (const file of files) {
|
||||
const fullPath = path.join(base, file);
|
||||
if (fs.statSync(fullPath).isDirectory()) {
|
||||
if (file === name) return fullPath;
|
||||
const found = findDir(fullPath, name);
|
||||
if (found) return found;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
async function loadLatestSessionRecord(homeDir: string, sessionId: string) {
|
||||
const chatsDir = findDir(path.join(homeDir, '.gemini'), 'chats');
|
||||
if (!chatsDir) {
|
||||
throw new Error('Could not find chats directory for eval session logs');
|
||||
}
|
||||
|
||||
const candidates = fs
|
||||
.readdirSync(chatsDir)
|
||||
.filter(
|
||||
(file) =>
|
||||
file.startsWith(SESSION_FILE_PREFIX) &&
|
||||
(file.endsWith('.json') || file.endsWith('.jsonl')),
|
||||
);
|
||||
|
||||
const matchingRecords = [];
|
||||
for (const file of candidates) {
|
||||
const filePath = path.join(chatsDir, file);
|
||||
const record = await loadConversationRecord(filePath);
|
||||
if (record?.sessionId === sessionId) {
|
||||
matchingRecords.push(record);
|
||||
}
|
||||
}
|
||||
|
||||
matchingRecords.sort(
|
||||
(a, b) => Date.parse(b.lastUpdated) - Date.parse(a.lastUpdated),
|
||||
);
|
||||
return matchingRecords[0] ?? null;
|
||||
}
|
||||
|
||||
async function waitForSessionScratchpad(
|
||||
homeDir: string,
|
||||
sessionId: string,
|
||||
timeoutMs = 30000,
|
||||
) {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const record = await loadLatestSessionRecord(homeDir, sessionId);
|
||||
if (record?.memoryScratchpad) {
|
||||
return record;
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
return loadLatestSessionRecord(homeDir, sessionId);
|
||||
}
|
||||
|
||||
describe('save_memory', () => {
|
||||
const TEST_PREFIX = 'Save memory test: ';
|
||||
const rememberingFavoriteColor = "Agent remembers user's favorite color";
|
||||
@@ -569,6 +635,103 @@ describe('save_memory', () => {
|
||||
},
|
||||
});
|
||||
|
||||
const memoryV2SessionScratchpad =
|
||||
'Session summary persists memory scratchpad for memory-saving sessions';
|
||||
evalTest('USUALLY_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: memoryV2SessionScratchpad,
|
||||
sessionId: 'memory-scratchpad-eval',
|
||||
params: {
|
||||
settings: {
|
||||
experimental: { memoryV2: true },
|
||||
},
|
||||
},
|
||||
messages: [
|
||||
{
|
||||
id: 'msg-1',
|
||||
type: 'user',
|
||||
content: [
|
||||
{
|
||||
text: 'Across all my projects, I prefer Vitest over Jest for testing.',
|
||||
},
|
||||
],
|
||||
timestamp: '2026-01-01T00:00:00Z',
|
||||
},
|
||||
{
|
||||
id: 'msg-2',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Noted. What else should I keep in mind?' }],
|
||||
timestamp: '2026-01-01T00:00:05Z',
|
||||
},
|
||||
{
|
||||
id: 'msg-3',
|
||||
type: 'user',
|
||||
content: [
|
||||
{
|
||||
text: 'For this repo I was debugging a flaky API test earlier, but that was just transient context.',
|
||||
},
|
||||
],
|
||||
timestamp: '2026-01-01T00:01:00Z',
|
||||
},
|
||||
{
|
||||
id: 'msg-4',
|
||||
type: 'gemini',
|
||||
content: [
|
||||
{ text: 'Understood. I will only save the durable preference.' },
|
||||
],
|
||||
timestamp: '2026-01-01T00:01:05Z',
|
||||
},
|
||||
],
|
||||
prompt:
|
||||
'Please save any persistent preferences or facts about me from our conversation to memory.',
|
||||
assert: async (rig, result) => {
|
||||
await rig.waitForToolCall('write_file').catch(() => {});
|
||||
const writeCalls = rig
|
||||
.readToolLogs()
|
||||
.filter((log) =>
|
||||
['write_file', 'replace'].includes(log.toolRequest.name),
|
||||
);
|
||||
|
||||
expect(
|
||||
writeCalls.length,
|
||||
'Expected memoryV2 save flow to edit a markdown memory file',
|
||||
).toBeGreaterThan(0);
|
||||
|
||||
await rig.run({
|
||||
args: ['--list-sessions'],
|
||||
approvalMode: 'yolo',
|
||||
timeout: 120000,
|
||||
});
|
||||
|
||||
const record = await waitForSessionScratchpad(
|
||||
rig.homeDir!,
|
||||
'memory-scratchpad-eval',
|
||||
);
|
||||
expect(
|
||||
record?.memoryScratchpad,
|
||||
'Expected the resumed session log to contain a memoryScratchpad after session summary generation',
|
||||
).toBeDefined();
|
||||
expect(record?.memoryScratchpad?.version).toBe(1);
|
||||
expect(
|
||||
record?.memoryScratchpad?.toolSequence?.some((toolName) =>
|
||||
['write_file', 'replace'].includes(toolName),
|
||||
),
|
||||
'Expected memoryScratchpad.toolSequence to include the markdown editing tool used for memory persistence',
|
||||
).toBe(true);
|
||||
expect(
|
||||
record?.memoryScratchpad?.touchedPaths?.length,
|
||||
'Expected memoryScratchpad to capture at least one touched path',
|
||||
).toBeGreaterThan(0);
|
||||
expect(
|
||||
record?.memoryScratchpad?.workflowSummary,
|
||||
'Expected memoryScratchpad.workflowSummary to be populated',
|
||||
).toMatch(/write_file|replace/i);
|
||||
|
||||
assertModelHasOutput(result);
|
||||
},
|
||||
});
|
||||
|
||||
const memoryV2RoutesUserProject =
|
||||
'Agent routes personal-to-user project notes to user-project memory';
|
||||
evalTest('USUALLY_PASSES', {
|
||||
|
||||
+630
-17
@@ -6,21 +6,30 @@
|
||||
|
||||
import fsp from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { describe, expect } from 'vitest';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
type Config,
|
||||
ApprovalMode,
|
||||
type MemoryScratchpad,
|
||||
SESSION_FILE_PREFIX,
|
||||
getProjectHash,
|
||||
startMemoryService,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { componentEvalTest } from './component-test-helper.js';
|
||||
import { ComponentRig, componentEvalTest } from './component-test-helper.js';
|
||||
import {
|
||||
average,
|
||||
averageNullable,
|
||||
countMatchingIds,
|
||||
roundStat,
|
||||
} from './statistics-helper.js';
|
||||
import { prepareWorkspace } from './test-helper.js';
|
||||
|
||||
interface SeedSession {
|
||||
sessionId: string;
|
||||
summary: string;
|
||||
userTurns: string[];
|
||||
timestampOffsetMinutes: number;
|
||||
memoryScratchpad?: MemoryScratchpad;
|
||||
}
|
||||
|
||||
interface MessageRecord {
|
||||
@@ -30,6 +39,81 @@ interface MessageRecord {
|
||||
content: Array<{ text: string }>;
|
||||
}
|
||||
|
||||
interface SessionVersion {
|
||||
sessionId: string;
|
||||
lastUpdated: string;
|
||||
}
|
||||
|
||||
interface ExtractionRunSnapshot {
|
||||
sessionIds: string[];
|
||||
skillsCreated: string[];
|
||||
candidateSessions: SessionVersion[];
|
||||
processedSessions: SessionVersion[];
|
||||
turnCount?: number;
|
||||
durationMs?: number;
|
||||
terminateReason?: string;
|
||||
}
|
||||
|
||||
interface ExtractionOutcome {
|
||||
state: { runs: ExtractionRunSnapshot[] };
|
||||
skillsDir: string;
|
||||
skillBodies: string[];
|
||||
}
|
||||
|
||||
interface SkillQualitySignal {
|
||||
label: string;
|
||||
pattern: RegExp;
|
||||
}
|
||||
|
||||
interface ScratchpadRunMetrics {
|
||||
turnCount: number | null;
|
||||
durationMs: number | null;
|
||||
terminateReason: string | null;
|
||||
skillsCreated: number;
|
||||
candidateSessions: number;
|
||||
processedSessions: number;
|
||||
relevantReads: number;
|
||||
distractorReads: number;
|
||||
totalReads: number;
|
||||
recall: number;
|
||||
precision: number;
|
||||
signalScore: number;
|
||||
skillQualityScore: number;
|
||||
skillQualityMax: number;
|
||||
skillQualityRatio: number;
|
||||
missingQualitySignals: string[];
|
||||
}
|
||||
|
||||
interface ScratchpadStatsTrial {
|
||||
trial: number;
|
||||
baseline: ScratchpadRunMetrics;
|
||||
enhanced: ScratchpadRunMetrics;
|
||||
}
|
||||
|
||||
interface ScratchpadStatsAggregate {
|
||||
turnCountAvg: number | null;
|
||||
durationMsAvg: number | null;
|
||||
recallAvg: number;
|
||||
precisionAvg: number;
|
||||
signalScoreAvg: number;
|
||||
relevantReadsAvg: number;
|
||||
distractorReadsAvg: number;
|
||||
skillsCreatedAvg: number;
|
||||
skillQualityScoreAvg: number;
|
||||
skillQualityRatioAvg: number;
|
||||
}
|
||||
|
||||
interface ScratchpadStatsReport {
|
||||
generatedAt: string;
|
||||
trials: number;
|
||||
aggregate: {
|
||||
baseline: ScratchpadStatsAggregate;
|
||||
enhanced: ScratchpadStatsAggregate;
|
||||
};
|
||||
deltas: ScratchpadStatsAggregate;
|
||||
results: ScratchpadStatsTrial[];
|
||||
}
|
||||
|
||||
const WORKSPACE_FILES = {
|
||||
'package.json': JSON.stringify(
|
||||
{
|
||||
@@ -68,6 +152,143 @@ function buildMessages(userTurns: string[]): MessageRecord[] {
|
||||
]);
|
||||
}
|
||||
|
||||
function padTurns(turns: string[]): string[] {
|
||||
if (turns.length >= 10) {
|
||||
return turns;
|
||||
}
|
||||
|
||||
const padded = [...turns];
|
||||
for (let i = turns.length; i < 10; i++) {
|
||||
padded.push(`${turns[i % turns.length]} (repeat ${i + 1})`);
|
||||
}
|
||||
return padded;
|
||||
}
|
||||
|
||||
function createScratchpad(
|
||||
workflowSummary: string,
|
||||
touchedPaths: string[],
|
||||
validationStatus: MemoryScratchpad['validationStatus'] = 'passed',
|
||||
): MemoryScratchpad {
|
||||
return {
|
||||
version: 1,
|
||||
workflowSummary,
|
||||
toolSequence: ['run_shell_command'],
|
||||
touchedPaths,
|
||||
validationStatus,
|
||||
};
|
||||
}
|
||||
|
||||
function createWorkflowComparisonSessions(withScratchpad: boolean): {
|
||||
sessions: SeedSession[];
|
||||
relevantSessionIds: string[];
|
||||
distractorSessionIds: string[];
|
||||
} {
|
||||
const relevantWorkflowSummary =
|
||||
'run_shell_command -> run_shell_command | paths packages/cli/src/config/settings.ts, docs/settings.md | validated';
|
||||
|
||||
const relevantScratchpad = withScratchpad
|
||||
? createScratchpad(relevantWorkflowSummary, [
|
||||
'packages/cli/src/config/settings.ts',
|
||||
'docs/settings.md',
|
||||
])
|
||||
: undefined;
|
||||
|
||||
const sessions: SeedSession[] = [
|
||||
{
|
||||
sessionId: 'hidden-settings-workflow-a',
|
||||
summary: 'Prepare release notes for settings launch',
|
||||
timestampOffsetMinutes: 420,
|
||||
memoryScratchpad: relevantScratchpad,
|
||||
userTurns: padTurns([
|
||||
'When we add a new setting, the durable workflow is to regenerate the settings docs instead of editing them by hand.',
|
||||
'The sequence that worked was npm run predocs:settings, npm run schema:settings, then npm run docs:settings.',
|
||||
'Skipping predocs leaves stale defaults in the generated docs.',
|
||||
'We verify the workflow by checking that both the schema output and docs update together.',
|
||||
'This exact command order is the recurring workflow we use for settings changes.',
|
||||
]),
|
||||
},
|
||||
{
|
||||
sessionId: 'hidden-settings-workflow-b',
|
||||
summary: 'Investigate CI drift in generated config reference',
|
||||
timestampOffsetMinutes: 390,
|
||||
memoryScratchpad: relevantScratchpad,
|
||||
userTurns: padTurns([
|
||||
'The config reference drift was fixed by rerunning the standard settings regeneration workflow.',
|
||||
'We again used npm run predocs:settings before npm run schema:settings and npm run docs:settings.',
|
||||
'The recurring rule is never to hand-edit generated settings docs.',
|
||||
'The validation step is to confirm the schema artifact and docs changed together after regeneration.',
|
||||
'This is the same recurring workflow we use every time a setting changes.',
|
||||
]),
|
||||
},
|
||||
{
|
||||
sessionId: 'distractor-release-notes',
|
||||
summary: 'Prepare release notes for auth launch',
|
||||
timestampOffsetMinutes: 360,
|
||||
memoryScratchpad: undefined,
|
||||
userTurns: padTurns([
|
||||
'This release-notes task was one-off and just needed manual wording updates.',
|
||||
'I edited CHANGELOG.md and docs/release-notes.md directly.',
|
||||
'There was no reusable command sequence here beyond proofreading the copy.',
|
||||
'This task should not become a standing workflow.',
|
||||
'Once the wording landed, we were done.',
|
||||
]),
|
||||
},
|
||||
{
|
||||
sessionId: 'distractor-ci-snapshots',
|
||||
summary: 'Investigate CI drift in auth snapshots',
|
||||
timestampOffsetMinutes: 330,
|
||||
memoryScratchpad: undefined,
|
||||
userTurns: padTurns([
|
||||
'This auth snapshot issue was specific to a flaky test in CI.',
|
||||
'The only commands we ran were npm test -- auth and an isolated snapshot update.',
|
||||
'It was not the recurring settings-doc workflow.',
|
||||
'Once the flaky snapshot passed, there was no broader reusable procedure.',
|
||||
'Treat this as a one-off CI cleanup.',
|
||||
]),
|
||||
},
|
||||
{
|
||||
sessionId: 'distractor-onboarding-docs',
|
||||
summary: 'Refresh onboarding documentation copy',
|
||||
timestampOffsetMinutes: 300,
|
||||
memoryScratchpad: undefined,
|
||||
userTurns: padTurns([
|
||||
'This was just a docs wording cleanup in docs/onboarding.md.',
|
||||
'No command sequence was involved.',
|
||||
'We manually edited the copy and reviewed it.',
|
||||
'There is no recurring operational workflow to capture here.',
|
||||
'This should stay a one-off docs edit.',
|
||||
]),
|
||||
},
|
||||
{
|
||||
sessionId: 'distractor-deploy-copy',
|
||||
summary: 'Adjust deployment checklist wording',
|
||||
timestampOffsetMinutes: 270,
|
||||
memoryScratchpad: undefined,
|
||||
userTurns: padTurns([
|
||||
'This was a wording-only change to docs/deploy.md.',
|
||||
'We did not run a reusable command sequence.',
|
||||
'It should not become a skill.',
|
||||
'The edit was only for this deploy checklist cleanup.',
|
||||
'After the copy change, the task was complete.',
|
||||
]),
|
||||
},
|
||||
];
|
||||
|
||||
return {
|
||||
sessions,
|
||||
relevantSessionIds: [
|
||||
'hidden-settings-workflow-a',
|
||||
'hidden-settings-workflow-b',
|
||||
],
|
||||
distractorSessionIds: [
|
||||
'distractor-release-notes',
|
||||
'distractor-ci-snapshots',
|
||||
'distractor-onboarding-docs',
|
||||
'distractor-deploy-copy',
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
async function seedSessions(
|
||||
config: Config,
|
||||
sessions: SeedSession[],
|
||||
@@ -78,9 +299,10 @@ async function seedSessions(
|
||||
const projectRoot = config.storage.getProjectRoot();
|
||||
|
||||
for (const session of sessions) {
|
||||
const timestamp = new Date(
|
||||
const sessionTimestamp = new Date(
|
||||
Date.now() - session.timestampOffsetMinutes * 60 * 1000,
|
||||
)
|
||||
);
|
||||
const timestamp = sessionTimestamp
|
||||
.toISOString()
|
||||
.slice(0, 16)
|
||||
.replace(/:/g, '-');
|
||||
@@ -89,8 +311,9 @@ async function seedSessions(
|
||||
sessionId: session.sessionId,
|
||||
projectHash: getProjectHash(projectRoot),
|
||||
summary: session.summary,
|
||||
memoryScratchpad: session.memoryScratchpad,
|
||||
startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(),
|
||||
lastUpdated: new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString(),
|
||||
lastUpdated: sessionTimestamp.toISOString(),
|
||||
messages: buildMessages(session.userTurns),
|
||||
};
|
||||
|
||||
@@ -101,10 +324,9 @@ async function seedSessions(
|
||||
}
|
||||
}
|
||||
|
||||
async function runExtractionAndReadState(config: Config): Promise<{
|
||||
state: { runs: Array<{ sessionIds: string[]; skillsCreated: string[] }> };
|
||||
skillsDir: string;
|
||||
}> {
|
||||
async function runExtractionAndReadState(
|
||||
config: Config,
|
||||
): Promise<ExtractionOutcome> {
|
||||
await startMemoryService(config);
|
||||
|
||||
const memoryDir = config.storage.getProjectMemoryTempDir();
|
||||
@@ -113,7 +335,15 @@ async function runExtractionAndReadState(config: Config): Promise<{
|
||||
|
||||
const raw = await fsp.readFile(statePath, 'utf-8');
|
||||
const state = JSON.parse(raw) as {
|
||||
runs?: Array<{ sessionIds?: string[]; skillsCreated?: string[] }>;
|
||||
runs?: Array<{
|
||||
sessionIds?: string[];
|
||||
skillsCreated?: string[];
|
||||
candidateSessions?: SessionVersion[];
|
||||
processedSessions?: SessionVersion[];
|
||||
turnCount?: number;
|
||||
durationMs?: number;
|
||||
terminateReason?: string;
|
||||
}>;
|
||||
};
|
||||
if (!Array.isArray(state.runs) || state.runs.length === 0) {
|
||||
throw new Error('Skill extraction finished without writing any run state');
|
||||
@@ -126,27 +356,292 @@ async function runExtractionAndReadState(config: Config): Promise<{
|
||||
skillsCreated: Array.isArray(run.skillsCreated)
|
||||
? run.skillsCreated
|
||||
: [],
|
||||
candidateSessions: Array.isArray(run.candidateSessions)
|
||||
? run.candidateSessions
|
||||
: [],
|
||||
processedSessions: Array.isArray(run.processedSessions)
|
||||
? run.processedSessions
|
||||
: [],
|
||||
turnCount:
|
||||
typeof run.turnCount === 'number' ? run.turnCount : undefined,
|
||||
durationMs:
|
||||
typeof run.durationMs === 'number' ? run.durationMs : undefined,
|
||||
terminateReason:
|
||||
typeof run.terminateReason === 'string'
|
||||
? run.terminateReason
|
||||
: undefined,
|
||||
})),
|
||||
},
|
||||
skillsDir,
|
||||
skillBodies: await readSkillBodies(skillsDir),
|
||||
};
|
||||
}
|
||||
|
||||
async function summarizeScratchpadRun(
|
||||
outcome: ExtractionOutcome,
|
||||
run: ExtractionRunSnapshot,
|
||||
scenario: ReturnType<typeof createWorkflowComparisonSessions>,
|
||||
): Promise<ScratchpadRunMetrics> {
|
||||
const relevantReads = countMatchingIds(
|
||||
run.processedSessions,
|
||||
scenario.relevantSessionIds,
|
||||
);
|
||||
const distractorReads = countMatchingIds(
|
||||
run.processedSessions,
|
||||
scenario.distractorSessionIds,
|
||||
);
|
||||
const totalReads = run.processedSessions.length;
|
||||
const quality = scoreSkillQuality(
|
||||
outcome.skillBodies,
|
||||
SETTINGS_SKILL_QUALITY_SIGNALS,
|
||||
);
|
||||
|
||||
return {
|
||||
turnCount: run.turnCount ?? null,
|
||||
durationMs: run.durationMs ?? null,
|
||||
terminateReason: run.terminateReason ?? null,
|
||||
skillsCreated: run.skillsCreated.length,
|
||||
candidateSessions: run.candidateSessions.length,
|
||||
processedSessions: totalReads,
|
||||
relevantReads,
|
||||
distractorReads,
|
||||
totalReads,
|
||||
recall: relevantReads / scenario.relevantSessionIds.length,
|
||||
precision: totalReads === 0 ? 0 : relevantReads / totalReads,
|
||||
signalScore: relevantReads - distractorReads,
|
||||
skillQualityScore: quality.score,
|
||||
skillQualityMax: quality.maxScore,
|
||||
skillQualityRatio:
|
||||
quality.maxScore === 0 ? 0 : quality.score / quality.maxScore,
|
||||
missingQualitySignals: quality.missing,
|
||||
};
|
||||
}
|
||||
|
||||
function averageScratchpadRuns(
|
||||
runs: ScratchpadRunMetrics[],
|
||||
): ScratchpadStatsAggregate {
|
||||
return {
|
||||
turnCountAvg: roundStat(averageNullable(runs.map((run) => run.turnCount))),
|
||||
durationMsAvg: roundStat(
|
||||
averageNullable(runs.map((run) => run.durationMs)),
|
||||
),
|
||||
recallAvg: roundStat(average(runs.map((run) => run.recall))) ?? 0,
|
||||
precisionAvg: roundStat(average(runs.map((run) => run.precision))) ?? 0,
|
||||
signalScoreAvg: roundStat(average(runs.map((run) => run.signalScore))) ?? 0,
|
||||
relevantReadsAvg:
|
||||
roundStat(average(runs.map((run) => run.relevantReads))) ?? 0,
|
||||
distractorReadsAvg:
|
||||
roundStat(average(runs.map((run) => run.distractorReads))) ?? 0,
|
||||
skillsCreatedAvg:
|
||||
roundStat(average(runs.map((run) => run.skillsCreated))) ?? 0,
|
||||
skillQualityScoreAvg:
|
||||
roundStat(average(runs.map((run) => run.skillQualityScore))) ?? 0,
|
||||
skillQualityRatioAvg:
|
||||
roundStat(average(runs.map((run) => run.skillQualityRatio))) ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
function diffScratchpadAggregates(
|
||||
baseline: ScratchpadStatsAggregate,
|
||||
enhanced: ScratchpadStatsAggregate,
|
||||
): ScratchpadStatsAggregate {
|
||||
return {
|
||||
turnCountAvg:
|
||||
baseline.turnCountAvg === null || enhanced.turnCountAvg === null
|
||||
? null
|
||||
: roundStat(enhanced.turnCountAvg - baseline.turnCountAvg),
|
||||
durationMsAvg:
|
||||
baseline.durationMsAvg === null || enhanced.durationMsAvg === null
|
||||
? null
|
||||
: roundStat(enhanced.durationMsAvg - baseline.durationMsAvg),
|
||||
recallAvg: roundStat(enhanced.recallAvg - baseline.recallAvg) ?? 0,
|
||||
precisionAvg: roundStat(enhanced.precisionAvg - baseline.precisionAvg) ?? 0,
|
||||
signalScoreAvg:
|
||||
roundStat(enhanced.signalScoreAvg - baseline.signalScoreAvg) ?? 0,
|
||||
relevantReadsAvg:
|
||||
roundStat(enhanced.relevantReadsAvg - baseline.relevantReadsAvg) ?? 0,
|
||||
distractorReadsAvg:
|
||||
roundStat(enhanced.distractorReadsAvg - baseline.distractorReadsAvg) ?? 0,
|
||||
skillsCreatedAvg:
|
||||
roundStat(enhanced.skillsCreatedAvg - baseline.skillsCreatedAvg) ?? 0,
|
||||
skillQualityScoreAvg:
|
||||
roundStat(
|
||||
enhanced.skillQualityScoreAvg - baseline.skillQualityScoreAvg,
|
||||
) ?? 0,
|
||||
skillQualityRatioAvg:
|
||||
roundStat(
|
||||
enhanced.skillQualityRatioAvg - baseline.skillQualityRatioAvg,
|
||||
) ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
async function runScenarioWithFreshRig(
|
||||
sessions: SeedSession[],
|
||||
): Promise<ExtractionOutcome> {
|
||||
const rig = new ComponentRig({
|
||||
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
|
||||
});
|
||||
try {
|
||||
await rig.initialize();
|
||||
await prepareWorkspace(rig.testDir, rig.testDir, WORKSPACE_FILES);
|
||||
await seedSessions(rig.config!, sessions);
|
||||
return await runExtractionAndReadState(rig.config!);
|
||||
} finally {
|
||||
await rig.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
async function runScratchpadStatsTrial(
|
||||
trial: number,
|
||||
): Promise<ScratchpadStatsTrial> {
|
||||
const baselineScenario = createWorkflowComparisonSessions(false);
|
||||
const enhancedScenario = createWorkflowComparisonSessions(true);
|
||||
|
||||
const baselineOutcome = await runScenarioWithFreshRig(
|
||||
baselineScenario.sessions,
|
||||
);
|
||||
const enhancedOutcome = await runScenarioWithFreshRig(
|
||||
enhancedScenario.sessions,
|
||||
);
|
||||
|
||||
const baselineRun = baselineOutcome.state.runs.at(-1);
|
||||
const enhancedRun = enhancedOutcome.state.runs.at(-1);
|
||||
if (!baselineRun || !enhancedRun) {
|
||||
throw new Error('Expected both baseline and scratchpad runs to exist');
|
||||
}
|
||||
|
||||
expectSuccessfulExtractionRun(baselineRun);
|
||||
expectSuccessfulExtractionRun(enhancedRun);
|
||||
|
||||
return {
|
||||
trial,
|
||||
baseline: await summarizeScratchpadRun(
|
||||
baselineOutcome,
|
||||
baselineRun,
|
||||
baselineScenario,
|
||||
),
|
||||
enhanced: await summarizeScratchpadRun(
|
||||
enhancedOutcome,
|
||||
enhancedRun,
|
||||
enhancedScenario,
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
async function runScratchpadStatsReport(
|
||||
trials: number,
|
||||
): Promise<ScratchpadStatsReport> {
|
||||
const results: ScratchpadStatsTrial[] = [];
|
||||
|
||||
for (let trial = 1; trial <= trials; trial++) {
|
||||
results.push(await runScratchpadStatsTrial(trial));
|
||||
}
|
||||
|
||||
const baseline = averageScratchpadRuns(
|
||||
results.map((result) => result.baseline),
|
||||
);
|
||||
const enhanced = averageScratchpadRuns(
|
||||
results.map((result) => result.enhanced),
|
||||
);
|
||||
|
||||
return {
|
||||
generatedAt: new Date().toISOString(),
|
||||
trials,
|
||||
aggregate: {
|
||||
baseline,
|
||||
enhanced,
|
||||
},
|
||||
deltas: diffScratchpadAggregates(baseline, enhanced),
|
||||
results,
|
||||
};
|
||||
}
|
||||
|
||||
async function writeScratchpadStatsReport(
|
||||
report: ScratchpadStatsReport,
|
||||
): Promise<string> {
|
||||
const outputPath = path.resolve(
|
||||
process.cwd(),
|
||||
'evals/logs/skill_extraction_scratchpad_stats.json',
|
||||
);
|
||||
await fsp.mkdir(path.dirname(outputPath), { recursive: true });
|
||||
await fsp.writeFile(outputPath, `${JSON.stringify(report, null, 2)}\n`);
|
||||
return outputPath;
|
||||
}
|
||||
|
||||
async function readSkillBodies(skillsDir: string): Promise<string[]> {
|
||||
const bodies: string[] = [];
|
||||
|
||||
try {
|
||||
const entries = await fsp.readdir(skillsDir, { withFileTypes: true });
|
||||
const skillDirs = entries.filter((entry) => entry.isDirectory());
|
||||
const bodies = await Promise.all(
|
||||
skillDirs.map((entry) =>
|
||||
fsp.readFile(path.join(skillsDir, entry.name, 'SKILL.md'), 'utf-8'),
|
||||
),
|
||||
);
|
||||
for (const entry of entries) {
|
||||
if (!entry.isDirectory()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
bodies.push(
|
||||
await fsp.readFile(
|
||||
path.join(skillsDir, entry.name, 'SKILL.md'),
|
||||
'utf-8',
|
||||
),
|
||||
);
|
||||
} catch {
|
||||
// Ignore incomplete skill directories so one bad artifact does not hide
|
||||
// valid skills created in the same eval run.
|
||||
}
|
||||
}
|
||||
return bodies;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sanity-checks one extraction run's telemetry: the agent actually took
 * turns, stayed within the turn budget, recorded a positive duration, and
 * stopped because it reached its goal rather than timing out or erroring.
 */
function expectSuccessfulExtractionRun(run: ExtractionRunSnapshot): void {
  expect(run.turnCount).toBeGreaterThan(0);
  // 30 presumably mirrors the extraction agent's turn budget — TODO confirm
  // against the pipeline configuration.
  expect(run.turnCount).toBeLessThanOrEqual(30);
  expect(run.durationMs).toBeGreaterThan(0);
  expect(run.terminateReason).toBe('GOAL');
}
|
||||
|
||||
function scoreSkillQuality(
|
||||
skillBodies: string[],
|
||||
signals: SkillQualitySignal[],
|
||||
): { score: number; maxScore: number; missing: string[] } {
|
||||
const combined = skillBodies.join('\n\n');
|
||||
const matched = signals.filter((signal) => signal.pattern.test(combined));
|
||||
|
||||
return {
|
||||
score: matched.length,
|
||||
maxScore: signals.length,
|
||||
missing: signals
|
||||
.filter((signal) => !signal.pattern.test(combined))
|
||||
.map((signal) => signal.label),
|
||||
};
|
||||
}
|
||||
|
||||
const SETTINGS_SKILL_QUALITY_SIGNALS: SkillQualitySignal[] = [
|
||||
{ label: 'predocs command', pattern: /npm run predocs:settings/i },
|
||||
{ label: 'schema command', pattern: /npm run schema:settings/i },
|
||||
{ label: 'docs command', pattern: /npm run docs:settings/i },
|
||||
{ label: 'verification guidance', pattern: /verif(?:y|ication)/i },
|
||||
{
|
||||
label: 'generated docs warning or ordering constraint',
|
||||
pattern:
|
||||
/do not hand-edit|manual edits|exact command order|preserve.*order/i,
|
||||
},
|
||||
];
|
||||
|
||||
const DB_MIGRATION_SKILL_QUALITY_SIGNALS: SkillQualitySignal[] = [
|
||||
{ label: 'db check command', pattern: /npm run db:check/i },
|
||||
{ label: 'db migrate command', pattern: /npm run db:migrate/i },
|
||||
{ label: 'db validate command', pattern: /npm run db:validate/i },
|
||||
{ label: 'rollback guidance', pattern: /npm run db:rollback|rollback/i },
|
||||
{
|
||||
label: 'ordering constraint',
|
||||
pattern: /check.*migrate.*validate|ordering is critical|mandatory/i,
|
||||
},
|
||||
];
|
||||
|
||||
/**
|
||||
* Shared configOverrides for all skill extraction component evals.
|
||||
* - experimentalAutoMemory: enables the Auto Memory skill extraction pipeline.
|
||||
@@ -158,6 +653,16 @@ const EXTRACTION_CONFIG_OVERRIDES = {
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
};
|
||||
|
||||
function parseScratchpadStatsTrials(): number {
|
||||
const configured = Number.parseInt(
|
||||
process.env['SCRATCHPAD_STATS_TRIALS'] ?? '8',
|
||||
10,
|
||||
);
|
||||
return Number.isFinite(configured) && configured > 0 ? configured : 8;
|
||||
}
|
||||
|
||||
const SCRATCHPAD_STATS_TRIALS = parseScratchpadStatsTrials();
|
||||
|
||||
describe('Skill Extraction', () => {
|
||||
componentEvalTest('USUALLY_PASSES', {
|
||||
suiteName: 'skill-extraction',
|
||||
@@ -264,15 +769,24 @@ describe('Skill Extraction', () => {
|
||||
const { state, skillsDir } = await runExtractionAndReadState(config);
|
||||
const skillBodies = await readSkillBodies(skillsDir);
|
||||
const combinedSkills = skillBodies.join('\n\n');
|
||||
const quality = scoreSkillQuality(
|
||||
skillBodies,
|
||||
SETTINGS_SKILL_QUALITY_SIGNALS,
|
||||
);
|
||||
|
||||
expect(state.runs).toHaveLength(1);
|
||||
expect(state.runs[0].sessionIds).toHaveLength(2);
|
||||
expectSuccessfulExtractionRun(state.runs[0]);
|
||||
expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1);
|
||||
expect(skillBodies.length).toBeGreaterThanOrEqual(1);
|
||||
expect(combinedSkills).toContain('npm run predocs:settings');
|
||||
expect(combinedSkills).toContain('npm run schema:settings');
|
||||
expect(combinedSkills).toContain('npm run docs:settings');
|
||||
expect(combinedSkills).toMatch(/Verification/i);
|
||||
expect(combinedSkills).toMatch(/verif(?:y|ication)/i);
|
||||
expect(
|
||||
quality.score,
|
||||
`missing quality signals: ${quality.missing.join(', ')}`,
|
||||
).toBeGreaterThanOrEqual(4);
|
||||
|
||||
// Verify the extraction agent activated skill-creator for design guidance.
|
||||
expect(config.getSkillManager().isSkillActive('skill-creator')).toBe(
|
||||
@@ -281,6 +795,96 @@ describe('Skill Extraction', () => {
|
||||
},
|
||||
});
|
||||
|
||||
componentEvalTest('USUALLY_PASSES', {
|
||||
suiteName: 'skill-extraction',
|
||||
suiteType: 'component-level',
|
||||
name: 'memory scratchpad improves repeated-workflow recall versus summary-only index',
|
||||
files: WORKSPACE_FILES,
|
||||
timeout: 360000,
|
||||
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
|
||||
assert: async () => {
|
||||
const baselineScenario = createWorkflowComparisonSessions(false);
|
||||
const enhancedScenario = createWorkflowComparisonSessions(true);
|
||||
|
||||
const baselineOutcome = await runScenarioWithFreshRig(
|
||||
baselineScenario.sessions,
|
||||
);
|
||||
const enhancedOutcome = await runScenarioWithFreshRig(
|
||||
enhancedScenario.sessions,
|
||||
);
|
||||
|
||||
const baselineRun = baselineOutcome.state.runs.at(-1);
|
||||
const enhancedRun = enhancedOutcome.state.runs.at(-1);
|
||||
if (!baselineRun || !enhancedRun) {
|
||||
throw new Error('Expected both baseline and scratchpad runs to exist');
|
||||
}
|
||||
|
||||
expectSuccessfulExtractionRun(baselineRun);
|
||||
expectSuccessfulExtractionRun(enhancedRun);
|
||||
|
||||
const baselineRelevantReads = countMatchingIds(
|
||||
baselineRun.processedSessions,
|
||||
baselineScenario.relevantSessionIds,
|
||||
);
|
||||
const enhancedRelevantReads = countMatchingIds(
|
||||
enhancedRun.processedSessions,
|
||||
enhancedScenario.relevantSessionIds,
|
||||
);
|
||||
const baselineDistractorReads = countMatchingIds(
|
||||
baselineRun.processedSessions,
|
||||
baselineScenario.distractorSessionIds,
|
||||
);
|
||||
const enhancedDistractorReads = countMatchingIds(
|
||||
enhancedRun.processedSessions,
|
||||
enhancedScenario.distractorSessionIds,
|
||||
);
|
||||
const baselineSignalScore =
|
||||
baselineRelevantReads - baselineDistractorReads;
|
||||
const enhancedSignalScore =
|
||||
enhancedRelevantReads - enhancedDistractorReads;
|
||||
|
||||
expect(enhancedRun.candidateSessions).toHaveLength(
|
||||
enhancedScenario.sessions.length,
|
||||
);
|
||||
expect(enhancedRelevantReads).toBeGreaterThanOrEqual(2);
|
||||
expect(enhancedRelevantReads).toBeGreaterThanOrEqual(
|
||||
baselineRelevantReads,
|
||||
);
|
||||
expect(enhancedDistractorReads).toBeLessThanOrEqual(
|
||||
baselineDistractorReads,
|
||||
);
|
||||
expect(enhancedSignalScore).toBeGreaterThan(baselineSignalScore);
|
||||
},
|
||||
});
|
||||
|
||||
if (process.env['RUN_SCRATCHPAD_STATS'] === '1') {
|
||||
componentEvalTest('USUALLY_PASSES', {
|
||||
suiteName: 'skill-extraction',
|
||||
suiteType: 'component-level',
|
||||
name: 'reports memory scratchpad retrieval statistics',
|
||||
timeout: Math.max(360000, SCRATCHPAD_STATS_TRIALS * 150000),
|
||||
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
|
||||
assert: async () => {
|
||||
const report = await runScratchpadStatsReport(SCRATCHPAD_STATS_TRIALS);
|
||||
const outputPath = await writeScratchpadStatsReport(report);
|
||||
|
||||
console.info(
|
||||
`Wrote scratchpad stats report to ${outputPath}\n${JSON.stringify(
|
||||
report.aggregate,
|
||||
null,
|
||||
2,
|
||||
)}`,
|
||||
);
|
||||
|
||||
expect(report.results).toHaveLength(SCRATCHPAD_STATS_TRIALS);
|
||||
expect(report.aggregate.baseline.recallAvg).toBeGreaterThan(0);
|
||||
expect(report.aggregate.enhanced.recallAvg).toBeGreaterThan(0);
|
||||
},
|
||||
});
|
||||
} else {
|
||||
it.skip('reports memory scratchpad retrieval statistics', () => {});
|
||||
}
|
||||
|
||||
componentEvalTest('USUALLY_PASSES', {
|
||||
suiteName: 'skill-extraction',
|
||||
suiteType: 'component-level',
|
||||
@@ -330,15 +934,24 @@ describe('Skill Extraction', () => {
|
||||
const { state, skillsDir } = await runExtractionAndReadState(config);
|
||||
const skillBodies = await readSkillBodies(skillsDir);
|
||||
const combinedSkills = skillBodies.join('\n\n');
|
||||
const quality = scoreSkillQuality(
|
||||
skillBodies,
|
||||
DB_MIGRATION_SKILL_QUALITY_SIGNALS,
|
||||
);
|
||||
|
||||
expect(state.runs).toHaveLength(1);
|
||||
expect(state.runs[0].sessionIds).toHaveLength(2);
|
||||
expectSuccessfulExtractionRun(state.runs[0]);
|
||||
expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1);
|
||||
expect(skillBodies.length).toBeGreaterThanOrEqual(1);
|
||||
expect(combinedSkills).toContain('npm run db:check');
|
||||
expect(combinedSkills).toContain('npm run db:migrate');
|
||||
expect(combinedSkills).toContain('npm run db:validate');
|
||||
expect(combinedSkills).toMatch(/rollback/i);
|
||||
expect(
|
||||
quality.score,
|
||||
`missing quality signals: ${quality.missing.join(', ')}`,
|
||||
).toBeGreaterThanOrEqual(4);
|
||||
|
||||
// Verify the extraction agent activated skill-creator for design guidance.
|
||||
expect(config.getSkillManager().isSkillActive('skill-creator')).toBe(
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
export function countMatchingIds<T extends { sessionId: string }>(
|
||||
items: T[],
|
||||
expectedIds: string[],
|
||||
): number {
|
||||
const expected = new Set(expectedIds);
|
||||
return items.filter((item) => expected.has(item.sessionId)).length;
|
||||
}
|
||||
|
||||
export function roundStat(value: number | null): number | null {
|
||||
return value === null ? null : Number(value.toFixed(4));
|
||||
}
|
||||
|
||||
export function average(values: number[]): number {
|
||||
return values.reduce((sum, value) => sum + value, 0) / values.length;
|
||||
}
|
||||
|
||||
export function averageNullable(values: Array<number | null>): number | null {
|
||||
const numericValues = values.filter((value) => value !== null);
|
||||
return numericValues.length === 0 ? null : average(numericValues);
|
||||
}
|
||||
@@ -779,6 +779,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
return {
|
||||
result: finalResult || 'Task completed.',
|
||||
terminate_reason: terminateReason,
|
||||
turn_count: turnCounter,
|
||||
duration_ms: Date.now() - startTime,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -786,6 +788,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
result:
|
||||
finalResult || 'Agent execution was terminated before completion.',
|
||||
terminate_reason: terminateReason,
|
||||
turn_count: turnCounter,
|
||||
duration_ms: Date.now() - startTime,
|
||||
};
|
||||
} catch (error) {
|
||||
// Check if the error is an AbortError caused by our internal timeout.
|
||||
@@ -826,6 +830,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
return {
|
||||
result: finalResult,
|
||||
terminate_reason: terminateReason,
|
||||
turn_count: turnCounter,
|
||||
duration_ms: Date.now() - startTime,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -840,6 +846,8 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
return {
|
||||
result: finalResult,
|
||||
terminate_reason: terminateReason,
|
||||
turn_count: turnCounter,
|
||||
duration_ms: Date.now() - startTime,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -74,12 +74,14 @@ describe('SkillExtractionAgent', () => {
|
||||
|
||||
expect(query).toContain(existingSkillsSummary);
|
||||
expect(query).toContain(sessionIndex);
|
||||
expect(query).toContain('optional workflow hint');
|
||||
expect(query).toContain(
|
||||
'The summary is a user-intent summary, not a workflow summary.',
|
||||
'workflow hints alone is never enough evidence for a reusable skill.',
|
||||
);
|
||||
expect(query).toContain(
|
||||
'The session summaries describe user intent, not workflow details.',
|
||||
'Session summaries describe user intent; optional workflow hints describe likely procedural traces.',
|
||||
);
|
||||
expect(query).toContain('Use workflow hints for routing');
|
||||
expect(query).toContain(
|
||||
'Only write a skill if the evidence shows a durable, recurring workflow',
|
||||
);
|
||||
|
||||
@@ -303,10 +303,11 @@ export const SkillExtractionAgent = (
|
||||
'# Session Index',
|
||||
'',
|
||||
'Below is an index of past conversation sessions. Each line shows:',
|
||||
'[NEW] or [old] status, a 1-line summary, message count, and the file path.',
|
||||
'[NEW] or [old] status, a 1-line user-intent summary, optional workflow hint, message count, and the file path.',
|
||||
'',
|
||||
'The summary is a user-intent summary, not a workflow summary.',
|
||||
'Matching summary text alone is never enough evidence for a reusable skill.',
|
||||
'Some lines may include "| workflow: ..."; this is a compact workflow hint from session metadata.',
|
||||
'Use workflow hints to prioritize which sessions to read and to group likely recurring workflows.',
|
||||
'Matching summary text or workflow hints alone is never enough evidence for a reusable skill.',
|
||||
'',
|
||||
'[NEW] = not yet processed for skill extraction (focus on these)',
|
||||
'[old] = previously processed (read only if a [NEW] session hints at a repeated pattern)',
|
||||
@@ -326,7 +327,7 @@ export const SkillExtractionAgent = (
|
||||
|
||||
return {
|
||||
systemPrompt: buildSystemPrompt(skillsDir),
|
||||
query: `${initialContext}\n\nAnalyze the session index above. The session summaries describe user intent, not workflow details. Read sessions that suggest repeated workflows using read_file. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
|
||||
query: `${initialContext}\n\nAnalyze the session index above. Session summaries describe user intent; optional workflow hints describe likely procedural traces. Use workflow hints for routing, then read sessions that suggest repeated workflows using read_file to verify recurrence from transcript evidence. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
|
||||
};
|
||||
},
|
||||
runConfig: {
|
||||
|
||||
@@ -36,6 +36,8 @@ export enum AgentTerminateMode {
|
||||
export interface OutputObject {
|
||||
result: string;
|
||||
terminate_reason: AgentTerminateMode;
|
||||
turn_count?: number;
|
||||
duration_ms?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -112,6 +112,7 @@ export async function loadConversationRecord(
|
||||
userMessageCount?: number;
|
||||
firstUserMessage?: string;
|
||||
hasUserOrAssistantMessage?: boolean;
|
||||
memoryScratchpadIsStale?: boolean;
|
||||
})
|
||||
| null
|
||||
> {
|
||||
@@ -133,6 +134,8 @@ export async function loadConversationRecord(
|
||||
string,
|
||||
{ isUser: boolean; isUserOrAssistant: boolean }
|
||||
>();
|
||||
let isTrackingMemoryScratchpadFreshness = false;
|
||||
let memoryScratchpadIsStale = false;
|
||||
let firstUserMessageStr: string | undefined;
|
||||
|
||||
for await (const line of rl) {
|
||||
@@ -140,6 +143,9 @@ export async function loadConversationRecord(
|
||||
try {
|
||||
const record = JSON.parse(line) as unknown;
|
||||
if (isRewindRecord(record)) {
|
||||
if (isTrackingMemoryScratchpadFreshness) {
|
||||
memoryScratchpadIsStale = true;
|
||||
}
|
||||
const rewindId = record.$rewindTo;
|
||||
if (options?.metadataOnly) {
|
||||
const idx = messageIds.indexOf(rewindId);
|
||||
@@ -168,6 +174,9 @@ export async function loadConversationRecord(
|
||||
}
|
||||
}
|
||||
} else if (isMessageRecord(record)) {
|
||||
if (isTrackingMemoryScratchpadFreshness) {
|
||||
memoryScratchpadIsStale = true;
|
||||
}
|
||||
const id = record.id;
|
||||
const isUser = hasProperty(record, 'type') && record.type === 'user';
|
||||
const isUserOrAssistant =
|
||||
@@ -206,6 +215,12 @@ export async function loadConversationRecord(
|
||||
}
|
||||
}
|
||||
} else if (isMetadataUpdateRecord(record)) {
|
||||
if (hasProperty(record.$set, 'memoryScratchpad')) {
|
||||
isTrackingMemoryScratchpadFreshness = Boolean(
|
||||
record.$set.memoryScratchpad,
|
||||
);
|
||||
memoryScratchpadIsStale = false;
|
||||
}
|
||||
// Metadata update
|
||||
metadata = {
|
||||
...metadata,
|
||||
@@ -257,6 +272,7 @@ export async function loadConversationRecord(
|
||||
startTime: metadata.startTime || new Date().toISOString(),
|
||||
lastUpdated: metadata.lastUpdated || new Date().toISOString(),
|
||||
summary: metadata.summary,
|
||||
memoryScratchpad: metadata.memoryScratchpad,
|
||||
directories: metadata.directories,
|
||||
kind: metadata.kind,
|
||||
messages: options?.metadataOnly ? [] : loadedMessages,
|
||||
@@ -267,6 +283,9 @@ export async function loadConversationRecord(
|
||||
options?.metadataOnly && metadataMessages.length > 0
|
||||
? metadataMessages.filter((m) => m.type === 'user').length
|
||||
: userMessageCount,
|
||||
memoryScratchpadIsStale: isTrackingMemoryScratchpadFreshness
|
||||
? memoryScratchpadIsStale
|
||||
: undefined,
|
||||
firstUserMessage: fallbackFirstUserMessage,
|
||||
hasUserOrAssistantMessage:
|
||||
options?.metadataOnly && metadataMessages.length > 0
|
||||
@@ -332,6 +351,13 @@ export class ChatRecordingService {
|
||||
for (const msg of this.cachedConversation.messages) {
|
||||
this.appendRecord(msg);
|
||||
}
|
||||
if (this.cachedConversation.memoryScratchpad) {
|
||||
this.appendRecord({
|
||||
$set: {
|
||||
memoryScratchpad: this.cachedConversation.memoryScratchpad,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Update the session ID in the existing file
|
||||
|
||||
@@ -25,6 +25,19 @@ export interface TokensSummary {
|
||||
total: number; // totalTokenCount
|
||||
}
|
||||
|
||||
export type MemoryValidationStatus = 'passed' | 'failed' | 'unknown';
|
||||
|
||||
/**
|
||||
* Lightweight workflow metadata attached to a session for memory extraction.
|
||||
*/
|
||||
export interface MemoryScratchpad {
|
||||
version: 1;
|
||||
workflowSummary?: string;
|
||||
toolSequence?: string[];
|
||||
touchedPaths?: string[];
|
||||
validationStatus?: MemoryValidationStatus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Base fields common to all messages.
|
||||
*/
|
||||
@@ -83,6 +96,7 @@ export interface ConversationRecord {
|
||||
lastUpdated: string;
|
||||
messages: MessageRecord[];
|
||||
summary?: string;
|
||||
memoryScratchpad?: MemoryScratchpad;
|
||||
/** Workspace directories added during the session via /dir add */
|
||||
directories?: string[];
|
||||
/** The kind of conversation (main agent or subagent) */
|
||||
@@ -120,6 +134,7 @@ export interface PartialMetadataRecord {
|
||||
startTime?: string;
|
||||
lastUpdated?: string;
|
||||
summary?: string;
|
||||
memoryScratchpad?: MemoryScratchpad;
|
||||
directories?: string[];
|
||||
kind?: 'main' | 'subagent';
|
||||
}
|
||||
|
||||
@@ -127,6 +127,7 @@ async function writeConversationJsonl(
|
||||
startTime: conversation.startTime,
|
||||
lastUpdated: conversation.lastUpdated,
|
||||
summary: conversation.summary,
|
||||
memoryScratchpad: conversation.memoryScratchpad,
|
||||
directories: conversation.directories,
|
||||
kind: conversation.kind,
|
||||
};
|
||||
@@ -565,7 +566,7 @@ describe('memoryService', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('records only sessions whose read_file calls succeed as processed', async () => {
|
||||
it('records only sessions whose read_file completed successfully as processed', async () => {
|
||||
const { startMemoryService, readExtractionState } = await import(
|
||||
'./memoryService.js'
|
||||
);
|
||||
@@ -595,17 +596,69 @@ describe('memoryService', () => {
|
||||
messageCount: 20,
|
||||
lastUpdated: '2025-01-01T01:00:00Z',
|
||||
});
|
||||
const failedConversation = createConversation({
|
||||
sessionId: 'failed-session',
|
||||
summary: 'read_file errors on this one',
|
||||
messageCount: 20,
|
||||
lastUpdated: '2025-01-03T01:00:00Z',
|
||||
});
|
||||
const rejectedConversation = createConversation({
|
||||
sessionId: 'rejected-session',
|
||||
summary: 'read_file was rejected for this one',
|
||||
messageCount: 20,
|
||||
lastUpdated: '2025-01-02T02:00:00Z',
|
||||
});
|
||||
const mismatchedEndConversation = createConversation({
|
||||
sessionId: 'mismatched-end-session',
|
||||
summary: 'read_file start with a mismatched tool end',
|
||||
messageCount: 20,
|
||||
lastUpdated: '2025-01-02T03:00:00Z',
|
||||
});
|
||||
const mismatchedErrorConversation = createConversation({
|
||||
sessionId: 'mismatched-error-session',
|
||||
summary: 'read_file recovers after a mismatched tool error',
|
||||
messageCount: 20,
|
||||
lastUpdated: '2025-01-02T04:00:00Z',
|
||||
});
|
||||
|
||||
const openedPath = path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-02T00-00-opened.jsonl`,
|
||||
);
|
||||
const skippedPath = path.join(
|
||||
const failedPath = path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-skipped.jsonl`,
|
||||
`${SESSION_FILE_PREFIX}2025-01-03T00-00-failed.jsonl`,
|
||||
);
|
||||
const rejectedPath = path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-02T00-00-rejected.jsonl`,
|
||||
);
|
||||
const mismatchedEndPath = path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-02T00-00-mismatched-end.jsonl`,
|
||||
);
|
||||
const mismatchedErrorPath = path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-02T00-00-mismatched-error.jsonl`,
|
||||
);
|
||||
await writeConversationJsonl(openedPath, openedConversation);
|
||||
await writeConversationJsonl(skippedPath, skippedConversation);
|
||||
await writeConversationJsonl(failedPath, failedConversation);
|
||||
await writeConversationJsonl(rejectedPath, rejectedConversation);
|
||||
await writeConversationJsonl(
|
||||
mismatchedEndPath,
|
||||
mismatchedEndConversation,
|
||||
);
|
||||
await writeConversationJsonl(
|
||||
mismatchedErrorPath,
|
||||
mismatchedErrorConversation,
|
||||
);
|
||||
await writeConversationJsonl(
|
||||
path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-skipped.jsonl`,
|
||||
),
|
||||
skippedConversation,
|
||||
);
|
||||
|
||||
vi.mocked(LocalAgentExecutor.create).mockImplementationOnce(
|
||||
async (_definition, _context, onActivity) =>
|
||||
@@ -624,21 +677,21 @@ describe('memoryService', () => {
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'TOOL_CALL_START',
|
||||
type: 'TOOL_CALL_END',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
args: { file_path: skippedPath },
|
||||
callId: 'call-skipped',
|
||||
id: 'call-opened',
|
||||
data: {},
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'ERROR',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
callId: 'call-skipped',
|
||||
error: 'access denied',
|
||||
args: { file_path: failedPath },
|
||||
callId: 'call-failed',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
@@ -647,8 +700,28 @@ describe('memoryService', () => {
|
||||
type: 'TOOL_CALL_END',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
id: 'call-opened',
|
||||
data: { content: 'Read this one' },
|
||||
id: 'call-failed',
|
||||
data: { isError: true },
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
args: { file_path: rejectedPath },
|
||||
callId: 'call-rejected',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'ERROR',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
callId: 'call-rejected',
|
||||
error: 'User rejected this operation.',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
@@ -661,6 +734,56 @@ describe('memoryService', () => {
|
||||
callId: 'call-unrelated',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
args: { file_path: mismatchedEndPath },
|
||||
callId: 'call-mismatched-end',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'TOOL_CALL_END',
|
||||
data: {
|
||||
name: 'write_file',
|
||||
id: 'call-mismatched-end',
|
||||
data: {},
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
args: { file_path: mismatchedErrorPath },
|
||||
callId: 'call-mismatched-error',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'ERROR',
|
||||
data: {
|
||||
name: 'write_file',
|
||||
callId: 'call-mismatched-error',
|
||||
error: 'Different tool failed.',
|
||||
},
|
||||
});
|
||||
onActivity?.({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'Skill Extractor',
|
||||
type: 'TOOL_CALL_END',
|
||||
data: {
|
||||
name: 'read_file',
|
||||
id: 'call-mismatched-error',
|
||||
data: {},
|
||||
},
|
||||
});
|
||||
return undefined;
|
||||
}),
|
||||
}) as never,
|
||||
@@ -691,6 +814,22 @@ describe('memoryService', () => {
|
||||
);
|
||||
expect(state.runs).toHaveLength(1);
|
||||
expect(state.runs[0].candidateSessions).toEqual([
|
||||
{
|
||||
sessionId: 'failed-session',
|
||||
lastUpdated: '2025-01-03T01:00:00Z',
|
||||
},
|
||||
{
|
||||
sessionId: 'mismatched-error-session',
|
||||
lastUpdated: '2025-01-02T04:00:00Z',
|
||||
},
|
||||
{
|
||||
sessionId: 'mismatched-end-session',
|
||||
lastUpdated: '2025-01-02T03:00:00Z',
|
||||
},
|
||||
{
|
||||
sessionId: 'rejected-session',
|
||||
lastUpdated: '2025-01-02T02:00:00Z',
|
||||
},
|
||||
{
|
||||
sessionId: 'opened-session',
|
||||
lastUpdated: '2025-01-02T01:00:00Z',
|
||||
@@ -701,12 +840,19 @@ describe('memoryService', () => {
|
||||
},
|
||||
]);
|
||||
expect(state.runs[0].processedSessions).toEqual([
|
||||
{
|
||||
sessionId: 'mismatched-error-session',
|
||||
lastUpdated: '2025-01-02T04:00:00Z',
|
||||
},
|
||||
{
|
||||
sessionId: 'opened-session',
|
||||
lastUpdated: '2025-01-02T01:00:00Z',
|
||||
},
|
||||
]);
|
||||
expect(state.runs[0].sessionIds).toEqual(['opened-session']);
|
||||
expect(state.runs[0].sessionIds).toEqual([
|
||||
'mismatched-error-session',
|
||||
'opened-session',
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -902,6 +1048,178 @@ describe('memoryService', () => {
|
||||
expect(result.sessionIndex).toContain(path.join(chatsDir, fileName));
|
||||
});
|
||||
|
||||
it('falls back to scratchpad workflow summary when summary is missing', async () => {
|
||||
const { buildSessionIndex } = await import('./memoryService.js');
|
||||
|
||||
const conversation = createConversation({
|
||||
sessionId: 'scratchpad-only',
|
||||
summary: undefined,
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary:
|
||||
'read_file -> edit | paths packages/core/src/services/memoryService.ts | validated',
|
||||
},
|
||||
messageCount: 20,
|
||||
});
|
||||
await writeConversationJsonl(
|
||||
path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-scratch01.jsonl`,
|
||||
),
|
||||
conversation,
|
||||
);
|
||||
|
||||
const result = await buildSessionIndex(chatsDir, { runs: [] });
|
||||
|
||||
expect(result.sessionIndex).toContain('read_file -> edit');
|
||||
expect(result.sessionIndex).not.toContain('(no summary)');
|
||||
});
|
||||
|
||||
it('ignores malformed scratchpad workflow summaries while indexing sessions', async () => {
|
||||
const { buildSessionIndex } = await import('./memoryService.js');
|
||||
|
||||
const malformedConversation = createConversation({
|
||||
sessionId: 'malformed-scratchpad',
|
||||
summary: undefined,
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary: 123,
|
||||
} as unknown as ConversationRecord['memoryScratchpad'],
|
||||
messageCount: 20,
|
||||
});
|
||||
await writeConversationJsonl(
|
||||
path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-badpad.jsonl`,
|
||||
),
|
||||
malformedConversation,
|
||||
);
|
||||
|
||||
const validConversation = createConversation({
|
||||
sessionId: 'valid-session',
|
||||
summary: 'Still indexes other sessions',
|
||||
messageCount: 20,
|
||||
});
|
||||
await writeConversationJsonl(
|
||||
path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-valid.jsonl`,
|
||||
),
|
||||
validConversation,
|
||||
);
|
||||
|
||||
const result = await buildSessionIndex(chatsDir, { runs: [] });
|
||||
|
||||
expect(result.sessionIndex).toContain('(no summary)');
|
||||
expect(result.sessionIndex).toContain('Still indexes other sessions');
|
||||
expect(result.sessionIndex).not.toContain('123');
|
||||
});
|
||||
|
||||
it('appends workflow summary when both summary and scratchpad are present', async () => {
|
||||
const { buildSessionIndex } = await import('./memoryService.js');
|
||||
|
||||
const conversation = createConversation({
|
||||
sessionId: 'summary-and-scratchpad',
|
||||
summary: 'Fix session scanning',
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary:
|
||||
'read_file -> edit | paths packages/core/src/services/sessionSummaryUtils.ts',
|
||||
},
|
||||
messageCount: 20,
|
||||
});
|
||||
await writeConversationJsonl(
|
||||
path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-scratch02.jsonl`,
|
||||
),
|
||||
conversation,
|
||||
);
|
||||
|
||||
const result = await buildSessionIndex(chatsDir, { runs: [] });
|
||||
|
||||
expect(result.sessionIndex).toContain('Fix session scanning | workflow:');
|
||||
expect(result.sessionIndex).toContain('sessionSummaryUtils.ts');
|
||||
});
|
||||
|
||||
it('omits stale scratchpad workflow summaries from resumed JSONL sessions', async () => {
|
||||
const { buildSessionIndex } = await import('./memoryService.js');
|
||||
|
||||
const conversation = createConversation({
|
||||
sessionId: 'stale-scratchpad',
|
||||
summary: 'Resume memory work',
|
||||
messageCount: 20,
|
||||
lastUpdated: '2025-01-01T01:00:00Z',
|
||||
});
|
||||
const filePath = path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-stale001.jsonl`,
|
||||
);
|
||||
await writeConversationJsonl(filePath, conversation);
|
||||
await fs.appendFile(
|
||||
filePath,
|
||||
`${JSON.stringify({
|
||||
$set: {
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary: 'stale_workflow | paths stale.ts',
|
||||
},
|
||||
},
|
||||
})}\n`,
|
||||
);
|
||||
await fs.appendFile(
|
||||
filePath,
|
||||
[
|
||||
JSON.stringify({
|
||||
id: 'resumed-user-message',
|
||||
timestamp: '2025-01-02T01:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Continue after the scratchpad was written' }],
|
||||
}),
|
||||
JSON.stringify({
|
||||
$set: { lastUpdated: '2025-01-02T01:00:01Z' },
|
||||
}),
|
||||
].join('\n') + '\n',
|
||||
);
|
||||
|
||||
const result = await buildSessionIndex(chatsDir, { runs: [] });
|
||||
|
||||
expect(result.sessionIndex).toContain('Resume memory work');
|
||||
expect(result.sessionIndex).not.toContain('stale_workflow');
|
||||
expect(result.sessionIndex).not.toContain('stale.ts');
|
||||
});
|
||||
|
||||
it('sanitizes shell command workflow summaries before indexing sessions', async () => {
|
||||
const { buildSessionIndex } = await import('./memoryService.js');
|
||||
|
||||
const conversation = createConversation({
|
||||
sessionId: 'raw-shell-scratchpad',
|
||||
summary: 'Investigate API migration',
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary:
|
||||
'run_shell_command: curl https://api.example.com -H "Authorization: Bearer sk-secret-token" -> read_file | paths package.json',
|
||||
},
|
||||
messageCount: 20,
|
||||
});
|
||||
await writeConversationJsonl(
|
||||
path.join(
|
||||
chatsDir,
|
||||
`${SESSION_FILE_PREFIX}2025-01-01T00-00-shellraw.jsonl`,
|
||||
),
|
||||
conversation,
|
||||
);
|
||||
|
||||
const result = await buildSessionIndex(chatsDir, { runs: [] });
|
||||
|
||||
expect(result.sessionIndex).toContain(
|
||||
'workflow: run_shell_command: curl -> read_file | paths package.json',
|
||||
);
|
||||
expect(result.sessionIndex).not.toContain('Authorization');
|
||||
expect(result.sessionIndex).not.toContain('sk-secret-token');
|
||||
expect(result.sessionIndex).not.toContain('https://api.example.com');
|
||||
});
|
||||
|
||||
it('filters out subagent sessions', async () => {
|
||||
const { buildSessionIndex } = await import('./memoryService.js');
|
||||
|
||||
@@ -1176,6 +1494,9 @@ describe('memoryService', () => {
|
||||
},
|
||||
],
|
||||
skillsCreated: ['debug-helper', 'test-gen'],
|
||||
turnCount: 4,
|
||||
durationMs: 1875,
|
||||
terminateReason: 'GOAL',
|
||||
},
|
||||
],
|
||||
};
|
||||
@@ -1202,6 +1523,9 @@ describe('memoryService', () => {
|
||||
]);
|
||||
expect(result.runs[0].sessionIds).toEqual(['s1']);
|
||||
expect(result.runs[0].runAt).toBe('2025-06-01T00:00:00Z');
|
||||
expect(result.runs[0].turnCount).toBe(4);
|
||||
expect(result.runs[0].durationMs).toBe(1875);
|
||||
expect(result.runs[0].terminateReason).toBe('GOAL');
|
||||
});
|
||||
|
||||
it('writeExtractionState + readExtractionState roundtrips runs correctly', async () => {
|
||||
@@ -1235,11 +1559,17 @@ describe('memoryService', () => {
|
||||
},
|
||||
],
|
||||
skillsCreated: ['skill-x'],
|
||||
turnCount: 3,
|
||||
durationMs: 2400,
|
||||
terminateReason: 'GOAL',
|
||||
},
|
||||
{
|
||||
runAt: '2025-01-02T00:00:00Z',
|
||||
sessionIds: ['c'],
|
||||
skillsCreated: [],
|
||||
turnCount: 1,
|
||||
durationMs: 900,
|
||||
terminateReason: 'GOAL',
|
||||
},
|
||||
];
|
||||
const state: ExtractionState = { runs };
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
SESSION_FILE_PREFIX,
|
||||
loadConversationRecord,
|
||||
type ConversationRecord,
|
||||
type MemoryScratchpad,
|
||||
} from './chatRecordingService.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import { coreEvents } from '../utils/events.js';
|
||||
@@ -22,7 +23,10 @@ import { FRONTMATTER_REGEX, parseFrontmatter } from '../skills/skillLoader.js';
|
||||
import { LocalAgentExecutor } from '../agents/local-executor.js';
|
||||
import { SkillExtractionAgent } from '../agents/skill-extraction-agent.js';
|
||||
import { getModelConfigAlias } from '../agents/registry.js';
|
||||
import type { SubagentActivityEvent } from '../agents/types.js';
|
||||
import {
|
||||
isToolActivityError,
|
||||
type SubagentActivityEvent,
|
||||
} from '../agents/types.js';
|
||||
import { ExecutionLifecycleService } from './executionLifecycleService.js';
|
||||
import { PromptRegistry } from '../prompts/prompt-registry.js';
|
||||
import { ResourceRegistry } from '../resources/resource-registry.js';
|
||||
@@ -36,6 +40,7 @@ import {
|
||||
applyParsedSkillPatches,
|
||||
hasParsedPatchHunks,
|
||||
} from './memoryPatchUtils.js';
|
||||
import { sanitizeWorkflowSummaryForScratchpad } from './sessionScratchpadUtils.js';
|
||||
|
||||
const LOCK_FILENAME = '.extraction.lock';
|
||||
const STATE_FILENAME = '.extraction-state.json';
|
||||
@@ -53,20 +58,6 @@ interface LockInfo {
|
||||
startedAt: string;
|
||||
}
|
||||
|
||||
function hasProperty<T extends string>(
|
||||
obj: unknown,
|
||||
prop: T,
|
||||
): obj is { [key in T]: unknown } {
|
||||
return obj !== null && typeof obj === 'object' && prop in obj;
|
||||
}
|
||||
|
||||
function isStringProperty<T extends string>(
|
||||
obj: unknown,
|
||||
prop: T,
|
||||
): obj is { [key in T]: string } {
|
||||
return hasProperty(obj, prop) && typeof obj[prop] === 'string';
|
||||
}
|
||||
|
||||
interface SessionVersion {
|
||||
sessionId: string;
|
||||
lastUpdated: string;
|
||||
@@ -75,6 +66,7 @@ interface SessionVersion {
|
||||
interface IndexedSession extends SessionVersion {
|
||||
filePath: string;
|
||||
summary?: string;
|
||||
memoryScratchpad?: MemoryScratchpad;
|
||||
userMessageCount: number;
|
||||
}
|
||||
|
||||
@@ -87,6 +79,9 @@ export interface ExtractionRun {
|
||||
candidateSessions?: SessionVersion[];
|
||||
processedSessions?: SessionVersion[];
|
||||
skillsCreated: string[];
|
||||
turnCount?: number;
|
||||
durationMs?: number;
|
||||
terminateReason?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -153,12 +148,25 @@ function normalizeStringArray(value: unknown): string[] {
|
||||
return value.filter((item): item is string => typeof item === 'string');
|
||||
}
|
||||
|
||||
function normalizeOptionalNumber(value: unknown): number | undefined {
|
||||
return typeof value === 'number' && Number.isFinite(value)
|
||||
? value
|
||||
: undefined;
|
||||
}
|
||||
|
||||
function normalizeOptionalString(value: unknown): string | undefined {
|
||||
return typeof value === 'string' ? value : undefined;
|
||||
}
|
||||
|
||||
function isExtractionRunLike(value: unknown): value is {
|
||||
runAt: string;
|
||||
sessionIds?: unknown;
|
||||
candidateSessions?: unknown;
|
||||
processedSessions?: unknown;
|
||||
skillsCreated: unknown;
|
||||
turnCount?: unknown;
|
||||
durationMs?: unknown;
|
||||
terminateReason?: unknown;
|
||||
} {
|
||||
return (
|
||||
typeof value === 'object' &&
|
||||
@@ -198,6 +206,9 @@ function buildExtractionRun(value: unknown): ExtractionRun | null {
|
||||
processedSessions:
|
||||
processedSessions.length > 0 ? processedSessions : undefined,
|
||||
skillsCreated: normalizeStringArray(value.skillsCreated),
|
||||
turnCount: normalizeOptionalNumber(value.turnCount),
|
||||
durationMs: normalizeOptionalNumber(value.durationMs),
|
||||
terminateReason: normalizeOptionalString(value.terminateReason),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -291,7 +302,7 @@ function shouldReplaceIndexedSession(
|
||||
return compareIndexedSessions(candidate, existing) < 0;
|
||||
}
|
||||
|
||||
function isReadFileStartActivity(
|
||||
function isReadFileActivity(
|
||||
activity: SubagentActivityEvent,
|
||||
): activity is SubagentActivityEvent & {
|
||||
data: { name: string; args?: { file_path?: unknown }; callId?: unknown };
|
||||
@@ -302,11 +313,36 @@ function isReadFileStartActivity(
|
||||
);
|
||||
}
|
||||
|
||||
function getResolvedReadFilePath(
|
||||
function getReadFileCallId(activity: SubagentActivityEvent): string | null {
|
||||
if (isReadFileActivity(activity)) {
|
||||
const { callId } = activity.data;
|
||||
return typeof callId === 'string' ? callId : null;
|
||||
}
|
||||
|
||||
if (
|
||||
activity.type === 'TOOL_CALL_END' &&
|
||||
activity.data['name'] === READ_FILE_TOOL_NAME
|
||||
) {
|
||||
const id = activity.data['id'];
|
||||
return typeof id === 'string' ? id : null;
|
||||
}
|
||||
|
||||
if (
|
||||
activity.type === 'ERROR' &&
|
||||
activity.data['name'] === READ_FILE_TOOL_NAME
|
||||
) {
|
||||
const callId = activity.data['callId'];
|
||||
return typeof callId === 'string' ? callId : null;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function getResolvedActivityFilePath(
|
||||
config: Config,
|
||||
activity: SubagentActivityEvent,
|
||||
): string | null {
|
||||
if (!isReadFileStartActivity(activity)) {
|
||||
if (!isReadFileActivity(activity)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -320,48 +356,11 @@ function getResolvedReadFilePath(
|
||||
return null;
|
||||
}
|
||||
|
||||
return path.resolve(config.getTargetDir(), args.file_path);
|
||||
}
|
||||
|
||||
function getReadFileStartCallId(
|
||||
activity: SubagentActivityEvent,
|
||||
): string | null {
|
||||
if (
|
||||
!isReadFileStartActivity(activity) ||
|
||||
!isStringProperty(activity.data, 'callId')
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return activity.data.callId;
|
||||
}
|
||||
|
||||
function getCompletedReadFileCallId(
|
||||
activity: SubagentActivityEvent,
|
||||
): string | null {
|
||||
if (
|
||||
activity.type !== 'TOOL_CALL_END' ||
|
||||
activity.data['name'] !== READ_FILE_TOOL_NAME ||
|
||||
!isStringProperty(activity.data, 'id')
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return activity.data['id'];
|
||||
}
|
||||
|
||||
function getFailedReadFileCallId(
|
||||
activity: SubagentActivityEvent,
|
||||
): string | null {
|
||||
if (
|
||||
activity.type !== 'ERROR' ||
|
||||
activity.data['name'] !== READ_FILE_TOOL_NAME ||
|
||||
!isStringProperty(activity.data, 'callId')
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return activity.data['callId'];
|
||||
const targetDir =
|
||||
'getTargetDir' in config && typeof config.getTargetDir === 'function'
|
||||
? config.getTargetDir()
|
||||
: process.cwd();
|
||||
return path.resolve(targetDir, args.file_path);
|
||||
}
|
||||
|
||||
function getUserMessageCount(
|
||||
@@ -580,6 +579,10 @@ async function scanEligibleSessions(
|
||||
lastUpdated: conversation.lastUpdated,
|
||||
filePath,
|
||||
summary: conversation.summary,
|
||||
memoryScratchpad:
|
||||
conversation.memoryScratchpadIsStale === true
|
||||
? undefined
|
||||
: conversation.memoryScratchpad,
|
||||
userMessageCount: getUserMessageCount(conversation),
|
||||
};
|
||||
|
||||
@@ -595,6 +598,28 @@ async function scanEligibleSessions(
|
||||
return Array.from(latestBySessionId.values()).sort(compareIndexedSessions);
|
||||
}
|
||||
|
||||
function formatSessionHeadline(session: IndexedSession): string {
|
||||
const rawWorkflowSummary = session.memoryScratchpad?.workflowSummary;
|
||||
const sanitizedWorkflowSummary =
|
||||
typeof rawWorkflowSummary === 'string'
|
||||
? sanitizeWorkflowSummaryForScratchpad(rawWorkflowSummary)
|
||||
: undefined;
|
||||
const workflowSummary = sanitizedWorkflowSummary?.trim()
|
||||
? sanitizedWorkflowSummary
|
||||
: undefined;
|
||||
const summary = session.summary ?? workflowSummary ?? '(no summary)';
|
||||
|
||||
if (
|
||||
session.summary &&
|
||||
workflowSummary &&
|
||||
workflowSummary !== session.summary
|
||||
) {
|
||||
return `${summary} | workflow: ${workflowSummary}`;
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a session index for the extraction agent: a compact listing of all
|
||||
* eligible sessions with their summary, file path, and new/previously-processed status.
|
||||
@@ -651,8 +676,7 @@ export async function buildSessionIndex(
|
||||
const status = candidateSessionIds.has(getSessionVersionKey(session))
|
||||
? '[NEW]'
|
||||
: '[old]';
|
||||
const summary = session.summary ?? '(no summary)';
|
||||
return `${status} ${summary} (${session.userMessageCount} user msgs) — ${session.filePath}`;
|
||||
return `${status} ${formatSessionHeadline(session)} (${session.userMessageCount} user msgs) — ${session.filePath}`;
|
||||
},
|
||||
);
|
||||
|
||||
@@ -999,18 +1023,19 @@ export async function startMemoryService(config: Config): Promise<void> {
|
||||
session,
|
||||
]),
|
||||
);
|
||||
const pendingReadFileSessions = new Map<string, SessionVersion>();
|
||||
const processedSessionKeys = new Set<string>();
|
||||
const pendingReadFileSessions = new Map<string, string>();
|
||||
|
||||
// Create and run the extraction agent
|
||||
const executor = await LocalAgentExecutor.create(
|
||||
agentDefinition,
|
||||
context,
|
||||
(activity) => {
|
||||
const readFileCallId = getReadFileStartCallId(activity);
|
||||
if (readFileCallId) {
|
||||
const resolvedPath = getResolvedReadFilePath(config, activity);
|
||||
if (!resolvedPath) {
|
||||
const readFileCallId = getReadFileCallId(activity);
|
||||
|
||||
if (activity.type === 'TOOL_CALL_START') {
|
||||
const resolvedPath = getResolvedActivityFilePath(config, activity);
|
||||
if (!resolvedPath || !readFileCallId) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1019,35 +1044,31 @@ export async function startMemoryService(config: Config): Promise<void> {
|
||||
return;
|
||||
}
|
||||
|
||||
pendingReadFileSessions.set(
|
||||
readFileCallId,
|
||||
getSessionVersionKey(session),
|
||||
);
|
||||
pendingReadFileSessions.set(readFileCallId, session);
|
||||
return;
|
||||
}
|
||||
|
||||
const completedReadFileCallId = getCompletedReadFileCallId(activity);
|
||||
if (completedReadFileCallId) {
|
||||
const sessionKey = pendingReadFileSessions.get(
|
||||
completedReadFileCallId,
|
||||
);
|
||||
if (!sessionKey) {
|
||||
return;
|
||||
}
|
||||
|
||||
processedSessionKeys.add(sessionKey);
|
||||
pendingReadFileSessions.delete(completedReadFileCallId);
|
||||
if (!readFileCallId) {
|
||||
return;
|
||||
}
|
||||
|
||||
const failedReadFileCallId = getFailedReadFileCallId(activity);
|
||||
if (failedReadFileCallId) {
|
||||
pendingReadFileSessions.delete(failedReadFileCallId);
|
||||
const session = pendingReadFileSessions.get(readFileCallId);
|
||||
if (!session) {
|
||||
return;
|
||||
}
|
||||
|
||||
pendingReadFileSessions.delete(readFileCallId);
|
||||
|
||||
if (
|
||||
activity.type === 'TOOL_CALL_END' &&
|
||||
!isToolActivityError(activity.data['data'])
|
||||
) {
|
||||
processedSessionKeys.add(getSessionVersionKey(session));
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
await executor.run(
|
||||
const executorResult = await executor.run(
|
||||
{ request: 'Extract skills from the provided sessions.' },
|
||||
abortController.signal,
|
||||
);
|
||||
@@ -1107,6 +1128,11 @@ export async function startMemoryService(config: Config): Promise<void> {
|
||||
})),
|
||||
processedSessions,
|
||||
skillsCreated,
|
||||
turnCount: normalizeOptionalNumber(executorResult?.turn_count),
|
||||
durationMs: normalizeOptionalNumber(executorResult?.duration_ms),
|
||||
terminateReason: normalizeOptionalString(
|
||||
executorResult?.terminate_reason,
|
||||
),
|
||||
};
|
||||
const updatedState: ExtractionState = {
|
||||
runs: [...state.runs, run],
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
|
||||
import {
|
||||
sanitizeWorkflowSummaryForScratchpad,
|
||||
summarizeShellCommandForScratchpad,
|
||||
} from './sessionScratchpadUtils.js';
|
||||
|
||||
describe('sessionScratchpadUtils', () => {
|
||||
describe('summarizeShellCommandForScratchpad', () => {
|
||||
it('summarizes quoted and assignment-prefixed shell commands', () => {
|
||||
expect(summarizeShellCommandForScratchpad('"npm" run test')).toBe('npm');
|
||||
expect(
|
||||
summarizeShellCommandForScratchpad(
|
||||
'DATABASE_URL=postgres://user:password@example/db pnpm test',
|
||||
),
|
||||
).toBe('pnpm');
|
||||
});
|
||||
|
||||
it('handles adversarial unterminated quoted input without exposing arguments', () => {
|
||||
const adversarialCommand = `"${'\\"!'.repeat(10_000)}`;
|
||||
|
||||
expect(summarizeShellCommandForScratchpad(adversarialCommand)).toBe(
|
||||
'shell',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('sanitizeWorkflowSummaryForScratchpad', () => {
|
||||
it('sanitizes adversarial shell commands in workflow summaries', () => {
|
||||
const adversarialCommand = `"${'\\"!'.repeat(10_000)}`;
|
||||
|
||||
expect(
|
||||
sanitizeWorkflowSummaryForScratchpad(
|
||||
`${SHELL_TOOL_NAME}: ${adversarialCommand} -> read_file`,
|
||||
),
|
||||
).toBe(`${SHELL_TOOL_NAME}: shell -> read_file`);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
|
||||
|
||||
const WORKFLOW_PART_SEPARATOR = ' | ';
|
||||
const TOOL_SEQUENCE_SEPARATOR = ' -> ';
|
||||
const SHELL_ASSIGNMENT_REGEX = /^[A-Za-z_][A-Za-z0-9_]*=/;
|
||||
const SAFE_COMMAND_NAME_REGEX = /^[A-Za-z0-9_.@+-]+$/;
|
||||
const SAFE_TOOL_SEQUENCE_ENTRY_REGEX = /^[A-Za-z_][A-Za-z0-9_:.]*$/;
|
||||
|
||||
function tokenizeShellCommand(command: string): string[] {
|
||||
const tokens: string[] = [];
|
||||
let currentToken = '';
|
||||
let quote: '"' | "'" | '`' | undefined;
|
||||
|
||||
for (let i = 0; i < command.length; i++) {
|
||||
const char = command[i];
|
||||
|
||||
if (quote) {
|
||||
if (char === quote) {
|
||||
quote = undefined;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (quote === '"' && char === '\\' && i + 1 < command.length) {
|
||||
currentToken += command[i + 1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
currentToken += char;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (char === ' ' || char === '\t' || char === '\n' || char === '\r') {
|
||||
if (currentToken) {
|
||||
tokens.push(currentToken);
|
||||
currentToken = '';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (char === '"' || char === "'" || char === '`') {
|
||||
quote = char;
|
||||
continue;
|
||||
}
|
||||
|
||||
currentToken += char;
|
||||
}
|
||||
|
||||
if (currentToken) {
|
||||
tokens.push(currentToken);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
function getSafeCommandName(token: string): string | undefined {
|
||||
if (!token || SHELL_ASSIGNMENT_REGEX.test(token)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const pathParts = token.split(/[/\\]/).filter(Boolean);
|
||||
const basename = pathParts[pathParts.length - 1] ?? token;
|
||||
if (!basename || basename.includes('://')) {
|
||||
return 'shell';
|
||||
}
|
||||
|
||||
return SAFE_COMMAND_NAME_REGEX.test(basename) ? basename : 'shell';
|
||||
}
|
||||
|
||||
export function summarizeShellCommandForScratchpad(
|
||||
command: string,
|
||||
): string | undefined {
|
||||
const normalized = command.replace(/\s+/g, ' ').trim();
|
||||
if (normalized.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
for (const token of tokenizeShellCommand(normalized)) {
|
||||
const commandName = getSafeCommandName(token);
|
||||
if (commandName) {
|
||||
return commandName;
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function sanitizeWorkflowToolSequenceEntry(entry: string): string | undefined {
|
||||
const trimmed = entry.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const shellPrefix = `${SHELL_TOOL_NAME}:`;
|
||||
if (trimmed.startsWith(shellPrefix)) {
|
||||
const command = trimmed.slice(shellPrefix.length).trim();
|
||||
const commandSummary = summarizeShellCommandForScratchpad(command);
|
||||
return commandSummary
|
||||
? `${SHELL_TOOL_NAME}: ${commandSummary}`
|
||||
: SHELL_TOOL_NAME;
|
||||
}
|
||||
|
||||
if (
|
||||
trimmed === SHELL_TOOL_NAME ||
|
||||
SAFE_TOOL_SEQUENCE_ENTRY_REGEX.test(trimmed)
|
||||
) {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function sanitizeWorkflowSummaryForScratchpad(summary: string): string {
|
||||
const normalized = summary.replace(/\s+/g, ' ').trim();
|
||||
if (!normalized.includes(`${SHELL_TOOL_NAME}:`)) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
const sanitizedParts: string[] = [];
|
||||
for (const part of normalized.split(WORKFLOW_PART_SEPARATOR)) {
|
||||
const trimmed = part.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trimmed.includes(`${SHELL_TOOL_NAME}:`)) {
|
||||
const sanitizedToolSequence = trimmed
|
||||
.split(TOOL_SEQUENCE_SEPARATOR)
|
||||
.map(sanitizeWorkflowToolSequenceEntry)
|
||||
.filter((entry): entry is string => Boolean(entry));
|
||||
if (sanitizedToolSequence.length > 0) {
|
||||
sanitizedParts.push(
|
||||
sanitizedToolSequence.join(TOOL_SEQUENCE_SEPARATOR),
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
trimmed.startsWith('paths ') ||
|
||||
trimmed === 'validated' ||
|
||||
trimmed === 'validation failed'
|
||||
) {
|
||||
sanitizedParts.push(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
return sanitizedParts.join(WORKFLOW_PART_SEPARATOR);
|
||||
}
|
||||
@@ -9,6 +9,8 @@ import { generateSummary, getPreviousSession } from './sessionSummaryUtils.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import type { ContentGenerator } from '../core/contentGenerator.js';
|
||||
import * as chatRecordingService from './chatRecordingService.js';
|
||||
import type { ConversationRecord } from './chatRecordingService.js';
|
||||
import { CoreToolCallStatus } from '../scheduler/types.js';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import * as os from 'node:os';
|
||||
@@ -37,25 +39,33 @@ vi.mock('./chatRecordingService.js', async () => {
|
||||
|
||||
interface SessionFixture {
|
||||
summary?: string;
|
||||
memoryScratchpad?: unknown;
|
||||
sessionId?: string;
|
||||
startTime?: string;
|
||||
lastUpdated?: string;
|
||||
kind?: ConversationRecord['kind'];
|
||||
messages?: ConversationRecord['messages'];
|
||||
userMessageCount: number;
|
||||
}
|
||||
|
||||
function buildLegacySessionJson(fixture: SessionFixture): string {
|
||||
const messages =
|
||||
fixture.messages ??
|
||||
Array.from({ length: fixture.userMessageCount }, (_, i) => ({
|
||||
id: String(i + 1),
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: `Message ${i + 1}` }],
|
||||
}));
|
||||
return JSON.stringify({
|
||||
sessionId: fixture.sessionId ?? 'session-id',
|
||||
projectHash: 'abc123',
|
||||
startTime: fixture.startTime ?? '2024-01-01T00:00:00Z',
|
||||
lastUpdated: fixture.lastUpdated ?? '2024-01-01T00:00:00Z',
|
||||
summary: fixture.summary,
|
||||
messages: Array.from({ length: fixture.userMessageCount }, (_, i) => ({
|
||||
id: String(i + 1),
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: `Message ${i + 1}` }],
|
||||
})),
|
||||
memoryScratchpad: fixture.memoryScratchpad,
|
||||
...(fixture.kind ? { kind: fixture.kind } : {}),
|
||||
messages,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -66,17 +76,22 @@ function buildJsonlSession(fixture: SessionFixture): string {
|
||||
startTime: fixture.startTime ?? '2024-01-01T00:00:00Z',
|
||||
lastUpdated: fixture.lastUpdated ?? '2024-01-01T00:00:00Z',
|
||||
...(fixture.summary !== undefined ? { summary: fixture.summary } : {}),
|
||||
...(fixture.memoryScratchpad !== undefined
|
||||
? { memoryScratchpad: fixture.memoryScratchpad }
|
||||
: {}),
|
||||
...(fixture.kind ? { kind: fixture.kind } : {}),
|
||||
};
|
||||
const messages =
|
||||
fixture.messages ??
|
||||
Array.from({ length: fixture.userMessageCount }, (_, i) => ({
|
||||
id: String(i + 1),
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: `Message ${i + 1}` }],
|
||||
}));
|
||||
const lines: string[] = [JSON.stringify(metadata)];
|
||||
for (let i = 0; i < fixture.userMessageCount; i++) {
|
||||
lines.push(
|
||||
JSON.stringify({
|
||||
id: String(i + 1),
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: `Message ${i + 1}` }],
|
||||
}),
|
||||
);
|
||||
for (const message of messages) {
|
||||
lines.push(JSON.stringify(message));
|
||||
}
|
||||
return lines.join('\n') + '\n';
|
||||
}
|
||||
@@ -119,6 +134,7 @@ describe('sessionSummaryUtils', () => {
|
||||
|
||||
mockConfig = {
|
||||
getContentGenerator: vi.fn().mockReturnValue(mockContentGenerator),
|
||||
getProjectRoot: vi.fn().mockReturnValue(projectTempDir),
|
||||
getSessionId: vi.fn().mockReturnValue('current-session'),
|
||||
storage: {
|
||||
getProjectTempDir: vi.fn().mockReturnValue(projectTempDir),
|
||||
@@ -157,13 +173,50 @@ describe('sessionSummaryUtils', () => {
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it('should return null if most recent session already has summary', async () => {
|
||||
it('should return null if most recent session already has summary metadata', async () => {
|
||||
await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-abc12345.json',
|
||||
buildLegacySessionJson({
|
||||
userMessageCount: 5,
|
||||
summary: 'Existing summary',
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary: 'read_file -> edit',
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await getPreviousSession(mockConfig);
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it('should return path if most recent session has summary but no scratchpad', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-abc12345.json',
|
||||
buildLegacySessionJson({
|
||||
userMessageCount: 5,
|
||||
summary: 'Existing summary',
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await getPreviousSession(mockConfig);
|
||||
|
||||
expect(result).toBe(filePath);
|
||||
});
|
||||
|
||||
it('should return null if most recent session has scratchpad but no summary', async () => {
|
||||
await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-abc12345.json',
|
||||
buildLegacySessionJson({
|
||||
userMessageCount: 5,
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary: 'read_file -> edit',
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -302,6 +355,36 @@ describe('sessionSummaryUtils', () => {
|
||||
metadataOnly: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('should skip subagent sessions when backfilling scratchpads', async () => {
|
||||
const mainPath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-main0001.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'main-session',
|
||||
userMessageCount: 2,
|
||||
lastUpdated: '2024-01-01T10:00:00Z',
|
||||
summary: 'Main session summary',
|
||||
}),
|
||||
);
|
||||
await setSessionMtime(mainPath, '2024-01-01T10:00:00Z');
|
||||
|
||||
await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-02T10-00-sub00001.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'subagent-session',
|
||||
userMessageCount: 2,
|
||||
lastUpdated: '2024-01-02T10:00:00Z',
|
||||
summary: 'Subagent summary',
|
||||
kind: 'subagent',
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await getPreviousSession(mockConfig);
|
||||
|
||||
expect(result).toBe(mainPath);
|
||||
});
|
||||
});
|
||||
|
||||
describe('generateSummary', () => {
|
||||
@@ -324,6 +407,7 @@ describe('sessionSummaryUtils', () => {
|
||||
expect(mockGenerateSummary).toHaveBeenCalledTimes(1);
|
||||
const written = JSON.parse(await fs.readFile(filePath, 'utf-8'));
|
||||
expect(written.summary).toBe('Add dark mode to the app');
|
||||
expect(written.memoryScratchpad).toEqual({ version: 1 });
|
||||
expect(written.lastUpdated).toBe(lastUpdated);
|
||||
});
|
||||
|
||||
@@ -356,10 +440,160 @@ describe('sessionSummaryUtils', () => {
|
||||
expect(lastRecord).toEqual({
|
||||
$set: {
|
||||
summary: 'Add dark mode to the app',
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('should backfill scratchpad without regenerating summary', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-backfill.jsonl',
|
||||
buildJsonlSession({
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
}),
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
expect(mockGenerateSummary).not.toHaveBeenCalled();
|
||||
const lines = (await fs.readFile(filePath, 'utf-8'))
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
const lastRecord = JSON.parse(lines[lines.length - 1]);
|
||||
expect(lastRecord).toEqual({
|
||||
$set: {
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('should not retry summary generation after writing a scratchpad fallback', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-summary-fallback.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'summary-fallback-session',
|
||||
userMessageCount: 2,
|
||||
messages: [
|
||||
{
|
||||
id: 'u1',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Read package metadata' }],
|
||||
},
|
||||
{
|
||||
id: 'g1',
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Reading package.json' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-1',
|
||||
name: 'read_file',
|
||||
args: { file_path: 'package.json' },
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'u2',
|
||||
timestamp: '2024-01-01T00:00:02Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Done' }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
mockGenerateSummary.mockResolvedValue(undefined);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
expect(mockGenerateSummary).toHaveBeenCalledTimes(1);
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.summary).toBeUndefined();
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'read_file | paths package.json',
|
||||
toolSequence: ['read_file'],
|
||||
touchedPaths: ['package.json'],
|
||||
});
|
||||
});
|
||||
|
||||
it('should refresh stale scratchpads when messages were appended after metadata', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-resumed1.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'resumed-session',
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
lastUpdated: '2024-01-01T10:00:00Z',
|
||||
}),
|
||||
);
|
||||
await fs.appendFile(
|
||||
filePath,
|
||||
`${JSON.stringify({
|
||||
$set: {
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
workflowSummary: 'read_file',
|
||||
toolSequence: ['read_file'],
|
||||
},
|
||||
},
|
||||
})}\n`,
|
||||
);
|
||||
await fs.appendFile(
|
||||
filePath,
|
||||
[
|
||||
JSON.stringify({
|
||||
id: 'u-resumed',
|
||||
timestamp: '2024-01-02T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Update src/app.ts' }],
|
||||
}),
|
||||
JSON.stringify({
|
||||
id: 'g-resumed',
|
||||
timestamp: '2024-01-02T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Editing file' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-resumed',
|
||||
name: 'replace',
|
||||
args: { file_path: 'src/app.ts' },
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-02T00:00:01Z',
|
||||
},
|
||||
],
|
||||
}),
|
||||
JSON.stringify({
|
||||
$set: { lastUpdated: '2024-01-02T00:00:02Z' },
|
||||
}),
|
||||
].join('\n') + '\n',
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
expect(mockGenerateSummary).not.toHaveBeenCalled();
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'replace | paths src/app.ts',
|
||||
toolSequence: ['replace'],
|
||||
touchedPaths: ['src/app.ts'],
|
||||
});
|
||||
});
|
||||
|
||||
it('should preserve a newer JSONL lastUpdated written concurrently', async () => {
|
||||
const initialLastUpdated = '2024-01-01T10:00:00Z';
|
||||
const newerLastUpdated = '2024-01-02T12:34:56Z';
|
||||
@@ -411,6 +645,7 @@ describe('sessionSummaryUtils', () => {
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.summary).toBe('Add dark mode to the app');
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({ version: 1 });
|
||||
expect(savedConversation?.lastUpdated).toBe(newerLastUpdated);
|
||||
|
||||
const lines = (await fs.readFile(filePath, 'utf-8'))
|
||||
@@ -420,6 +655,9 @@ describe('sessionSummaryUtils', () => {
|
||||
expect(lastRecord).toEqual({
|
||||
$set: {
|
||||
summary: 'Add dark mode to the app',
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
@@ -454,6 +692,9 @@ describe('sessionSummaryUtils', () => {
|
||||
expect(JSON.parse(previousLines[previousLines.length - 1])).toEqual({
|
||||
$set: {
|
||||
summary: 'Add dark mode to the app',
|
||||
memoryScratchpad: {
|
||||
version: 1,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -462,5 +703,312 @@ describe('sessionSummaryUtils', () => {
|
||||
.filter(Boolean);
|
||||
expect(currentLines).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('should preserve repo-root file names in scratchpad touched paths', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-rootpath.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'root-path-session',
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
messages: [
|
||||
{
|
||||
id: 'u1',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Inspect package.json' }],
|
||||
},
|
||||
{
|
||||
id: 'g1',
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Reading files' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-1',
|
||||
name: 'read_file',
|
||||
args: { file_path: 'package.json' },
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'u2',
|
||||
timestamp: '2024-01-01T00:00:02Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Done' }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'read_file | paths package.json',
|
||||
toolSequence: ['read_file'],
|
||||
touchedPaths: ['package.json'],
|
||||
});
|
||||
});
|
||||
|
||||
it('should summarize shell commands without raw arguments in scratchpad tool sequence', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-shellcmd.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'shell-command-session',
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
messages: [
|
||||
{
|
||||
id: 'u1',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Run the migration and regenerate docs' }],
|
||||
},
|
||||
{
|
||||
id: 'g1',
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Running commands' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-1',
|
||||
name: 'run_shell_command',
|
||||
args: {
|
||||
command:
|
||||
'curl https://api.example.com -H "Authorization: Bearer sk-secret-token"',
|
||||
},
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
},
|
||||
{
|
||||
id: 'tool-2',
|
||||
name: 'run_shell_command',
|
||||
args: {
|
||||
command:
|
||||
'DATABASE_URL=postgresql://user:password@localhost/db npm run migrate -- --name add-users',
|
||||
},
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:02Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'u2',
|
||||
timestamp: '2024-01-01T00:00:03Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Done' }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'run_shell_command: curl -> run_shell_command: npm',
|
||||
toolSequence: ['run_shell_command: curl', 'run_shell_command: npm'],
|
||||
});
|
||||
expect(
|
||||
savedConversation?.memoryScratchpad?.workflowSummary,
|
||||
).not.toContain('Authorization');
|
||||
expect(
|
||||
savedConversation?.memoryScratchpad?.workflowSummary,
|
||||
).not.toContain('sk-secret-token');
|
||||
expect(
|
||||
savedConversation?.memoryScratchpad?.workflowSummary,
|
||||
).not.toContain('password');
|
||||
expect(
|
||||
savedConversation?.memoryScratchpad?.workflowSummary,
|
||||
).not.toContain('add-users');
|
||||
});
|
||||
|
||||
it('should not classify validation substrings as validation tools', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-validation-substring.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'validation-substring-session',
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
messages: [
|
||||
{
|
||||
id: 'u1',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Run the contest helper' }],
|
||||
},
|
||||
{
|
||||
id: 'g1',
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Running helper' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-1',
|
||||
name: 'contest_runner',
|
||||
args: {},
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'u2',
|
||||
timestamp: '2024-01-01T00:00:02Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Done' }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'contest_runner',
|
||||
toolSequence: ['contest_runner'],
|
||||
});
|
||||
});
|
||||
|
||||
it('should cap nested path extraction depth', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-deep-paths.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'deep-paths-session',
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
messages: [
|
||||
{
|
||||
id: 'u1',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Edit shallow and deeply nested files' }],
|
||||
},
|
||||
{
|
||||
id: 'g1',
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Editing files' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-1',
|
||||
name: 'replace',
|
||||
args: {
|
||||
file_path: 'src/shallow.ts',
|
||||
level1: {
|
||||
level2: {
|
||||
level3: {
|
||||
level4: {
|
||||
level5: {
|
||||
level6: {
|
||||
level7: {
|
||||
file_path: 'src/deep.ts',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'u2',
|
||||
timestamp: '2024-01-01T00:00:02Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Done' }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'replace | paths src/shallow.ts',
|
||||
toolSequence: ['replace'],
|
||||
touchedPaths: ['src/shallow.ts'],
|
||||
});
|
||||
});
|
||||
|
||||
it('should use the latest validation result in scratchpad metadata', async () => {
|
||||
const filePath = await writeSession(
|
||||
chatsDir,
|
||||
'session-2024-01-01T10-00-validation.jsonl',
|
||||
buildJsonlSession({
|
||||
sessionId: 'validation-session',
|
||||
userMessageCount: 2,
|
||||
summary: 'Existing summary',
|
||||
messages: [
|
||||
{
|
||||
id: 'u1',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Fix the tests' }],
|
||||
},
|
||||
{
|
||||
id: 'g1',
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
type: 'gemini',
|
||||
content: [{ text: 'Running tests' }],
|
||||
toolCalls: [
|
||||
{
|
||||
id: 'tool-1',
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'npm test' },
|
||||
status: CoreToolCallStatus.Error,
|
||||
timestamp: '2024-01-01T00:00:01Z',
|
||||
},
|
||||
{
|
||||
id: 'tool-2',
|
||||
name: 'run_shell_command',
|
||||
args: { command: 'npm test' },
|
||||
status: CoreToolCallStatus.Success,
|
||||
timestamp: '2024-01-01T00:00:02Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'u2',
|
||||
timestamp: '2024-01-01T00:00:03Z',
|
||||
type: 'user',
|
||||
content: [{ text: 'Done' }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
await generateSummary(mockConfig);
|
||||
|
||||
const savedConversation =
|
||||
await chatRecordingService.loadConversationRecord(filePath);
|
||||
expect(savedConversation?.memoryScratchpad).toEqual({
|
||||
version: 1,
|
||||
workflowSummary: 'run_shell_command: npm | validated',
|
||||
toolSequence: ['run_shell_command: npm'],
|
||||
validationStatus: 'passed',
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -12,15 +12,29 @@ import {
|
||||
SESSION_FILE_PREFIX,
|
||||
loadConversationRecord,
|
||||
type ConversationRecord,
|
||||
type MemoryScratchpad,
|
||||
type ToolCallRecord,
|
||||
} from './chatRecordingService.js';
|
||||
import { CoreToolCallStatus } from '../scheduler/types.js';
|
||||
import { SHELL_TOOL_NAME } from '../tools/definitions/base-declarations.js';
|
||||
import { summarizeShellCommandForScratchpad } from './sessionScratchpadUtils.js';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
const MIN_MESSAGES_FOR_SUMMARY = 1;
|
||||
const MAX_SCRATCHPAD_TOOLS = 6;
|
||||
const MAX_SCRATCHPAD_PATHS = 4;
|
||||
const MAX_SCRATCHPAD_PATH_DEPTH = 6;
|
||||
const MAX_WORKFLOW_SUMMARY_LENGTH = 160;
|
||||
const VALIDATION_COMMAND_REGEX =
|
||||
/\b(test|tests|vitest|jest|pytest|cargo test|npm test|pnpm test|yarn test|bun test|lint|build|check|typecheck)\b/i;
|
||||
const PATH_KEY_REGEX = /(path|file|dir|directory|cwd|root)/i;
|
||||
const VALIDATION_TOOL_REGEX = /\b(test|lint|build|check|typecheck)\b/i;
|
||||
|
||||
type LoadedSession = ConversationRecord & {
|
||||
messageCount?: number;
|
||||
userMessageCount?: number;
|
||||
memoryScratchpadIsStale?: boolean;
|
||||
};
|
||||
|
||||
interface SessionFileCandidate {
|
||||
@@ -72,6 +86,238 @@ function getSessionTimestampMs(session: LoadedSession): number {
|
||||
return Number.isNaN(parsed) ? 0 : parsed;
|
||||
}
|
||||
|
||||
function normalizeToolName(name: string): string {
|
||||
const trimmed = name.trim();
|
||||
return trimmed.length > 0 ? trimmed : 'unknown_tool';
|
||||
}
|
||||
|
||||
function pushUniqueLimited(
|
||||
target: string[],
|
||||
value: string,
|
||||
limit: number,
|
||||
): void {
|
||||
if (!value || target.includes(value) || target.length >= limit) {
|
||||
return;
|
||||
}
|
||||
target.push(value);
|
||||
}
|
||||
|
||||
function normalizePathCandidate(
|
||||
candidate: string,
|
||||
projectRoot: string,
|
||||
): string | null {
|
||||
const trimmed = candidate.trim();
|
||||
if (
|
||||
trimmed.length === 0 ||
|
||||
trimmed.length > 240 ||
|
||||
trimmed.includes('\n') ||
|
||||
(!trimmed.includes('/') &&
|
||||
!trimmed.includes('\\') &&
|
||||
!trimmed.startsWith('.') &&
|
||||
path.extname(trimmed).length === 0)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let normalized = trimmed.replace(/\\/g, '/');
|
||||
if (path.isAbsolute(trimmed)) {
|
||||
const relative = path.relative(projectRoot, trimmed);
|
||||
normalized =
|
||||
relative && !relative.startsWith('..') && !path.isAbsolute(relative)
|
||||
? relative.replace(/\\/g, '/')
|
||||
: path.basename(trimmed);
|
||||
}
|
||||
|
||||
if (normalized.length > 120) {
|
||||
normalized = normalized.split('/').slice(-3).join('/');
|
||||
}
|
||||
|
||||
return normalized.length > 0 ? normalized : null;
|
||||
}
|
||||
|
||||
function collectPathsFromValue(
|
||||
value: unknown,
|
||||
projectRoot: string,
|
||||
paths: string[],
|
||||
keyHint?: string,
|
||||
depth = 0,
|
||||
): void {
|
||||
if (
|
||||
paths.length >= MAX_SCRATCHPAD_PATHS ||
|
||||
depth > MAX_SCRATCHPAD_PATH_DEPTH
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (typeof value === 'string') {
|
||||
if (!keyHint || !PATH_KEY_REGEX.test(keyHint)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const normalized = normalizePathCandidate(value, projectRoot);
|
||||
if (normalized) {
|
||||
pushUniqueLimited(paths, normalized, MAX_SCRATCHPAD_PATHS);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (Array.isArray(value)) {
|
||||
for (const item of value) {
|
||||
collectPathsFromValue(item, projectRoot, paths, keyHint, depth + 1);
|
||||
if (paths.length >= MAX_SCRATCHPAD_PATHS) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (typeof value !== 'object' || value === null) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const [key, nestedValue] of Object.entries(value)) {
|
||||
collectPathsFromValue(nestedValue, projectRoot, paths, key, depth + 1);
|
||||
if (paths.length >= MAX_SCRATCHPAD_PATHS) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function getToolCallCommand(toolCall: ToolCallRecord): string | undefined {
|
||||
for (const key of ['command', 'cmd', 'script']) {
|
||||
const value = toolCall.args[key];
|
||||
if (typeof value === 'string' && value.trim().length > 0) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function getToolSequenceEntry(toolCall: ToolCallRecord): string {
|
||||
const toolName = normalizeToolName(toolCall.name);
|
||||
if (toolName !== SHELL_TOOL_NAME) {
|
||||
return toolName;
|
||||
}
|
||||
|
||||
const command = getToolCallCommand(toolCall);
|
||||
const commandSummary = command
|
||||
? summarizeShellCommandForScratchpad(command)
|
||||
: undefined;
|
||||
return commandSummary ? `${toolName}: ${commandSummary}` : toolName;
|
||||
}
|
||||
|
||||
function getValidationStatusForToolCall(
|
||||
toolCall: ToolCallRecord,
|
||||
): MemoryScratchpad['validationStatus'] | undefined {
|
||||
const command = getToolCallCommand(toolCall);
|
||||
const isValidationTool =
|
||||
VALIDATION_TOOL_REGEX.test(toolCall.name) ||
|
||||
(command ? VALIDATION_COMMAND_REGEX.test(command) : false);
|
||||
if (!isValidationTool) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (toolCall.status === CoreToolCallStatus.Success) {
|
||||
return 'passed';
|
||||
}
|
||||
if (
|
||||
toolCall.status === CoreToolCallStatus.Error ||
|
||||
toolCall.status === CoreToolCallStatus.Cancelled
|
||||
) {
|
||||
return 'failed';
|
||||
}
|
||||
return 'unknown';
|
||||
}
|
||||
|
||||
function buildWorkflowSummary(
|
||||
toolSequence: string[],
|
||||
touchedPaths: string[],
|
||||
validationStatus?: MemoryScratchpad['validationStatus'],
|
||||
): string | undefined {
|
||||
const parts: string[] = [];
|
||||
|
||||
if (toolSequence.length > 0) {
|
||||
parts.push(toolSequence.join(' -> '));
|
||||
}
|
||||
if (touchedPaths.length > 0) {
|
||||
parts.push(`paths ${touchedPaths.join(', ')}`);
|
||||
}
|
||||
if (validationStatus === 'passed') {
|
||||
parts.push('validated');
|
||||
} else if (validationStatus === 'failed') {
|
||||
parts.push('validation failed');
|
||||
}
|
||||
|
||||
if (parts.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const summary = parts.join(' | ');
|
||||
if (summary.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
return summary.length > MAX_WORKFLOW_SUMMARY_LENGTH
|
||||
? `${summary.slice(0, MAX_WORKFLOW_SUMMARY_LENGTH - 3)}...`
|
||||
: summary;
|
||||
}
|
||||
|
||||
function buildMemoryScratchpad(
|
||||
messages: ConversationRecord['messages'],
|
||||
projectRoot: string,
|
||||
): MemoryScratchpad {
|
||||
const toolSequence: string[] = [];
|
||||
const touchedPaths: string[] = [];
|
||||
let validationStatus: MemoryScratchpad['validationStatus'];
|
||||
|
||||
for (const message of messages) {
|
||||
if (message.type !== 'gemini' || !message.toolCalls) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const toolCall of message.toolCalls) {
|
||||
pushUniqueLimited(
|
||||
toolSequence,
|
||||
getToolSequenceEntry(toolCall),
|
||||
MAX_SCRATCHPAD_TOOLS,
|
||||
);
|
||||
collectPathsFromValue(toolCall.args, projectRoot, touchedPaths);
|
||||
|
||||
const toolValidationStatus = getValidationStatusForToolCall(toolCall);
|
||||
if (toolValidationStatus) {
|
||||
validationStatus = toolValidationStatus;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const workflowSummary = buildWorkflowSummary(
|
||||
toolSequence,
|
||||
touchedPaths,
|
||||
validationStatus,
|
||||
);
|
||||
|
||||
return {
|
||||
version: 1,
|
||||
...(workflowSummary ? { workflowSummary } : {}),
|
||||
...(toolSequence.length > 0 ? { toolSequence } : {}),
|
||||
...(touchedPaths.length > 0 ? { touchedPaths } : {}),
|
||||
...(validationStatus ? { validationStatus } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
function hasCurrentMemoryScratchpad(session: LoadedSession): boolean {
|
||||
return Boolean(
|
||||
session.memoryScratchpad && session.memoryScratchpadIsStale !== true,
|
||||
);
|
||||
}
|
||||
|
||||
function hasSessionSummaryMetadata(session: LoadedSession): boolean {
|
||||
return hasCurrentMemoryScratchpad(session);
|
||||
}
|
||||
|
||||
function getLoadedMessageCount(session: LoadedSession): number {
|
||||
return session.messageCount ?? session.messages.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates and saves a summary for a session file.
|
||||
*/
|
||||
@@ -85,10 +331,11 @@ async function generateAndSaveSummary(
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip if summary already exists
|
||||
if (conversation.summary) {
|
||||
// Skip if workflow metadata already exists; memory extraction can use the
|
||||
// scratchpad even when summary generation was unavailable.
|
||||
if (hasSessionSummaryMetadata(conversation)) {
|
||||
debugLogger.debug(
|
||||
`[SessionSummary] Summary already exists for ${sessionPath}, skipping`,
|
||||
`[SessionSummary] Summary metadata already exists for ${sessionPath}, skipping`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
@@ -101,29 +348,31 @@ async function generateAndSaveSummary(
|
||||
return;
|
||||
}
|
||||
|
||||
// Create summary service
|
||||
const contentGenerator = config.getContentGenerator();
|
||||
if (!contentGenerator) {
|
||||
debugLogger.debug(
|
||||
'[SessionSummary] Content generator not available, skipping summary generation',
|
||||
);
|
||||
return;
|
||||
}
|
||||
const baseLlmClient = new BaseLlmClient(contentGenerator, config);
|
||||
const summaryService = new SessionSummaryService(baseLlmClient);
|
||||
|
||||
// Generate summary
|
||||
const summary = await summaryService.generateSummary({
|
||||
messages: conversation.messages,
|
||||
});
|
||||
|
||||
let summary = conversation.summary;
|
||||
if (!summary) {
|
||||
debugLogger.warn(
|
||||
`[SessionSummary] Failed to generate summary for ${sessionPath}`,
|
||||
);
|
||||
return;
|
||||
const contentGenerator = config.getContentGenerator();
|
||||
if (!contentGenerator) {
|
||||
debugLogger.debug(
|
||||
'[SessionSummary] Content generator not available, skipping summary generation',
|
||||
);
|
||||
} else {
|
||||
const baseLlmClient = new BaseLlmClient(contentGenerator, config);
|
||||
const summaryService = new SessionSummaryService(baseLlmClient);
|
||||
summary =
|
||||
(await summaryService.generateSummary({
|
||||
messages: conversation.messages,
|
||||
})) ?? undefined;
|
||||
|
||||
if (!summary) {
|
||||
debugLogger.warn(
|
||||
`[SessionSummary] Failed to generate summary for ${sessionPath}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let scratchpadSourceConversation = conversation;
|
||||
|
||||
// Re-read the file before writing to handle race conditions. For JSONL we
|
||||
// only need the metadata; for legacy JSON we need the full record so we can
|
||||
// round-trip the messages back to disk.
|
||||
@@ -136,18 +385,53 @@ async function generateAndSaveSummary(
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if summary was added by another process
|
||||
if (freshConversation.summary) {
|
||||
// Check if summary metadata was added by another process
|
||||
if (hasSessionSummaryMetadata(freshConversation)) {
|
||||
debugLogger.debug(
|
||||
`[SessionSummary] Summary was added by another process for ${sessionPath}`,
|
||||
`[SessionSummary] Summary metadata was added by another process for ${sessionPath}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
!hasCurrentMemoryScratchpad(freshConversation) &&
|
||||
(getLoadedMessageCount(freshConversation) !==
|
||||
getLoadedMessageCount(conversation) ||
|
||||
freshConversation.lastUpdated !== conversation.lastUpdated)
|
||||
) {
|
||||
const latestConversation = await loadConversationRecord(sessionPath);
|
||||
if (!latestConversation) {
|
||||
debugLogger.debug(`[SessionSummary] Could not re-read ${sessionPath}`);
|
||||
return;
|
||||
}
|
||||
if (hasSessionSummaryMetadata(latestConversation)) {
|
||||
debugLogger.debug(
|
||||
`[SessionSummary] Summary metadata was added by another process for ${sessionPath}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
scratchpadSourceConversation = latestConversation;
|
||||
}
|
||||
|
||||
const metadataUpdate: Partial<ConversationRecord> = {};
|
||||
if (!freshConversation.summary && summary) {
|
||||
metadataUpdate.summary = summary;
|
||||
}
|
||||
if (!hasCurrentMemoryScratchpad(freshConversation)) {
|
||||
metadataUpdate.memoryScratchpad = buildMemoryScratchpad(
|
||||
scratchpadSourceConversation.messages,
|
||||
config.getProjectRoot(),
|
||||
);
|
||||
}
|
||||
|
||||
if (Object.keys(metadataUpdate).length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (isJsonl) {
|
||||
await fs.appendFile(
|
||||
sessionPath,
|
||||
`${JSON.stringify({ $set: { summary } })}\n`,
|
||||
`${JSON.stringify({ $set: metadataUpdate })}\n`,
|
||||
);
|
||||
} else {
|
||||
const lastUpdated = freshConversation.lastUpdated;
|
||||
@@ -156,7 +440,7 @@ async function generateAndSaveSummary(
|
||||
JSON.stringify(
|
||||
{
|
||||
...freshConversation,
|
||||
summary,
|
||||
...metadataUpdate,
|
||||
lastUpdated,
|
||||
},
|
||||
null,
|
||||
@@ -165,13 +449,13 @@ async function generateAndSaveSummary(
|
||||
);
|
||||
}
|
||||
debugLogger.debug(
|
||||
`[SessionSummary] Saved summary for ${sessionPath}: "${summary}"`,
|
||||
`[SessionSummary] Saved summary metadata for ${sessionPath}${summary ? `: "${summary}"` : ''}`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the most recently updated previous session that still needs a summary.
|
||||
* Returns the path if it needs a summary, null otherwise.
|
||||
* Finds the most recently updated previous session that still needs workflow metadata.
|
||||
* Returns the path if it needs a scratchpad, null otherwise.
|
||||
*/
|
||||
export async function getPreviousSession(
|
||||
config: Config,
|
||||
@@ -217,7 +501,8 @@ export async function getPreviousSession(
|
||||
});
|
||||
if (!conversation) continue;
|
||||
if (conversation.sessionId === config.getSessionId()) continue;
|
||||
if (conversation.summary) continue;
|
||||
if (conversation.kind === 'subagent') continue;
|
||||
if (hasSessionSummaryMetadata(conversation)) continue;
|
||||
|
||||
// Only generate summaries for sessions with more than 1 user message.
|
||||
// `loadConversationRecord` populates `userMessageCount` in metadataOnly
|
||||
@@ -264,7 +549,7 @@ export async function getPreviousSession(
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates summary for the previous session if it lacks one.
|
||||
* Generates summary metadata for the previous session if it lacks a scratchpad.
|
||||
* This is designed to be called fire-and-forget on startup.
|
||||
*/
|
||||
export async function generateSummary(config: Config): Promise<void> {
|
||||
|
||||
Reference in New Issue
Block a user