2026-04-01 22:05:31 -07:00
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as fs from 'node:fs/promises' ;
import * as path from 'node:path' ;
import { constants as fsConstants } from 'node:fs' ;
import { randomUUID } from 'node:crypto' ;
2026-04-13 10:44:52 -07:00
import * as Diff from 'diff' ;
2026-04-01 22:05:31 -07:00
import type { Config } from '../config/config.js' ;
import {
SESSION_FILE_PREFIX ,
2026-04-22 16:07:39 -07:00
loadConversationRecord ,
2026-04-01 22:05:31 -07:00
type ConversationRecord ,
2026-04-24 17:21:12 -07:00
type MemoryScratchpad ,
2026-04-01 22:05:31 -07:00
} from './chatRecordingService.js' ;
import { debugLogger } from '../utils/debugLogger.js' ;
2026-04-08 11:08:49 -07:00
import { coreEvents } from '../utils/events.js' ;
2026-04-01 22:05:31 -07:00
import { isNodeError } from '../utils/errors.js' ;
import { FRONTMATTER_REGEX , parseFrontmatter } from '../skills/skillLoader.js' ;
import { LocalAgentExecutor } from '../agents/local-executor.js' ;
import { SkillExtractionAgent } from '../agents/skill-extraction-agent.js' ;
import { getModelConfigAlias } from '../agents/registry.js' ;
2026-04-24 17:21:12 -07:00
import {
isToolActivityError ,
type SubagentActivityEvent ,
} from '../agents/types.js' ;
2026-04-01 22:05:31 -07:00
import { ExecutionLifecycleService } from './executionLifecycleService.js' ;
import { PromptRegistry } from '../prompts/prompt-registry.js' ;
import { ResourceRegistry } from '../resources/resource-registry.js' ;
import { PolicyEngine } from '../policy/policy-engine.js' ;
import { PolicyDecision } from '../policy/types.js' ;
import { MessageBus } from '../confirmation-bus/message-bus.js' ;
import { Storage } from '../config/storage.js' ;
import type { AgentLoopContext } from '../config/agent-loop-context.js' ;
2026-04-22 16:07:39 -07:00
import { READ_FILE_TOOL_NAME } from '../tools/tool-names.js' ;
2026-04-13 10:44:52 -07:00
import {
applyParsedSkillPatches ,
hasParsedPatchHunks ,
} from './memoryPatchUtils.js' ;
2026-04-24 17:21:12 -07:00
import { sanitizeWorkflowSummaryForScratchpad } from './sessionScratchpadUtils.js' ;
2026-04-01 22:05:31 -07:00
const LOCK_FILENAME = '.extraction.lock' ;
const STATE_FILENAME = '.extraction-state.json' ;
const LOCK_STALE_MS = 35 * 60 * 1000 ; // 35 minutes (exceeds agent's 30-min time limit)
const MIN_USER_MESSAGES = 10 ;
const MIN_IDLE_MS = 3 * 60 * 60 * 1000 ; // 3 hours
const MAX_SESSION_INDEX_SIZE = 50 ;
2026-04-22 16:07:39 -07:00
const MAX_NEW_SESSION_BATCH_SIZE = 10 ;
2026-04-01 22:05:31 -07:00
/**
* Lock file content for coordinating across CLI instances.
*/
interface LockInfo {
pid : number ;
startedAt : string ;
}
2026-04-22 16:07:39 -07:00
interface SessionVersion {
sessionId : string ;
lastUpdated : string ;
}
interface IndexedSession extends SessionVersion {
filePath : string ;
summary? : string ;
2026-04-24 17:21:12 -07:00
memoryScratchpad? : MemoryScratchpad ;
2026-04-22 16:07:39 -07:00
userMessageCount : number ;
}
2026-04-01 22:05:31 -07:00
/**
* Metadata for a single extraction run.
*/
export interface ExtractionRun {
runAt : string ;
sessionIds : string [ ] ;
2026-04-22 16:07:39 -07:00
candidateSessions? : SessionVersion [ ] ;
processedSessions? : SessionVersion [ ] ;
2026-04-01 22:05:31 -07:00
skillsCreated : string [ ] ;
2026-04-24 17:21:12 -07:00
turnCount? : number ;
durationMs? : number ;
terminateReason? : string ;
2026-04-01 22:05:31 -07:00
}
/**
* Tracks extraction history with per-run metadata.
*/
export interface ExtractionState {
runs : ExtractionRun [ ] ;
}
/**
* Returns all session IDs that have been processed across all runs.
*/
export function getProcessedSessionIds ( state : ExtractionState ) : Set < string > {
const ids = new Set < string > ( ) ;
for ( const run of state . runs ) {
2026-04-22 16:07:39 -07:00
const processedSessionIds =
run . processedSessions ? . map ( ( session ) = > session . sessionId ) ? ?
run . sessionIds ;
for ( const id of processedSessionIds ) {
2026-04-01 22:05:31 -07:00
ids . add ( id ) ;
}
}
return ids ;
}
function isLockInfo ( value : unknown ) : value is LockInfo {
return (
typeof value === 'object' &&
value !== null &&
'pid' in value &&
typeof value . pid === 'number' &&
'startedAt' in value &&
typeof value . startedAt === 'string'
) ;
}
2026-04-22 16:07:39 -07:00
function isSessionVersion ( value : unknown ) : value is SessionVersion {
2026-04-01 22:05:31 -07:00
return (
typeof value === 'object' &&
value !== null &&
'sessionId' in value &&
typeof value . sessionId === 'string' &&
2026-04-22 16:07:39 -07:00
'lastUpdated' in value &&
typeof value . lastUpdated === 'string'
2026-04-01 22:05:31 -07:00
) ;
}
2026-04-22 16:07:39 -07:00
function normalizeSessionVersions ( value : unknown ) : SessionVersion [ ] {
if ( ! Array . isArray ( value ) ) {
return [ ] ;
}
return value . filter ( isSessionVersion ) . map ( ( session ) = > ( {
sessionId : session.sessionId ,
lastUpdated : session.lastUpdated ,
} ) ) ;
}
function normalizeStringArray ( value : unknown ) : string [ ] {
if ( ! Array . isArray ( value ) ) {
return [ ] ;
}
return value . filter ( ( item ) : item is string = > typeof item === 'string' ) ;
}
2026-04-24 17:21:12 -07:00
function normalizeOptionalNumber ( value : unknown ) : number | undefined {
return typeof value === 'number' && Number . isFinite ( value )
? value
: undefined ;
}
function normalizeOptionalString ( value : unknown ) : string | undefined {
return typeof value === 'string' ? value : undefined ;
}
2026-04-22 16:07:39 -07:00
function isExtractionRunLike ( value : unknown ) : value is {
runAt : string ;
sessionIds? : unknown ;
candidateSessions? : unknown ;
processedSessions? : unknown ;
skillsCreated : unknown ;
2026-04-24 17:21:12 -07:00
turnCount? : unknown ;
durationMs? : unknown ;
terminateReason? : unknown ;
2026-04-22 16:07:39 -07:00
} {
2026-04-01 22:05:31 -07:00
return (
typeof value === 'object' &&
value !== null &&
'runAt' in value &&
typeof value . runAt === 'string' &&
2026-04-22 16:07:39 -07:00
'skillsCreated' in value
2026-04-01 22:05:31 -07:00
) ;
}
function isExtractionState ( value : unknown ) : value is { runs : unknown [ ] } {
return (
typeof value === 'object' &&
value !== null &&
'runs' in value &&
Array . isArray ( value . runs )
) ;
}
2026-04-22 16:07:39 -07:00
function buildExtractionRun ( value : unknown ) : ExtractionRun | null {
if ( ! isExtractionRunLike ( value ) ) {
return null ;
}
const candidateSessions = normalizeSessionVersions ( value . candidateSessions ) ;
const processedSessions = normalizeSessionVersions ( value . processedSessions ) ;
const sessionIds = normalizeStringArray ( value . sessionIds ) ;
return {
runAt : value.runAt ,
sessionIds :
sessionIds.length > 0
? sessionIds
: processedSessions.map ( ( session ) = > session . sessionId ) ,
candidateSessions :
candidateSessions.length > 0 ? candidateSessions : undefined ,
processedSessions :
processedSessions.length > 0 ? processedSessions : undefined ,
skillsCreated : normalizeStringArray ( value . skillsCreated ) ,
2026-04-24 17:21:12 -07:00
turnCount : normalizeOptionalNumber ( value . turnCount ) ,
durationMs : normalizeOptionalNumber ( value . durationMs ) ,
terminateReason : normalizeOptionalString ( value . terminateReason ) ,
2026-04-22 16:07:39 -07:00
} ;
}
function getTimestampMs ( timestamp : string ) : number {
const parsed = Date . parse ( timestamp ) ;
return Number . isNaN ( parsed ) ? 0 : parsed ;
}
function getSessionVersionKey ( session : SessionVersion ) : string {
return ` ${ session . sessionId } \ u0000 ${ session . lastUpdated } ` ;
}
function hasLegacyRunProcessedSession (
run : ExtractionRun ,
session : SessionVersion ,
) : boolean {
return (
run . sessionIds . includes ( session . sessionId ) &&
getTimestampMs ( run . runAt ) >= getTimestampMs ( session . lastUpdated )
) ;
}
function isSessionVersionProcessed (
state : ExtractionState ,
session : SessionVersion ,
) : boolean {
const sessionKey = getSessionVersionKey ( session ) ;
for ( const run of state . runs ) {
if (
run . processedSessions ? . some (
( processed ) = > getSessionVersionKey ( processed ) === sessionKey ,
)
) {
return true ;
}
if ( ! run . processedSessions && hasLegacyRunProcessedSession ( run , session ) ) {
return true ;
}
}
return false ;
}
function getSessionAttemptCount (
state : ExtractionState ,
session : SessionVersion ,
) : number {
const sessionKey = getSessionVersionKey ( session ) ;
let attempts = 0 ;
for ( const run of state . runs ) {
if ( run . candidateSessions ) {
if (
run . candidateSessions . some (
( candidate ) = > getSessionVersionKey ( candidate ) === sessionKey ,
)
) {
attempts ++ ;
}
continue ;
}
if ( hasLegacyRunProcessedSession ( run , session ) ) {
attempts ++ ;
}
}
return attempts ;
}
function compareIndexedSessions ( a : IndexedSession , b : IndexedSession ) : number {
const timestampDelta =
getTimestampMs ( b . lastUpdated ) - getTimestampMs ( a . lastUpdated ) ;
if ( timestampDelta !== 0 ) {
return timestampDelta ;
}
if ( a . filePath . endsWith ( '.jsonl' ) !== b . filePath . endsWith ( '.jsonl' ) ) {
return a . filePath . endsWith ( '.jsonl' ) ? - 1 : 1 ;
}
return b . filePath . localeCompare ( a . filePath ) ;
}
function shouldReplaceIndexedSession (
existing : IndexedSession ,
candidate : IndexedSession ,
) : boolean {
return compareIndexedSessions ( candidate , existing ) < 0 ;
}
2026-04-24 17:21:12 -07:00
function isReadFileActivity (
2026-04-22 16:07:39 -07:00
activity : SubagentActivityEvent ,
) : activity is SubagentActivityEvent & {
data : { name : string ; args ? : { file_path? : unknown } ; callId? : unknown } ;
} {
return (
activity . type === 'TOOL_CALL_START' &&
activity . data [ 'name' ] === READ_FILE_TOOL_NAME
) ;
}
2026-04-24 17:21:12 -07:00
function getReadFileCallId ( activity : SubagentActivityEvent ) : string | null {
if ( isReadFileActivity ( activity ) ) {
const { callId } = activity . data ;
return typeof callId === 'string' ? callId : null ;
2026-04-22 16:07:39 -07:00
}
if (
2026-04-24 17:21:12 -07:00
activity . type === 'TOOL_CALL_END' &&
activity . data [ 'name' ] === READ_FILE_TOOL_NAME
2026-04-22 16:07:39 -07:00
) {
2026-04-24 17:21:12 -07:00
const id = activity . data [ 'id' ] ;
return typeof id === 'string' ? id : null ;
2026-04-22 16:07:39 -07:00
}
if (
2026-04-24 17:21:12 -07:00
activity . type === 'ERROR' &&
activity . data [ 'name' ] === READ_FILE_TOOL_NAME
2026-04-22 16:07:39 -07:00
) {
2026-04-24 17:21:12 -07:00
const callId = activity . data [ 'callId' ] ;
return typeof callId === 'string' ? callId : null ;
2026-04-22 16:07:39 -07:00
}
2026-04-24 17:21:12 -07:00
return null ;
2026-04-22 16:07:39 -07:00
}
2026-04-24 17:21:12 -07:00
function getResolvedActivityFilePath (
config : Config ,
2026-04-22 16:07:39 -07:00
activity : SubagentActivityEvent ,
) : string | null {
2026-04-24 17:21:12 -07:00
if ( ! isReadFileActivity ( activity ) ) {
2026-04-22 16:07:39 -07:00
return null ;
}
2026-04-24 17:21:12 -07:00
const args = activity . data . args ;
2026-04-22 16:07:39 -07:00
if (
2026-04-24 17:21:12 -07:00
typeof args !== 'object' ||
args === null ||
! ( 'file_path' in args ) ||
typeof args . file_path !== 'string'
2026-04-22 16:07:39 -07:00
) {
return null ;
}
2026-04-24 17:21:12 -07:00
const targetDir =
'getTargetDir' in config && typeof config . getTargetDir === 'function'
? config . getTargetDir ( )
: process . cwd ( ) ;
return path . resolve ( targetDir , args . file_path ) ;
2026-04-22 16:07:39 -07:00
}
function getUserMessageCount (
conversation : ConversationRecord & { userMessageCount? : number } ,
) : number {
return (
conversation . userMessageCount ? ?
conversation . messages . filter ( ( message ) = > message . type === 'user' ) . length
) ;
}
function isSupportedSessionFile ( fileName : string ) : boolean {
return (
fileName . startsWith ( SESSION_FILE_PREFIX ) &&
( fileName . endsWith ( '.json' ) || fileName . endsWith ( '.jsonl' ) )
) ;
}
2026-04-01 22:05:31 -07:00
/**
* Attempts to acquire an exclusive lock file using O_CREAT | O_EXCL.
* Returns true if the lock was acquired, false if another instance owns it.
*/
export async function tryAcquireLock (
lockPath : string ,
retries = 1 ,
) : Promise < boolean > {
const lockInfo : LockInfo = {
pid : process.pid ,
startedAt : new Date ( ) . toISOString ( ) ,
} ;
try {
// Atomic create-if-not-exists
const fd = await fs . open (
lockPath ,
fsConstants . O_CREAT | fsConstants . O_EXCL | fsConstants . O_WRONLY ,
) ;
try {
await fd . writeFile ( JSON . stringify ( lockInfo ) ) ;
} finally {
await fd . close ( ) ;
}
return true ;
} catch ( error : unknown ) {
if ( isNodeError ( error ) && error . code === 'EEXIST' ) {
// Lock exists — check if it's stale
if ( retries > 0 && ( await isLockStale ( lockPath ) ) ) {
debugLogger . debug ( '[MemoryService] Cleaning up stale lock file' ) ;
await releaseLock ( lockPath ) ;
return tryAcquireLock ( lockPath , retries - 1 ) ;
}
debugLogger . debug (
'[MemoryService] Lock held by another instance, skipping' ,
) ;
return false ;
}
throw error ;
}
}
/**
* Checks if a lock file is stale (owner PID is dead or lock is too old).
*/
export async function isLockStale ( lockPath : string ) : Promise < boolean > {
try {
const content = await fs . readFile ( lockPath , 'utf-8' ) ;
const parsed : unknown = JSON . parse ( content ) ;
if ( ! isLockInfo ( parsed ) ) {
return true ; // Invalid lock data — treat as stale
}
const lockInfo = parsed ;
// Check if PID is still alive
try {
process . kill ( lockInfo . pid , 0 ) ;
} catch {
// PID is dead — lock is stale
return true ;
}
// Check if lock is too old
const lockAge = Date . now ( ) - new Date ( lockInfo . startedAt ) . getTime ( ) ;
if ( lockAge > LOCK_STALE_MS ) {
return true ;
}
return false ;
} catch {
// Can't read lock — treat as stale
return true ;
}
}
/**
* Releases the lock file.
*/
export async function releaseLock ( lockPath : string ) : Promise < void > {
try {
await fs . unlink ( lockPath ) ;
} catch ( error : unknown ) {
if ( isNodeError ( error ) && error . code === 'ENOENT' ) {
return ; // Already removed
}
debugLogger . warn (
` [MemoryService] Failed to release lock: ${ error instanceof Error ? error.message : String ( error ) } ` ,
) ;
}
}
/**
* Reads the extraction state file, or returns a default state.
*/
export async function readExtractionState (
statePath : string ,
) : Promise < ExtractionState > {
try {
const content = await fs . readFile ( statePath , 'utf-8' ) ;
const parsed : unknown = JSON . parse ( content ) ;
if ( ! isExtractionState ( parsed ) ) {
return { runs : [ ] } ;
}
const runs : ExtractionRun [ ] = [ ] ;
for ( const run of parsed . runs ) {
2026-04-22 16:07:39 -07:00
const normalizedRun = buildExtractionRun ( run ) ;
if ( ! normalizedRun ) continue ;
runs . push ( normalizedRun ) ;
2026-04-01 22:05:31 -07:00
}
return { runs } ;
} catch ( error ) {
debugLogger . debug (
'[MemoryService] Failed to read extraction state:' ,
error ,
) ;
return { runs : [ ] } ;
}
}
/**
* Writes the extraction state atomically (temp file + rename).
*/
export async function writeExtractionState (
statePath : string ,
state : ExtractionState ,
) : Promise < void > {
const tmpPath = ` ${ statePath } .tmp ` ;
await fs . writeFile ( tmpPath , JSON . stringify ( state , null , 2 ) ) ;
await fs . rename ( tmpPath , statePath ) ;
}
/**
* Determines if a conversation record should be considered for processing.
* Filters out subagent sessions, sessions that haven't been idle long enough,
* and sessions with too few user messages.
*/
2026-04-22 16:07:39 -07:00
function shouldProcessConversation (
parsed : ConversationRecord & { userMessageCount? : number } ,
) : boolean {
2026-04-01 22:05:31 -07:00
// Skip subagent sessions
if ( parsed . kind === 'subagent' ) return false ;
// Skip sessions that are still active (not idle for 3+ hours)
2026-04-22 16:07:39 -07:00
const lastUpdated = getTimestampMs ( parsed . lastUpdated ) ;
2026-04-01 22:05:31 -07:00
if ( Date . now ( ) - lastUpdated < MIN_IDLE_MS ) return false ;
// Skip sessions with too few user messages
2026-04-22 16:07:39 -07:00
if ( getUserMessageCount ( parsed ) < MIN_USER_MESSAGES ) return false ;
2026-04-01 22:05:31 -07:00
return true ;
}
/**
2026-04-22 16:07:39 -07:00
* Scans the chats directory for eligible session files, loading metadata from
* both JSONL and legacy JSON sessions, deduplicating migrated sessions by
* session ID, and sorting by actual lastUpdated. We scan the full directory
* here so already-processed recent sessions cannot permanently block older
* backlog sessions from surfacing as new candidates.
2026-04-01 22:05:31 -07:00
*/
async function scanEligibleSessions (
chatsDir : string ,
2026-04-22 16:07:39 -07:00
) : Promise < IndexedSession [ ] > {
2026-04-01 22:05:31 -07:00
let allFiles : string [ ] ;
try {
allFiles = await fs . readdir ( chatsDir ) ;
} catch {
return [ ] ;
}
2026-04-22 16:07:39 -07:00
const candidates : Array < { filePath : string ; mtimeMs : number } > = [ ] ;
for ( const file of allFiles ) {
if ( ! isSupportedSessionFile ( file ) ) continue ;
const filePath = path . join ( chatsDir , file ) ;
try {
const stat = await fs . stat ( filePath ) ;
if ( ! stat . isFile ( ) ) continue ;
candidates . push ( { filePath , mtimeMs : stat.mtimeMs } ) ;
} catch {
// Skip files that disappeared between readdir and stat.
}
}
2026-04-01 22:05:31 -07:00
2026-04-22 16:07:39 -07:00
candidates . sort ( ( a , b ) = > b . mtimeMs - a . mtimeMs ) ;
2026-04-01 22:05:31 -07:00
2026-04-22 16:07:39 -07:00
const latestBySessionId = new Map < string , IndexedSession > ( ) ;
2026-04-01 22:05:31 -07:00
2026-04-22 16:07:39 -07:00
for ( const { filePath } of candidates ) {
2026-04-01 22:05:31 -07:00
try {
2026-04-22 16:07:39 -07:00
const conversation = await loadConversationRecord ( filePath , {
metadataOnly : true ,
} ) ;
if ( ! conversation || ! shouldProcessConversation ( conversation ) ) continue ;
const indexedSession : IndexedSession = {
sessionId : conversation.sessionId ,
lastUpdated : conversation.lastUpdated ,
filePath ,
summary : conversation.summary ,
2026-04-24 17:21:12 -07:00
memoryScratchpad :
conversation.memoryScratchpadIsStale === true
? undefined
: conversation . memoryScratchpad ,
2026-04-22 16:07:39 -07:00
userMessageCount : getUserMessageCount ( conversation ) ,
} ;
const existing = latestBySessionId . get ( indexedSession . sessionId ) ;
if ( ! existing || shouldReplaceIndexedSession ( existing , indexedSession ) ) {
latestBySessionId . set ( indexedSession . sessionId , indexedSession ) ;
}
2026-04-01 22:05:31 -07:00
} catch {
// Skip unreadable files
}
}
2026-04-22 16:07:39 -07:00
return Array . from ( latestBySessionId . values ( ) ) . sort ( compareIndexedSessions ) ;
2026-04-01 22:05:31 -07:00
}
2026-04-24 17:21:12 -07:00
function formatSessionHeadline ( session : IndexedSession ) : string {
const rawWorkflowSummary = session . memoryScratchpad ? . workflowSummary ;
const sanitizedWorkflowSummary =
typeof rawWorkflowSummary === 'string'
? sanitizeWorkflowSummaryForScratchpad ( rawWorkflowSummary )
: undefined ;
const workflowSummary = sanitizedWorkflowSummary ? . trim ( )
? sanitizedWorkflowSummary
: undefined ;
const summary = session . summary ? ? workflowSummary ? ? '(no summary)' ;
if (
session . summary &&
workflowSummary &&
workflowSummary !== session . summary
) {
return ` ${ summary } | workflow: ${ workflowSummary } ` ;
}
return summary ;
}
2026-04-01 22:05:31 -07:00
/**
* Builds a session index for the extraction agent: a compact listing of all
* eligible sessions with their summary, file path, and new/previously-processed status.
* The agent can use read_file on paths to inspect sessions that look promising.
*
2026-04-22 16:07:39 -07:00
* Returns the index text, the list of selected new (unprocessed) session IDs,
* and the surfaced candidate sessions for this run.
2026-04-01 22:05:31 -07:00
*/
export async function buildSessionIndex (
chatsDir : string ,
state : ExtractionState ,
2026-04-22 16:07:39 -07:00
) : Promise < {
sessionIndex : string ;
newSessionIds : string [ ] ;
candidateSessions : IndexedSession [ ] ;
} > {
2026-04-01 22:05:31 -07:00
const eligible = await scanEligibleSessions ( chatsDir ) ;
if ( eligible . length === 0 ) {
2026-04-22 16:07:39 -07:00
return { sessionIndex : '' , newSessionIds : [ ] , candidateSessions : [ ] } ;
2026-04-01 22:05:31 -07:00
}
2026-04-22 16:07:39 -07:00
const newSessions : IndexedSession [ ] = [ ] ;
const oldSessions : IndexedSession [ ] = [ ] ;
for ( const session of eligible ) {
if ( isSessionVersionProcessed ( state , session ) ) {
oldSessions . push ( session ) ;
} else {
newSessions . push ( session ) ;
}
}
2026-04-01 22:05:31 -07:00
2026-04-22 16:07:39 -07:00
newSessions . sort ( ( a , b ) = > {
const attemptDelta =
getSessionAttemptCount ( state , a ) - getSessionAttemptCount ( state , b ) ;
if ( attemptDelta !== 0 ) {
return attemptDelta ;
2026-04-01 22:05:31 -07:00
}
2026-04-22 16:07:39 -07:00
return compareIndexedSessions ( a , b ) ;
} ) ;
2026-04-01 22:05:31 -07:00
2026-04-22 16:07:39 -07:00
const candidateSessions = newSessions . slice ( 0 , MAX_NEW_SESSION_BATCH_SIZE ) ;
const remainingSlots = Math . max (
0 ,
MAX_SESSION_INDEX_SIZE - candidateSessions . length ,
) ;
const displayedOldSessions = oldSessions . slice ( 0 , remainingSlots ) ;
const candidateSessionIds = new Set (
candidateSessions . map ( ( session ) = > getSessionVersionKey ( session ) ) ,
) ;
2026-04-01 22:05:31 -07:00
2026-04-22 16:07:39 -07:00
const lines = [ . . . candidateSessions , . . . displayedOldSessions ] . map (
( session ) = > {
const status = candidateSessionIds . has ( getSessionVersionKey ( session ) )
? '[NEW]'
: '[old]' ;
2026-04-24 17:21:12 -07:00
return ` ${ status } ${ formatSessionHeadline ( session ) } ( ${ session . userMessageCount } user msgs) — ${ session . filePath } ` ;
2026-04-22 16:07:39 -07:00
} ,
) ;
return {
sessionIndex : lines.join ( '\n' ) ,
newSessionIds : candidateSessions.map ( ( session ) = > session . sessionId ) ,
candidateSessions ,
} ;
2026-04-01 22:05:31 -07:00
}
/**
* Builds a summary of all existing skills — both memory-extracted skills
* in the skillsDir and globally/workspace-discovered skills from the SkillManager.
* This prevents the extraction agent from duplicating already-available skills.
*/
async function buildExistingSkillsSummary (
skillsDir : string ,
config : Config ,
) : Promise < string > {
const sections : string [ ] = [ ] ;
// 1. Memory-extracted skills (from previous runs)
const memorySkills : string [ ] = [ ] ;
try {
const entries = await fs . readdir ( skillsDir , { withFileTypes : true } ) ;
for ( const entry of entries ) {
if ( ! entry . isDirectory ( ) ) continue ;
const skillPath = path . join ( skillsDir , entry . name , 'SKILL.md' ) ;
try {
const content = await fs . readFile ( skillPath , 'utf-8' ) ;
const match = content . match ( FRONTMATTER_REGEX ) ;
if ( match ) {
const parsed = parseFrontmatter ( match [ 1 ] ) ;
const name = parsed ? . name ? ? entry . name ;
const desc = parsed ? . description ? ? '' ;
memorySkills . push ( ` - ** ${ name } **: ${ desc } ` ) ;
} else {
memorySkills . push ( ` - ** ${ entry . name } ** ` ) ;
}
} catch {
// Skill directory without SKILL.md, skip
}
}
} catch {
// Skills directory doesn't exist yet
}
if ( memorySkills . length > 0 ) {
sections . push (
` ## Previously Extracted Skills (in ${ skillsDir } ) \ n ${ memorySkills . join ( '\n' ) } ` ,
) ;
}
// 2. Discovered skills — categorize by source location
try {
const discoveredSkills = config . getSkillManager ( ) . getSkills ( ) ;
if ( discoveredSkills . length > 0 ) {
const userSkillsDir = Storage . getUserSkillsDir ( ) ;
const globalSkills : string [ ] = [ ] ;
const workspaceSkills : string [ ] = [ ] ;
const extensionSkills : string [ ] = [ ] ;
const builtinSkills : string [ ] = [ ] ;
for ( const s of discoveredSkills ) {
const loc = s . location ;
if ( loc . includes ( '/bundle/' ) || loc . includes ( '\\bundle\\' ) ) {
2026-04-13 10:44:52 -07:00
builtinSkills . push ( ` - ** ${ s . name } **: ${ s . description } ` ) ;
2026-04-01 22:05:31 -07:00
} else if ( loc . startsWith ( userSkillsDir ) ) {
2026-04-13 10:44:52 -07:00
globalSkills . push ( ` - ** ${ s . name } **: ${ s . description } ( ${ loc } ) ` ) ;
2026-04-01 22:05:31 -07:00
} else if (
loc . includes ( '/extensions/' ) ||
loc . includes ( '\\extensions\\' )
) {
2026-04-13 10:44:52 -07:00
extensionSkills . push ( ` - ** ${ s . name } **: ${ s . description } ` ) ;
2026-04-01 22:05:31 -07:00
} else {
2026-04-13 10:44:52 -07:00
workspaceSkills . push ( ` - ** ${ s . name } **: ${ s . description } ( ${ loc } ) ` ) ;
2026-04-01 22:05:31 -07:00
}
}
if ( globalSkills . length > 0 ) {
sections . push (
` ## Global Skills (~/.gemini/skills — do NOT duplicate) \ n ${ globalSkills . join ( '\n' ) } ` ,
) ;
}
if ( workspaceSkills . length > 0 ) {
sections . push (
` ## Workspace Skills (.gemini/skills — do NOT duplicate) \ n ${ workspaceSkills . join ( '\n' ) } ` ,
) ;
}
if ( extensionSkills . length > 0 ) {
sections . push (
` ## Extension Skills (from installed extensions — do NOT duplicate) \ n ${ extensionSkills . join ( '\n' ) } ` ,
) ;
}
if ( builtinSkills . length > 0 ) {
sections . push (
` ## Builtin Skills (bundled with CLI — do NOT duplicate) \ n ${ builtinSkills . join ( '\n' ) } ` ,
) ;
}
}
} catch {
// SkillManager not available
}
return sections . join ( '\n\n' ) ;
}
/**
* Builds an AgentLoopContext from a Config for background agent execution.
*/
function buildAgentLoopContext ( config : Config ) : AgentLoopContext {
// Create a PolicyEngine that auto-approves all tool calls so the
// background sub-agent never prompts the user for confirmation.
const autoApprovePolicy = new PolicyEngine ( {
rules : [
{
toolName : '*' ,
decision : PolicyDecision.ALLOW ,
priority : 100 ,
} ,
] ,
} ) ;
const autoApproveBus = new MessageBus ( autoApprovePolicy ) ;
return {
config ,
promptId : ` skill-extraction- ${ randomUUID ( ) . slice ( 0 , 8 ) } ` ,
toolRegistry : config.getToolRegistry ( ) ,
promptRegistry : new PromptRegistry ( ) ,
resourceRegistry : new ResourceRegistry ( ) ,
messageBus : autoApproveBus ,
geminiClient : config.getGeminiClient ( ) ,
sandboxManager : config.sandboxManager ,
} ;
}
2026-04-13 10:44:52 -07:00
/**
* Validates all .patch files in the skills directory using the `diff` library.
* Parses each patch, reads the target file(s), and attempts a dry-run apply.
* Removes patches that fail validation. Returns the filenames of valid patches.
*/
export async function validatePatches (
skillsDir : string ,
config : Config ,
) : Promise < string [ ] > {
let entries : string [ ] ;
try {
entries = await fs . readdir ( skillsDir ) ;
} catch {
return [ ] ;
}
const patchFiles = entries . filter ( ( e ) = > e . endsWith ( '.patch' ) ) ;
const validPatches : string [ ] = [ ] ;
for ( const patchFile of patchFiles ) {
const patchPath = path . join ( skillsDir , patchFile ) ;
let valid = true ;
let reason = '' ;
try {
const patchContent = await fs . readFile ( patchPath , 'utf-8' ) ;
const parsedPatches = Diff . parsePatch ( patchContent ) ;
if ( ! hasParsedPatchHunks ( parsedPatches ) ) {
valid = false ;
reason = 'no hunks found in patch' ;
} else {
const applied = await applyParsedSkillPatches ( parsedPatches , config ) ;
if ( ! applied . success ) {
valid = false ;
switch ( applied . reason ) {
case 'missingTargetPath' :
reason = 'missing target file path in patch header' ;
break ;
case 'invalidPatchHeaders' :
reason = 'invalid diff headers' ;
break ;
case 'outsideAllowedRoots' :
reason = ` target file is outside skill roots: ${ applied . targetPath } ` ;
break ;
case 'newFileAlreadyExists' :
reason = ` new file target already exists: ${ applied . targetPath } ` ;
break ;
case 'targetNotFound' :
reason = ` target file not found: ${ applied . targetPath } ` ;
break ;
case 'doesNotApply' :
reason = ` patch does not apply cleanly to ${ applied . targetPath } ` ;
break ;
default :
reason = 'unknown patch validation failure' ;
break ;
}
}
}
} catch ( err ) {
valid = false ;
reason = ` failed to read or parse patch: ${ err } ` ;
}
if ( valid ) {
validPatches . push ( patchFile ) ;
debugLogger . log ( ` [MemoryService] Patch validated: ${ patchFile } ` ) ;
} else {
debugLogger . warn (
` [MemoryService] Removing invalid patch ${ patchFile } : ${ reason } ` ,
) ;
try {
await fs . unlink ( patchPath ) ;
} catch {
// Best-effort cleanup
}
}
}
return validPatches ;
}
2026-04-01 22:05:31 -07:00
/**
* Main entry point for the skill extraction background task.
* Designed to be called fire-and-forget on session startup.
*
* Coordinates across multiple CLI instances via a lock file,
* scans past sessions for reusable patterns, and runs a sub-agent
* to extract and write SKILL.md files.
*/
export async function startMemoryService ( config : Config ) : Promise < void > {
const memoryDir = config . storage . getProjectMemoryTempDir ( ) ;
const skillsDir = config . storage . getProjectSkillsMemoryDir ( ) ;
const lockPath = path . join ( memoryDir , LOCK_FILENAME ) ;
const statePath = path . join ( memoryDir , STATE_FILENAME ) ;
const chatsDir = path . join ( config . storage . getProjectTempDir ( ) , 'chats' ) ;
// Ensure directories exist
await fs . mkdir ( skillsDir , { recursive : true } ) ;
debugLogger . log ( ` [MemoryService] Starting. Skills dir: ${ skillsDir } ` ) ;
// Try to acquire exclusive lock
if ( ! ( await tryAcquireLock ( lockPath ) ) ) {
debugLogger . log ( '[MemoryService] Skipped: lock held by another instance' ) ;
return ;
}
debugLogger . log ( '[MemoryService] Lock acquired' ) ;
// Register with ExecutionLifecycleService for background tracking
const abortController = new AbortController ( ) ;
const handle = ExecutionLifecycleService . createExecution (
'' , // no initial output
( ) = > abortController . abort ( ) , // onKill
'none' ,
undefined , // no format injection
'Skill extraction' ,
'silent' ,
) ;
const executionId = handle . pid ;
const startTime = Date . now ( ) ;
let completionResult : { error : Error } | undefined ;
try {
// Read extraction state
const state = await readExtractionState ( statePath ) ;
const previousRuns = state . runs . length ;
const previouslyProcessed = getProcessedSessionIds ( state ) . size ;
debugLogger . log (
` [MemoryService] State loaded: ${ previousRuns } previous run(s), ${ previouslyProcessed } session(s) already processed ` ,
) ;
// Build session index: all eligible sessions with summaries + file paths.
// The agent decides which to read in full via read_file.
2026-04-22 16:07:39 -07:00
const { sessionIndex , newSessionIds , candidateSessions } =
await buildSessionIndex ( chatsDir , state ) ;
2026-04-01 22:05:31 -07:00
const totalInIndex = sessionIndex ? sessionIndex . split ( '\n' ) . length : 0 ;
debugLogger . log (
2026-04-22 16:07:39 -07:00
` [MemoryService] Session scan: ${ totalInIndex } indexed session(s), ${ candidateSessions . length } surfaced as new candidates ` ,
2026-04-01 22:05:31 -07:00
) ;
if ( newSessionIds . length === 0 ) {
debugLogger . log ( '[MemoryService] Skipped: no new sessions to process' ) ;
return ;
}
// Snapshot existing skill directories before extraction
const skillsBefore = new Set < string > ( ) ;
2026-04-13 10:44:52 -07:00
const patchContentsBefore = new Map < string , string > ( ) ;
2026-04-01 22:05:31 -07:00
try {
const entries = await fs . readdir ( skillsDir ) ;
for ( const e of entries ) {
2026-04-13 10:44:52 -07:00
if ( e . endsWith ( '.patch' ) ) {
try {
patchContentsBefore . set (
e ,
await fs . readFile ( path . join ( skillsDir , e ) , 'utf-8' ) ,
) ;
} catch {
// Ignore unreadable existing patches.
}
continue ;
}
2026-04-01 22:05:31 -07:00
skillsBefore . add ( e ) ;
}
} catch {
// Empty skills dir
}
debugLogger . log (
` [MemoryService] ${ skillsBefore . size } existing skill(s) in memory ` ,
) ;
// Read existing skills for context (memory-extracted + global/workspace)
const existingSkillsSummary = await buildExistingSkillsSummary (
skillsDir ,
config ,
) ;
if ( existingSkillsSummary ) {
debugLogger . log (
` [MemoryService] Existing skills context: \ n ${ existingSkillsSummary } ` ,
) ;
}
// Build agent definition and context
const agentDefinition = SkillExtractionAgent (
skillsDir ,
sessionIndex ,
existingSkillsSummary ,
) ;
const context = buildAgentLoopContext ( config ) ;
// Register the agent's model config since it's not going through AgentRegistry.
const modelAlias = getModelConfigAlias ( agentDefinition ) ;
config . modelConfigService . registerRuntimeModelConfig ( modelAlias , {
modelConfig : agentDefinition.modelConfig ,
} ) ;
debugLogger . log (
` [MemoryService] Starting extraction agent (model: ${ agentDefinition . modelConfig . model } , maxTurns: 30, maxTime: 30min) ` ,
) ;
2026-04-22 16:07:39 -07:00
const candidateSessionsByPath = new Map (
candidateSessions . map ( ( session ) = > [
path . resolve ( session . filePath ) ,
session ,
] ) ,
) ;
2026-04-24 17:21:12 -07:00
const pendingReadFileSessions = new Map < string , SessionVersion > ( ) ;
2026-04-22 16:07:39 -07:00
const processedSessionKeys = new Set < string > ( ) ;
2026-04-01 22:05:31 -07:00
// Create and run the extraction agent
2026-04-22 16:07:39 -07:00
const executor = await LocalAgentExecutor . create (
agentDefinition ,
context ,
( activity ) = > {
2026-04-24 17:21:12 -07:00
const readFileCallId = getReadFileCallId ( activity ) ;
if ( activity . type === 'TOOL_CALL_START' ) {
const resolvedPath = getResolvedActivityFilePath ( config , activity ) ;
if ( ! resolvedPath || ! readFileCallId ) {
2026-04-22 16:07:39 -07:00
return ;
}
const session = candidateSessionsByPath . get ( resolvedPath ) ;
if ( ! session ) {
return ;
}
2026-04-24 17:21:12 -07:00
pendingReadFileSessions . set ( readFileCallId , session ) ;
2026-04-22 16:07:39 -07:00
return ;
}
2026-04-24 17:21:12 -07:00
if ( ! readFileCallId ) {
return ;
}
2026-04-22 16:07:39 -07:00
2026-04-24 17:21:12 -07:00
const session = pendingReadFileSessions . get ( readFileCallId ) ;
if ( ! session ) {
2026-04-22 16:07:39 -07:00
return ;
}
2026-04-24 17:21:12 -07:00
pendingReadFileSessions . delete ( readFileCallId ) ;
if (
activity . type === 'TOOL_CALL_END' &&
! isToolActivityError ( activity . data [ 'data' ] )
) {
processedSessionKeys . add ( getSessionVersionKey ( session ) ) ;
2026-04-22 16:07:39 -07:00
}
} ,
) ;
2026-04-01 22:05:31 -07:00
2026-04-24 17:21:12 -07:00
const executorResult = await executor . run (
2026-04-01 22:05:31 -07:00
{ request : 'Extract skills from the provided sessions.' } ,
abortController . signal ,
) ;
const elapsed = ( ( Date . now ( ) - startTime ) / 1000 ) . toFixed ( 1 ) ;
// Diff skills directory to find newly created skills
const skillsCreated : string [ ] = [ ] ;
try {
const entriesAfter = await fs . readdir ( skillsDir ) ;
for ( const e of entriesAfter ) {
2026-04-13 10:44:52 -07:00
if ( ! skillsBefore . has ( e ) && ! e . endsWith ( '.patch' ) ) {
2026-04-01 22:05:31 -07:00
skillsCreated . push ( e ) ;
}
}
} catch {
// Skills dir read failed
}
2026-04-13 10:44:52 -07:00
// Validate any .patch files the agent generated
const validPatches = await validatePatches ( skillsDir , config ) ;
const patchesCreatedThisRun : string [ ] = [ ] ;
for ( const patchFile of validPatches ) {
const patchPath = path . join ( skillsDir , patchFile ) ;
let currentContent : string ;
try {
currentContent = await fs . readFile ( patchPath , 'utf-8' ) ;
} catch {
continue ;
}
if ( patchContentsBefore . get ( patchFile ) !== currentContent ) {
patchesCreatedThisRun . push ( patchFile ) ;
}
}
if ( validPatches . length > 0 ) {
debugLogger . log (
` [MemoryService] ${ validPatches . length } valid patch(es) currently in inbox; ${ patchesCreatedThisRun . length } created or updated this run ` ,
) ;
}
2026-04-22 16:07:39 -07:00
const processedSessions = candidateSessions
. filter ( ( session ) = >
processedSessionKeys . has ( getSessionVersionKey ( session ) ) ,
)
. map ( ( session ) = > ( {
sessionId : session.sessionId ,
lastUpdated : session.lastUpdated ,
} ) ) ;
2026-04-01 22:05:31 -07:00
// Record the run with full metadata
const run : ExtractionRun = {
runAt : new Date ( ) . toISOString ( ) ,
2026-04-22 16:07:39 -07:00
sessionIds : processedSessions.map ( ( session ) = > session . sessionId ) ,
candidateSessions : candidateSessions.map ( ( session ) = > ( {
sessionId : session.sessionId ,
lastUpdated : session.lastUpdated ,
} ) ) ,
processedSessions ,
2026-04-01 22:05:31 -07:00
skillsCreated ,
2026-04-24 17:21:12 -07:00
turnCount : normalizeOptionalNumber ( executorResult ? . turn_count ) ,
durationMs : normalizeOptionalNumber ( executorResult ? . duration_ms ) ,
terminateReason : normalizeOptionalString (
executorResult ? . terminate_reason ,
) ,
2026-04-01 22:05:31 -07:00
} ;
const updatedState : ExtractionState = {
runs : [ . . . state . runs , run ] ,
} ;
await writeExtractionState ( statePath , updatedState ) ;
2026-04-13 10:44:52 -07:00
if ( skillsCreated . length > 0 || patchesCreatedThisRun . length > 0 ) {
const completionParts : string [ ] = [ ] ;
if ( skillsCreated . length > 0 ) {
completionParts . push (
` created ${ skillsCreated . length } skill(s): ${ skillsCreated . join ( ', ' ) } ` ,
) ;
}
if ( patchesCreatedThisRun . length > 0 ) {
completionParts . push (
` prepared ${ patchesCreatedThisRun . length } patch(es): ${ patchesCreatedThisRun . join ( ', ' ) } ` ,
) ;
}
2026-04-01 22:05:31 -07:00
debugLogger . log (
2026-04-22 16:07:39 -07:00
` [MemoryService] Completed in ${ elapsed } s. ${ completionParts . join ( '; ' ) } (read ${ processedSessions . length } / ${ candidateSessions . length } surfaced session(s)) ` ,
2026-04-01 22:05:31 -07:00
) ;
2026-04-13 10:44:52 -07:00
const feedbackParts : string [ ] = [ ] ;
if ( skillsCreated . length > 0 ) {
feedbackParts . push (
` ${ skillsCreated . length } new skill ${ skillsCreated . length > 1 ? 's' : '' } extracted from past sessions: ${ skillsCreated . join ( ', ' ) } ` ,
) ;
}
if ( patchesCreatedThisRun . length > 0 ) {
feedbackParts . push (
` ${ patchesCreatedThisRun . length } skill update ${ patchesCreatedThisRun . length > 1 ? 's' : '' } extracted from past sessions ` ,
) ;
}
2026-04-08 11:08:49 -07:00
coreEvents . emitFeedback (
'info' ,
2026-04-13 10:44:52 -07:00
` ${ feedbackParts . join ( '. ' ) } . Use /memory inbox to review. ` ,
2026-04-08 11:08:49 -07:00
) ;
2026-04-01 22:05:31 -07:00
} else {
debugLogger . log (
2026-04-22 16:07:39 -07:00
` [MemoryService] Completed in ${ elapsed } s. No new skills or patches created (read ${ processedSessions . length } / ${ candidateSessions . length } surfaced session(s)) ` ,
2026-04-01 22:05:31 -07:00
) ;
}
} catch ( error ) {
const elapsed = ( ( Date . now ( ) - startTime ) / 1000 ) . toFixed ( 1 ) ;
if ( abortController . signal . aborted ) {
debugLogger . log ( ` [MemoryService] Cancelled after ${ elapsed } s ` ) ;
} else {
debugLogger . log (
` [MemoryService] Failed after ${ elapsed } s: ${ error instanceof Error ? error.message : String ( error ) } ` ,
) ;
}
completionResult = {
error : error instanceof Error ? error : new Error ( String ( error ) ) ,
} ;
return ;
} finally {
await releaseLock ( lockPath ) ;
debugLogger . log ( '[MemoryService] Lock released' ) ;
if ( executionId !== undefined ) {
ExecutionLifecycleService . completeExecution (
executionId ,
completionResult ,
) ;
}
}
}