improve(core): require recurrence evidence before extracting skills (#25147)

This commit is contained in:
Sandy Tao
2026-04-15 11:45:31 -07:00
committed by GitHub
parent 5333e5ab20
commit 485f3d92d8
4 changed files with 502 additions and 22 deletions

View File

@@ -16,6 +16,7 @@ import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import { randomUUID } from 'node:crypto';
import { vi } from 'vitest';
import {
Config,
type ConfigParameters,
@@ -52,6 +53,7 @@ export interface ComponentEvalCase extends BaseEvalCase {
export class ComponentRig {
public config: Config | undefined;
public testDir: string;
public homeDir: string;
public sessionId: string;
constructor(
@@ -61,6 +63,9 @@ export class ComponentRig {
this.testDir = fs.mkdtempSync(
path.join(os.tmpdir(), `gemini-component-rig-${uniqueId.slice(0, 8)}-`),
);
this.homeDir = fs.mkdtempSync(
path.join(os.tmpdir(), `gemini-component-home-${uniqueId.slice(0, 8)}-`),
);
this.sessionId = `test-session-${uniqueId}`;
}
@@ -89,12 +94,23 @@ export class ComponentRig {
this.config = makeFakeConfig(configParams);
await this.config.initialize();
// Refresh auth using USE_GEMINI to initialize the real BaseLlmClient
// Refresh auth using USE_GEMINI to initialize the real BaseLlmClient.
// This must happen BEFORE stubbing GEMINI_CLI_HOME because OAuth credential
// lookup resolves through homedir() → GEMINI_CLI_HOME.
await this.config.refreshAuth(AuthType.USE_GEMINI);
// Isolate storage paths (session files, skills, extraction state) by
// pointing GEMINI_CLI_HOME at a per-test temp directory. Storage resolves
// global paths through `homedir()` which reads this env var. This is set
// after auth so credential lookup uses the real home directory.
vi.stubEnv('GEMINI_CLI_HOME', this.homeDir);
}
/**
 * Tears down everything this rig created: disposes the config, removes all
 * environment stubs, and deletes both temporary directories.
 *
 * The teardown steps run in a `finally` block so that a rejecting
 * `dispose()` cannot leave the `GEMINI_CLI_HOME` stub active for later tests
 * or leak the temp directories. The dispose error still propagates.
 */
async cleanup() {
  try {
    await this.config?.dispose();
  } finally {
    vi.unstubAllEnvs();
    fs.rmSync(this.testDir, { recursive: true, force: true });
    fs.rmSync(this.homeDir, { recursive: true, force: true });
  }
}
}

View File

@@ -0,0 +1,341 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fsp from 'node:fs/promises';
import path from 'node:path';
import { describe, expect } from 'vitest';
import {
type Config,
ApprovalMode,
SESSION_FILE_PREFIX,
getProjectHash,
startMemoryService,
} from '@google/gemini-cli-core';
import { componentEvalTest } from './component-test-helper.js';
/** Description of one fake prior chat session that `seedSessions` writes to disk. */
interface SeedSession {
// Stored as the conversation's `sessionId`; its first 8 characters also go into the file name.
sessionId: string;
// Stored verbatim as the conversation's `summary`.
summary: string;
// One entry per user message; `buildMessages` pairs each with an acknowledgement reply.
userTurns: string[];
// Minutes subtracted from "now" to derive the timestamp embedded in the file name.
timestampOffsetMinutes: number;
}
/** Shape of a single message entry inside a seeded conversation file. */
interface MessageRecord {
// `buildMessages` uses `u<N>` for user turns and `a<N>` for the paired replies.
id: string;
// ISO-8601 string (produced with `toISOString()` in `buildMessages`).
timestamp: string;
// `buildMessages` emits 'user' and 'gemini'.
type: string;
// Message body as a list of text parts.
content: Array<{ text: string }>;
}
/**
 * Workspace fixture passed as the `files` option of every eval in this file:
 * a minimal package.json whose build/lint/test scripts only echo, plus a
 * short README.
 */
const WORKSPACE_FILES = {
// Pretty-printed with a 2-space indent.
'package.json': JSON.stringify(
{
name: 'skill-extraction-eval',
private: true,
scripts: {
build: 'echo build',
lint: 'echo lint',
test: 'echo test',
},
},
null,
2,
),
// Template literal: its line breaks are part of the file content.
'README.md': `# Skill Extraction Eval
This workspace exists to exercise background skill extraction from prior chats.
`,
};
/**
 * Expands a list of user utterances into an alternating user/assistant
 * transcript.
 *
 * Each turn yields two records: the user message (`u<N>`) followed by a
 * canned `Acknowledged: <N>` reply (`a<N>`), where N is the 1-based turn
 * number. All records share one timestamp taken six hours before the call.
 *
 * @param userTurns Text of each user message, in conversation order.
 * @returns Two records per turn, or an empty array when there are no turns.
 */
function buildMessages(userTurns: string[]): MessageRecord[] {
  const sixHoursMs = 6 * 60 * 60 * 1000;
  const sharedTimestamp = new Date(Date.now() - sixHoursMs).toISOString();

  const transcript: MessageRecord[] = [];
  userTurns.forEach((turnText, position) => {
    const turnNumber = position + 1;
    transcript.push(
      {
        id: `u${turnNumber}`,
        timestamp: sharedTimestamp,
        type: 'user',
        content: [{ text: turnText }],
      },
      {
        id: `a${turnNumber}`,
        timestamp: sharedTimestamp,
        type: 'gemini',
        content: [{ text: `Acknowledged: ${turnNumber}` }],
      },
    );
  });
  return transcript;
}
/**
 * Writes one conversation JSON file per seed into the `chats` directory under
 * the project temp dir, creating that directory if needed.
 *
 * File names are `<SESSION_FILE_PREFIX><timestamp>-<first 8 chars of
 * sessionId>.json`, where the timestamp is "now minus
 * `timestampOffsetMinutes`" truncated to the minute with `:` replaced by `-`.
 *
 * @param config Config whose storage supplies the temp dir and project root.
 * @param sessions Seeds to materialise, written sequentially in order.
 * @throws Error when two seeds resolve to the same file name. Previously the
 *   later seed silently overwrote the earlier one, leaving fewer sessions on
 *   disk than the caller asked for.
 */
async function seedSessions(
  config: Config,
  sessions: SeedSession[],
): Promise<void> {
  const chatsDir = path.join(config.storage.getProjectTempDir(), 'chats');
  await fsp.mkdir(chatsDir, { recursive: true });

  // Every seed belongs to the same project, so hash the root once.
  const projectHash = getProjectHash(config.storage.getProjectRoot());
  const writtenFilenames = new Set<string>();

  for (const session of sessions) {
    const timestamp = new Date(
      Date.now() - session.timestampOffsetMinutes * 60 * 1000,
    )
      .toISOString()
      .slice(0, 16)
      .replace(/:/g, '-');
    const filename = `${SESSION_FILE_PREFIX}${timestamp}-${session.sessionId.slice(0, 8)}.json`;

    // Names only carry minute precision plus an 8-character id prefix, so
    // distinct seeds can collide; fail loudly instead of overwriting.
    if (writtenFilenames.has(filename)) {
      throw new Error(
        `Seed session "${session.sessionId}" maps to the same file name as an earlier seed (${filename}); ` +
          'use a different timestampOffsetMinutes or a sessionId with a distinct 8-character prefix',
      );
    }
    writtenFilenames.add(filename);

    const conversation = {
      sessionId: session.sessionId,
      projectHash,
      summary: session.summary,
      startTime: new Date(Date.now() - 7 * 60 * 60 * 1000).toISOString(),
      lastUpdated: new Date(Date.now() - 4 * 60 * 60 * 1000).toISOString(),
      messages: buildMessages(session.userTurns),
    };
    await fsp.writeFile(
      path.join(chatsDir, filename),
      JSON.stringify(conversation, null, 2),
    );
  }
}
/**
 * Runs the memory service for `config`, then loads `.extraction-state.json`
 * from the project memory temp dir and returns its runs in normalised form.
 *
 * Any run whose `sessionIds` or `skillsCreated` is not an array gets an empty
 * array for that field, so callers can index into the result without guards.
 *
 * @param config Config passed to `startMemoryService` and used to locate the
 *   memory and skills directories.
 * @returns The normalised run list plus the project skills directory path.
 * @throws Error when the state file has no `runs` array or it is empty.
 *   Errors from reading or parsing the state file propagate unchanged.
 */
async function runExtractionAndReadState(config: Config): Promise<{
  state: { runs: Array<{ sessionIds: string[]; skillsCreated: string[] }> };
  skillsDir: string;
}> {
  await startMemoryService(config);

  const memoryDir = config.storage.getProjectMemoryTempDir();
  const skillsDir = config.storage.getProjectSkillsMemoryDir();
  const stateJson = await fsp.readFile(
    path.join(memoryDir, '.extraction-state.json'),
    'utf-8',
  );
  const parsed = JSON.parse(stateJson) as {
    runs?: Array<{ sessionIds?: string[]; skillsCreated?: string[] }>;
  };

  const recordedRuns = parsed.runs;
  if (!Array.isArray(recordedRuns) || recordedRuns.length === 0) {
    throw new Error('Skill extraction finished without writing any run state');
  }

  // Anything that is not an array (missing, null, wrong type) becomes [].
  const listOrEmpty = (value: string[] | undefined): string[] =>
    Array.isArray(value) ? value : [];

  const runs = recordedRuns.map((run) => ({
    sessionIds: listOrEmpty(run.sessionIds),
    skillsCreated: listOrEmpty(run.skillsCreated),
  }));

  return { state: { runs }, skillsDir };
}
/**
 * Reads the `SKILL.md` of every skill directory directly under `skillsDir`.
 *
 * Only "nothing there" conditions are treated as empty: a missing
 * `skillsDir` yields `[]`, and a skill directory without a `SKILL.md` is
 * skipped. Previously any single read failure discarded every body, which
 * could hide skills that had been written successfully.
 *
 * @param skillsDir Directory expected to contain one sub-directory per skill.
 * @returns The contents of each `SKILL.md` that exists.
 * @throws Any filesystem error other than a missing path (for example a
 *   permission error), so it is not mistaken for "no skills were created".
 */
async function readSkillBodies(skillsDir: string): Promise<string[]> {
  const skillNames = await listSkillDirNames(skillsDir);
  const bodies = await Promise.all(
    skillNames.map(async (skillName) => {
      try {
        return await fsp.readFile(
          path.join(skillsDir, skillName, 'SKILL.md'),
          'utf-8',
        );
      } catch (error: unknown) {
        if (isMissingPathError(error)) {
          return undefined;
        }
        throw error;
      }
    }),
  );
  return bodies.filter((body): body is string => body !== undefined);
}

/** Lists the names of the sub-directories of `skillsDir`, or `[]` if it does not exist. */
async function listSkillDirNames(skillsDir: string): Promise<string[]> {
  try {
    const entries = await fsp.readdir(skillsDir, { withFileTypes: true });
    return entries
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name);
  } catch (error: unknown) {
    if (isMissingPathError(error)) {
      return [];
    }
    throw error;
  }
}

/** True when `error` carries a Node.js error code meaning the path does not exist. */
function isMissingPathError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const code = (error as { code?: unknown }).code;
  return code === 'ENOENT' || code === 'ENOTDIR';
}
/**
 * Shared configOverrides for all skill extraction component evals; every
 * test in this file passes this same object.
 * - experimentalMemoryManager: enables the memory extraction pipeline.
 * - approvalMode: YOLO auto-approves tool calls (write_file, read_file) so the
 * background agent can execute without interactive confirmation.
 */
const EXTRACTION_CONFIG_OVERRIDES = {
experimentalMemoryManager: true,
approvalMode: ApprovalMode.YOLO,
};
describe('Skill Extraction', () => {
// Negative case: two seeded sessions have similar-looking summaries, but
// every turn describes an unrelated one-off incident. The extraction run is
// expected to process both sessions and create no skill.
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
suiteType: 'component-level',
name: 'ignores one-off incidents even when session summaries look similar',
files: WORKSPACE_FILES,
// NOTE(review): presumably milliseconds (3 minutes) — confirm against componentEvalTest.
timeout: 180000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
await seedSessions(config, [
{
// Incident 1: redirect loop tied to INC-4412 and a single hotfix branch.
sessionId: 'incident-login-redirect',
summary: 'Debug login redirect loop in staging',
timestampOffsetMinutes: 420,
userTurns: [
'We only need a one-off fix for incident INC-4412 on branch hotfix/login-loop.',
'The exact failing string is ERR_REDIRECT_4412 and this workaround is incident-specific.',
'Patch packages/auth/src/redirect.ts just for this branch and do not generalize it.',
'The thing that worked was deleting the stale staging cookie before retrying.',
'This is not a normal workflow and should not become a reusable instruction.',
'It only reproduced against the 2026-04-08 staging rollout.',
'After the cookie clear, the branch-specific redirect logic passed.',
'Do not turn this incident writeup into a standing process.',
'Yes, the hotfix worked for this exact redirect-loop incident.',
'Close out INC-4412 once the staging login succeeds again.',
],
},
{
// Incident 2: callback timeout tied to TICKET-991, with a different fix.
sessionId: 'incident-login-timeout',
summary: 'Debug login callback timeout in staging',
timestampOffsetMinutes: 360,
userTurns: [
'This is another one-off staging incident, this time TICKET-991 for callback timeout.',
'The exact failing string is ERR_CALLBACK_TIMEOUT_991 and it is unrelated to the redirect loop.',
'The temporary fix was rotating the staging secret and deleting a bad feature-flag row.',
'Do not write a generic login-debugging playbook from this.',
'This only applied to the callback timeout during the April rollout.',
'The successful fix was specific to the stale secret in staging.',
'It does not define a durable repo workflow for future tasks.',
'After rotating the secret, the callback timeout stopped reproducing.',
'Treat this as incident response only, not a reusable skill.',
'Once staging passed again, we closed TICKET-991.',
],
},
]);
},
assert: async (config) => {
const { state, skillsDir } = await runExtractionAndReadState(config);
const skillBodies = await readSkillBodies(skillsDir);
// Exactly one run, covering both seeded sessions, with nothing extracted.
expect(state.runs).toHaveLength(1);
expect(state.runs[0].sessionIds).toHaveLength(2);
expect(state.runs[0].skillsCreated).toEqual([]);
expect(skillBodies).toEqual([]);
},
});
// Positive case: both seeded sessions describe the same ordered
// three-command settings-docs regeneration workflow. The extraction run is
// expected to produce at least one skill that names all three commands.
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
suiteType: 'component-level',
name: 'extracts a repeated project-specific workflow into a skill',
files: WORKSPACE_FILES,
// NOTE(review): presumably milliseconds (3 minutes) — confirm against componentEvalTest.
timeout: 180000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
await seedSessions(config, [
{
// First occurrence of the predocs -> schema -> docs sequence.
sessionId: 'settings-docs-regen-1',
summary: 'Update settings docs after adding a config option',
timestampOffsetMinutes: 420,
userTurns: [
'When we add a new config option, we have to regenerate the settings docs in a specific order.',
'The sequence that worked was npm run predocs:settings, npm run schema:settings, then npm run docs:settings.',
'Do not hand-edit generated settings docs.',
'If predocs is skipped, the generated schema docs miss the new defaults.',
'Update the source first, then run that generation sequence.',
'After regenerating, verify the schema output and docs changed together.',
'We used this same sequence the last time we touched settings docs.',
'That ordered workflow passed and produced the expected generated files.',
'Please keep the exact command order because reversing it breaks the output.',
'Yes, the generated settings docs were correct after those three commands.',
],
},
{
// Second occurrence: same commands, same ordering constraint.
sessionId: 'settings-docs-regen-2',
summary: 'Regenerate settings schema docs for another new setting',
timestampOffsetMinutes: 360,
userTurns: [
'We are touching another setting, so follow the same settings-doc regeneration workflow again.',
'Run npm run predocs:settings before npm run schema:settings and npm run docs:settings.',
'The project keeps generated settings docs in sync through those commands, not manual edits.',
'Skipping predocs caused stale defaults in the generated output before.',
'Change the source, then execute the same three commands in order.',
'Verify both the schema artifact and docs update together after regeneration.',
'This is the recurring workflow we use whenever a setting changes.',
'The exact order worked again on this second settings update.',
'Please preserve that ordering constraint for future settings changes.',
'Confirmed: the settings docs regenerated correctly with the same command sequence.',
],
},
]);
},
assert: async (config) => {
const { state, skillsDir } = await runExtractionAndReadState(config);
const skillBodies = await readSkillBodies(skillsDir);
// Checked across all skills so the commands may be split between files.
const combinedSkills = skillBodies.join('\n\n');
expect(state.runs).toHaveLength(1);
expect(state.runs[0].sessionIds).toHaveLength(2);
expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1);
expect(skillBodies.length).toBeGreaterThanOrEqual(1);
expect(combinedSkills).toContain('npm run predocs:settings');
expect(combinedSkills).toContain('npm run schema:settings');
expect(combinedSkills).toContain('npm run docs:settings');
// Section headings are matched case-insensitively.
expect(combinedSkills).toMatch(/When to Use/i);
expect(combinedSkills).toMatch(/Verification/i);
},
});
// Positive case: both seeded sessions describe the same check -> migrate ->
// validate database workflow with a rollback step on failure. The extraction
// run is expected to produce at least one skill covering those commands.
componentEvalTest('USUALLY_PASSES', {
suiteName: 'skill-extraction',
suiteType: 'component-level',
name: 'extracts a repeated multi-step migration workflow with ordering constraints',
files: WORKSPACE_FILES,
// NOTE(review): presumably milliseconds (3 minutes) — confirm against componentEvalTest.
timeout: 180000,
configOverrides: EXTRACTION_CONFIG_OVERRIDES,
setup: async (config) => {
await seedSessions(config, [
{
// First occurrence: the v12 migration.
sessionId: 'db-migration-v12',
summary: 'Run database migration for v12 schema update',
timestampOffsetMinutes: 420,
userTurns: [
'Every time we change the database schema we follow a specific migration workflow.',
'First run npm run db:check to verify no pending migrations conflict.',
'Then run npm run db:migrate to apply the new migration files.',
'After migration, always run npm run db:validate to confirm schema integrity.',
'If db:validate fails, immediately run npm run db:rollback before anything else.',
'Never skip db:check — last time we did, two migrations collided and corrupted the index.',
'The ordering is critical: check, migrate, validate. Reversing migrate and validate caused silent data loss before.',
'This v12 migration passed after following that exact sequence.',
'We use this same three-step workflow every time the schema changes.',
'Confirmed: db:check, db:migrate, db:validate completed successfully for v12.',
],
},
{
// Second occurrence: the v13 migration, same steps in the same order.
sessionId: 'db-migration-v13',
summary: 'Run database migration for v13 schema update',
timestampOffsetMinutes: 360,
userTurns: [
'New schema change for v13, following the same database migration workflow as before.',
'Start with npm run db:check to ensure no conflicting pending migrations.',
'Then npm run db:migrate to apply the v13 migration files.',
'Then npm run db:validate to confirm the schema is consistent.',
'If validation fails, run npm run db:rollback immediately — do not attempt manual fixes.',
'We learned the hard way that skipping db:check causes index corruption.',
'The check-migrate-validate order is mandatory for every schema change.',
'This is the same recurring workflow we used for v12 and earlier migrations.',
'The v13 migration passed with the same three-step sequence.',
'Confirmed: the standard db migration workflow succeeded again for v13.',
],
},
]);
},
assert: async (config) => {
const { state, skillsDir } = await runExtractionAndReadState(config);
const skillBodies = await readSkillBodies(skillsDir);
// Checked across all skills so the commands may be split between files.
const combinedSkills = skillBodies.join('\n\n');
expect(state.runs).toHaveLength(1);
expect(state.runs[0].sessionIds).toHaveLength(2);
expect(state.runs[0].skillsCreated.length).toBeGreaterThanOrEqual(1);
expect(skillBodies.length).toBeGreaterThanOrEqual(1);
expect(combinedSkills).toContain('npm run db:check');
expect(combinedSkills).toContain('npm run db:migrate');
expect(combinedSkills).toContain('npm run db:validate');
// The rollback step only has to be mentioned; any casing is accepted.
expect(combinedSkills).toMatch(/rollback/i);
expect(combinedSkills).toMatch(/When to Use/i);
},
});
});

View File

@@ -0,0 +1,90 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import { SkillExtractionAgent } from './skill-extraction-agent.js';
import {
EDIT_TOOL_NAME,
GLOB_TOOL_NAME,
GREP_TOOL_NAME,
LS_TOOL_NAME,
READ_FILE_TOOL_NAME,
WRITE_FILE_TOOL_NAME,
} from '../tools/tool-names.js';
import { PREVIEW_GEMINI_FLASH_MODEL } from '../config/models.js';
// Unit tests for the agent definition returned by SkillExtractionAgent.
// They pin its metadata and tool list, and check that the generated system
// prompt and query contain the recurrence/durability wording.
describe('SkillExtractionAgent', () => {
  const skillsDir = '/tmp/skills';
  const sessionIndex =
    '[NEW] Debug login flow (12 user msgs) — /tmp/chats/session-1.json';
  const existingSkillsSummary =
    '## Workspace Skills (.gemini/skills — do NOT duplicate)\n- **existing-skill**: Existing description';

  // One definition is shared by every test; the tests only read from it.
  const agent = SkillExtractionAgent(
    skillsDir,
    sessionIndex,
    existingSkillsSummary,
  );

  it('should expose expected metadata, model, and tools', () => {
    const requiredTools = [
      READ_FILE_TOOL_NAME,
      WRITE_FILE_TOOL_NAME,
      EDIT_TOOL_NAME,
      LS_TOOL_NAME,
      GLOB_TOOL_NAME,
      GREP_TOOL_NAME,
    ];

    expect(agent.kind).toBe('local');
    expect(agent.name).toBe('confucius');
    expect(agent.displayName).toBe('Skill Extractor');
    expect(agent.modelConfig.model).toBe(PREVIEW_GEMINI_FLASH_MODEL);
    // arrayContaining: extra tools are allowed, these six are mandatory.
    expect(agent.toolConfig?.tools).toEqual(
      expect.arrayContaining(requiredTools),
    );
  });

  it('should default to no skill unless recurrence and durability are proven', () => {
    const prompt = agent.promptConfig.systemPrompt;
    const gatePhrases = [
      'Default to NO SKILL.',
      'strong evidence this will recur for future agents in this repo/workflow',
      'broader than a single incident',
      'A skill MUST meet ALL of these criteria:',
      'Future agents in this repo/workflow are likely to need it',
    ];
    for (const phrase of gatePhrases) {
      expect(prompt).toContain(phrase);
    }
  });

  it('should explicitly reject one-off incidents and single-session preferences', () => {
    const prompt = agent.promptConfig.systemPrompt;
    const rejectionPhrases = [
      'Single-session preferences',
      'One-off incidents',
      'Output-style preferences',
      'cannot survive renaming the specific',
    ];
    for (const phrase of rejectionPhrases) {
      expect(prompt).toContain(phrase);
    }
  });

  it('should warn that session summaries are user-intent summaries, not workflow evidence', () => {
    const query = agent.promptConfig.query ?? '';
    const expectedFragments = [
      // The caller-supplied context must be embedded verbatim.
      existingSkillsSummary,
      sessionIndex,
      'The summary is a user-intent summary, not a workflow summary.',
      'The session summaries describe user intent, not workflow details.',
      'Only write a skill if the evidence shows a durable, recurring workflow',
      'If recurrence or future reuse is unclear, create no skill and explain why.',
    ];
    for (const fragment of expectedFragments) {
      expect(query).toContain(fragment);
    }
  });
});

View File

@@ -36,7 +36,7 @@ function buildSystemPrompt(skillsDir: string): string {
'- solve similar tasks with fewer tool calls and fewer reasoning tokens',
'- reuse proven workflows and verification checklists',
'- avoid known failure modes and landmines',
'- anticipate user preferences without being reminded',
'- capture durable workflow constraints that future agents are likely to encounter again',
'',
'============================================================',
'SAFETY AND HYGIENE (STRICT)',
@@ -59,6 +59,10 @@ function buildSystemPrompt(skillsDir: string): string {
'1. "Is this something a competent agent would NOT already know?" If no, STOP.',
'2. "Does an existing skill (listed below) already cover this?" If yes, STOP.',
'3. "Can I write a concrete, step-by-step procedure?" If no, STOP.',
'4. "Is there strong evidence this will recur for future agents in this repo/workflow?" If no, STOP.',
'5. "Is this broader than a single incident (one bug, one ticket, one branch, one date, one exact error)?" If no, STOP.',
'',
'Default to NO SKILL.',
'',
'Do NOT create skills for:',
'',
@@ -67,6 +71,10 @@ function buildSystemPrompt(skillsDir: string): string {
'- **Pure Q&A**: The user asked "how does X work?" and got an answer. No procedure.',
'- **Brainstorming/design**: Discussion of how to build something, without a validated',
' implementation that produced a reusable procedure.',
'- **Single-session preferences**: User-specific style/output preferences or workflow',
' preferences mentioned only once.',
'- **One-off incidents**: Debugging or incident response tied to a single bug, ticket,',
' branch, date, or exact error string.',
'- **Anything already covered by an existing skill** (global, workspace, builtin, or',
' previously extracted). Check the "Existing Skills" section carefully.',
'',
@@ -74,31 +82,40 @@ function buildSystemPrompt(skillsDir: string): string {
'WHAT COUNTS AS A SKILL',
'============================================================',
'',
'A skill MUST meet BOTH of these criteria:',
'A skill MUST meet ALL of these criteria:',
'',
'1. **Procedural and concrete**: It can be expressed as numbered steps with specific',
' commands, paths, or code patterns. If you can only write vague guidance, it is NOT',
' a skill. "Be careful with X" is advice, not a skill.',
'',
'2. **Non-obvious and project-specific**: A competent agent would NOT already know this.',
' It encodes project-specific knowledge, non-obvious ordering constraints, or',
' hard-won failure shields that cannot be inferred from the codebase alone.',
'2. **Durable and reusable**: Future agents in this repo/workflow are likely to need it',
' again. If it only solved one incident, it is NOT a skill.',
'',
'Confidence tiers (prefer higher tiers):',
'3. **Evidence-backed and project-specific**: It encodes project-specific knowledge,',
' repeated operational constraints, or hard-won failure shields supported by session',
' evidence. Do not assume something is non-obvious just because it sounds detailed.',
'',
'**High confidence** — create the skill:',
'- The same workflow appeared in multiple sessions (cross-session repetition)',
'- A multi-step procedure was validated (tests passed, user confirmed success)',
'Confidence tiers:',
'',
'**Medium confidence** — create the skill if it is clearly project-specific:',
'- A project-specific build/test/deploy/release procedure was established',
'- A non-obvious ordering constraint or prerequisite was discovered',
'- A failure mode was hit and a concrete fix was found and verified',
'**High confidence** — create the skill only when recurrence/durability is clear:',
'- The same workflow appeared in multiple sessions (cross-session repetition), OR it is',
' a stable recurring repo workflow (for example setup/build/test/deploy/release) with a',
' clear future trigger',
'- The workflow was validated (tests passed, user confirmed success, or the same fix',
' worked repeatedly)',
'- The skill can be named without referencing a specific incident, bug, branch, or date',
'',
'**Medium confidence** — usually do NOT create the skill yet:',
'- A project-specific procedure appeared once and seems useful, but recurrence is not yet',
' clear',
'- A verified fix exists, but it is still tied to one incident',
'- A user correction changed the approach once, but durability is uncertain',
'',
'**Low confidence** — do NOT create the skill:',
'- A one-off debugging session with no reusable procedure',
'- Generic workflows any agent could figure out from the codebase',
'- A code review or investigation with no durable takeaway',
'- Output-style preferences that do not materially change procedure',
'',
'Aim for 0-2 skills per run. Quality over quantity.',
'',
@@ -117,8 +134,10 @@ function buildSystemPrompt(skillsDir: string): string {
'',
'What to look for:',
'',
'- User corrections: "No, do it this way" -> preference signal',
'- User corrections that change procedure in a durable way, especially when repeated',
' across sessions',
'- Repeated patterns across sessions: same commands, same file paths, same workflow',
'- Stable recurring repo lifecycle workflows with clear future triggers',
'- Failed attempts followed by successful ones -> failure shield',
'- Multi-step procedures that were validated (tests passed, user confirmed)',
'- User interruptions: "Stop, you need to X first" -> ordering constraint',
@@ -129,6 +148,8 @@ function buildSystemPrompt(skillsDir: string): string {
'- Tool outputs that are just data (file contents, search results)',
'- Speculative plans that were never executed',
"- Temporary context (current branch name, today's date, specific error IDs)",
'- Similar session summaries without matching workflow evidence',
'- One-off artifact names: bug IDs, branch names, timestamps, exact incident strings',
'',
'============================================================',
'SKILL FORMAT',
@@ -214,7 +235,10 @@ function buildSystemPrompt(skillsDir: string): string {
'- Keep scopes distinct. Avoid overlapping "do-everything" skills.',
'- Every skill MUST have: triggers, procedure, at least one pitfall or verification step.',
'- If you cannot write a reliable procedure (too many unknowns), do NOT create the skill.',
'- Do not create skills for generic advice that any competent agent would already know.',
'- If the candidate is tied to one incident or cannot survive renaming the specific',
' bug/ticket, do NOT create it.',
'- Do not create skills for generic advice, output-style preferences, or ephemeral',
' choices that any competent agent would already know or adapt to on the fly.',
'- Prefer fewer, higher-quality skills. 0-2 skills per run is typical. 3+ is unusual.',
'',
'============================================================',
@@ -224,17 +248,23 @@ function buildSystemPrompt(skillsDir: string): string {
`1. Use list_directory on ${skillsDir} to see existing skills.`,
'2. If skills exist, read their SKILL.md files to understand what is already captured.',
'3. Scan the session index provided in the query. Look for [NEW] sessions whose summaries',
' suggest workflows that ALSO appear in other sessions (either [NEW] or [old]).',
'4. Apply the minimum signal gate. If no repeated patterns are visible, report that and finish.',
' hint at workflows that ALSO appear in other sessions (either [NEW] or [old]) or at a',
' stable recurring repo workflow. Remember: summary similarity alone is NOT enough.',
'4. Apply the minimum signal gate. If recurrence or durability is not visible, report that',
' no skill should be created and finish.',
'5. For promising patterns, use read_file on the session file paths to inspect the full',
' conversation. Confirm the workflow was actually repeated and validated.',
'6. For each confirmed skill, verify it meets ALL criteria (repeatable, procedural, high-leverage).',
' conversation. Confirm the workflow was actually repeated and validated. Read at least',
' two sessions unless the candidate is clearly a stable recurring repo lifecycle workflow.',
'6. For each candidate, verify it meets ALL criteria. Before writing, make sure you can',
' state: future trigger, evidence sessions, recurrence signal, validation signal, and',
' why it is not generic.',
'7. Write new SKILL.md files or update existing ones in your directory using write_file.',
' For skills that live OUTSIDE your directory, write a .patch file instead (see UPDATING EXISTING SKILLS).',
'8. Write COMPLETE files — never partially update a SKILL.md.',
'',
'IMPORTANT: Do NOT read every session. Only read sessions whose summaries suggest a',
'repeated pattern worth investigating. Most runs should read 0-3 sessions and create 0 skills.',
'repeated pattern or a stable recurring repo workflow worth investigating. Most runs',
'should read 0-3 sessions and create 0 skills.',
'Do not explore the codebase. Work only with the session index, session files, and the skills directory.',
].join('\n');
}
@@ -301,6 +331,9 @@ export const SkillExtractionAgent = (
'Below is an index of past conversation sessions. Each line shows:',
'[NEW] or [old] status, a 1-line summary, message count, and the file path.',
'',
'The summary is a user-intent summary, not a workflow summary.',
'Matching summary text alone is never enough evidence for a reusable skill.',
'',
'[NEW] = not yet processed for skill extraction (focus on these)',
'[old] = previously processed (read only if a [NEW] session hints at a repeated pattern)',
'',
@@ -319,7 +352,7 @@ export const SkillExtractionAgent = (
return {
systemPrompt: buildSystemPrompt(skillsDir),
query: `${initialContext}\n\nAnalyze the session index above. Read sessions that suggest repeated workflows using read_file. Extract reusable skills to ${skillsDir}/.`,
query: `${initialContext}\n\nAnalyze the session index above. The session summaries describe user intent, not workflow details. Read sessions that suggest repeated workflows using read_file. Only write a skill if the evidence shows a durable, recurring workflow or a stable recurring repo procedure. If recurrence or future reuse is unclear, create no skill and explain why.`,
};
},
runConfig: {