feat(skills): add ci skill for automated failure replication (#23720)

This commit is contained in:
matt korwel
2026-03-24 17:43:04 -07:00
committed by GitHub
parent 0552464eed
commit f74f2b0780
2 changed files with 290 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
---
name: ci
description:
A specialized skill for Gemini CLI that provides high-performance, fail-fast
monitoring of GitHub Actions workflows and automated local verification of CI
failures. It handles run discovery automatically—simply provide the branch name.
---
# CI Replicate & Status
This skill enables the agent to efficiently monitor GitHub Actions, triage
failures, and bridge remote CI errors to local development. It defaults to
**automatic replication** of failures to streamline the fix cycle.
## Core Capabilities
- **Automatic Replication**: Automatically monitors CI and immediately executes
suggested test or lint commands locally upon failure.
- **Real-time Monitoring**: Aggregated status line for all concurrent workflows
on the current branch.
- **Fail-Fast Triage**: Immediately stops on the first job failure to provide a
structured report.
## Workflow
### 1. CI Replicate (`replicate`) - DEFAULT
Use this as the primary path to monitor CI and **automatically** replicate
failures locally for immediate triage and fixing.
- **Behavior**: When this workflow is triggered, the agent will monitor the CI
and **immediately and automatically execute** all suggested test or lint
commands (marked with 🚀) as soon as a failure is detected.
- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch]`
- **Discovery**: The script **automatically** finds the latest active or recent
run for the branch. Do NOT manually search for run IDs.
- **Goal**: Reproduce the failure locally without manual intervention, then
proceed to analyze and fix the code.
### 2. CI Status (`status`)
Use this when you have pushed changes and need to monitor the CI and reproduce
any failures locally.
- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch] [run_id]`
- **Discovery**: The script **automatically** finds the latest active or recent
run for the branch. You should NOT manually search for `run_id` using `gh run list`
unless a specific historical run is requested. Simply provide the branch name.
- **Step 1 (Monitor)**: Execute the tool with the branch name.
- **Step 2 (Extract)**: Extract suggested `npm test` or `npm run lint` commands
from the output (marked with 🚀).
- **Step 3 (Reproduce)**: Execute those commands locally to confirm the failure.
- **Behavior**: It will poll every 15 seconds. If it detects a failure, it will
exit with a structured report and provide the exact commands to run locally.
## Failure Categories & Actions
- **Test Failures**: Agent should run the specific `npm test -w <pkg> -- <path>`
command suggested.
- **Lint Errors**: Agent should run `npm run lint:all` or the specific package
lint command.
- **Build Errors**: Agent should check `tsc` output or build logs to resolve
compilation issues.
- **Job Errors**: Investigate `gh run view --job <job_id> --log` for
infrastructure or setup failures.
## Noise Filtering
The underlying scripts automatically filter noise (Git logs, NPM warnings, stack
trace overhead). The agent should focus on the "Structured Failure Report"
provided by the tool.

224
.gemini/skills/ci/scripts/ci.mjs Executable file
View File

@@ -0,0 +1,224 @@
#!/usr/bin/env node
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { execSync } from 'node:child_process';
// Positional CLI arguments: [branch] [run_id]. Both are optional.
const [cliBranch, RUN_ID_OVERRIDE] = process.argv.slice(2);

// Branch to monitor: the first CLI argument, otherwise whatever
// `git branch --show-current` prints for the working directory.
const BRANCH = cliBranch || execSync('git branch --show-current').toString().trim();

/**
 * Derives the "owner/name" repository slug from the `origin` remote URL by
 * dropping everything up to and including `github.com/` (or `github.com:`)
 * and a trailing `.git`.
 *
 * @returns {string} The derived slug, or 'google-gemini/gemini-cli' when the
 *   `git remote get-url origin` command fails.
 */
function detectRepoSlug() {
  try {
    const originUrl = execSync('git remote get-url origin').toString().trim();
    const withoutHost = originUrl.replace(/.*github\.com[\/:]/, '');
    return withoutHost.replace(/\.git$/, '').trim();
  } catch (e) {
    return 'google-gemini/gemini-cli';
  }
}

// Repository slug interpolated into `gh api repos/...` calls below.
const REPO = detectRepoSlug();

// NOTE(review): not referenced anywhere else in the visible file.
const FAILED_FILES = new Set();
/**
 * Runs the GitHub CLI with the given argument string and captures its stdout.
 * stdin and stderr are discarded.
 *
 * @param {string} args - Everything that follows `gh ` on the command line.
 * @returns {string|null} The command's stdout, or null if the command could
 *   not be run or exited with a non-zero status.
 */
function runGh(args) {
  const spawnOptions = { stdio: ['ignore', 'pipe', 'ignore'] };
  let stdout = null;
  try {
    stdout = execSync(`gh ${args}`, spawnOptions).toString();
  } catch (e) {
    // Best-effort by design: callers treat null as "no data available".
    stdout = null;
  }
  return stdout;
}
/**
 * Downloads the log of one Actions job through `gh api` and keeps only the
 * lines that `grep -iE` matches against a fixed set of failure markers.
 *
 * @param {number|string} jobId - Job identifier placed in the API path.
 * @returns {string} The matching log lines, or an empty string when the
 *   pipeline fails (which includes grep finding no match).
 */
function fetchFailuresViaApi(jobId) {
  const spawnOptions = {
    stdio: ['ignore', 'pipe', 'ignore'],
    maxBuffer: 10 * 1024 * 1024,
  };
  try {
    const pipeline = `gh api repos/${REPO}/actions/jobs/${jobId}/logs | grep -iE " FAIL |❌|ERROR|Lint failed|Build failed|Exception|failed with exit code"`;
    const matched = execSync(pipeline, spawnOptions);
    return matched.toString();
  } catch (e) {
    return "";
  }
}
/**
 * Decides whether a CI log line should be dropped from the failure report.
 *
 * A line is noise when it contains one of a fixed set of markers (git fetch
 * chatter, npm warnings, Node internals, node_modules paths) or when it ends
 * in a stack frame such as `at fn (file.js:10:5)` or `at file.js:10:5`.
 *
 * Stack frames are recognised by their shape rather than by the bare
 * substring "at ", which also occurs inside ordinary words ("that ",
 * "format ", "what ") and would discard real failure lines.
 *
 * @param {string} line - One line of log output.
 * @returns {boolean} True if the line should be ignored.
 */
function isNoise(line) {
  const lower = line.toLowerCase();

  const noiseMarkers = [
    '* [new branch]',
    'npm warn',
    'fetching updates',
    'node:internal/errors',
    'checkexecsyncerror',
    'node_modules',
  ];
  if (noiseMarkers.some((marker) => lower.includes(marker))) {
    return true;
  }

  // "at" as its own token, followed by a location: `(<path>:line:col)`,
  // `(<anonymous>)`, `(native)`, a bare `<path>:line:col`, or `<anonymous>`.
  const stackFrame =
    /(?:^|\s)at\s+(?:.*\((?:[^()]*:\d+:\d+|<anonymous>|native)\)|\S+:\d+:\d+|<anonymous>)\s*$/;
  return stackFrame.test(lower);
}
/**
 * Pulls the first test-file path (`*.test.js|jsx|ts|tsx`) out of a log line.
 *
 * Pipe, hash, bracket and parenthesis characters and anything shaped like an
 * `<...>` tag are replaced with spaces first, so decoration around the path
 * does not end up inside the match.
 *
 * @param {string} failureText - A single log line.
 * @returns {string|null} The matched path, or null when the line has none.
 */
function extractTestFile(failureText) {
  const stripped = failureText
    .replace(/[|#\[\]()]/g, " ")
    .replace(/<[^>]*>/g, " ")
    .trim();
  const found = /([\w\/._-]+\.test\.[jt]sx?)/.exec(stripped);
  return found === null ? null : found[1];
}
/**
 * Builds the shell command that re-runs the failing test files locally.
 *
 * Keys of `failedFilesMap` that are category labels rather than paths
 * ("Job Error", "Unknown File", "Build Error", "Lint Error") are skipped.
 * Every other key is grouped by npm workspace and reduced to a path relative
 * to its `packages/<dir>/` folder.
 *
 * The workspace is taken from the same `packages/<dir>/` segment that is
 * stripped from the path, so a prefixed path such as
 * `/home/runner/.../packages/core/src/a.test.ts` or `./packages/core/...` is
 * attributed to the core workspace rather than the default one.
 *
 * @param {Map<string, unknown>} failedFilesMap - Keyed by file path or
 *   category label; the values are not read.
 * @returns {string} One `npm test -w <workspace> -- <files>` command per
 *   workspace joined with " && ", or an empty string when no key is a path.
 */
function generateTestCommand(failedFilesMap) {
  const categoryLabels = new Set(["Job Error", "Unknown File", "Build Error", "Lint Error"]);
  const defaultWorkspace = "@google/gemini-cli";
  const workspaceByPackageDir = new Map([
    ["core", "@google/gemini-cli-core"],
    ["cli", "@google/gemini-cli"],
  ]);

  const workspaceToFiles = new Map();
  for (const file of failedFilesMap.keys()) {
    if (categoryLabels.has(file)) continue;

    // Last `packages/<dir>/` path segment, wherever it occurs in the path.
    const packageMatch = file.match(/^(?:.*\/)?packages\/([^\/]+)\//);
    const workspace =
      (packageMatch && workspaceByPackageDir.get(packageMatch[1])) || defaultWorkspace;
    const relPath = file.replace(/^.*packages\/[^\/]+\//, "");

    if (!workspaceToFiles.has(workspace)) workspaceToFiles.set(workspace, new Set());
    workspaceToFiles.get(workspace).add(relPath);
  }

  return Array.from(
    workspaceToFiles,
    ([workspace, files]) => `npm test -w ${workspace} -- ${[...files].join(" ")}`,
  ).join(" && ");
}
/**
 * Finds the workflow run IDs to monitor for BRANCH when no explicit run ID
 * was supplied on the command line.
 *
 * Two sources are combined:
 *  1. `gh run list` for the branch: every run that is not yet completed, or,
 *     if none is active, the first listed run plus every run created less
 *     than 60 seconds before it.
 *  2. Commit statuses of the branch head whose target URL points at an
 *     Actions run (handles chained/indirect runs).
 *
 * @returns {number[]} De-duplicated run IDs; empty when nothing was found.
 */
function discoverRunIds() {
  const runIds = [];

  // 1. Get runs directly associated with the branch
  const runListOutput = runGh(`run list --branch "${BRANCH}" --limit 10 --json databaseId,status,workflowName,createdAt`);
  if (runListOutput) {
    const runs = JSON.parse(runListOutput);
    const activeRuns = runs.filter((r) => r.status !== 'completed');
    if (activeRuns.length > 0) {
      runIds.push(...activeRuns.map((r) => r.databaseId));
    } else if (runs.length > 0) {
      // presumably `gh run list` returns newest first, making runs[0] the latest run — verify
      const latestTime = new Date(runs[0].createdAt).getTime();
      const recentRuns = runs.filter((r) => latestTime - new Date(r.createdAt).getTime() < 60000);
      runIds.push(...recentRuns.map((r) => r.databaseId));
    }
  }

  // 2. Get runs associated with commit statuses (handles chained/indirect runs)
  try {
    const headSha = execSync(`git rev-parse "${BRANCH}"`).toString().trim();
    const statusOutput = runGh(`api repos/${REPO}/commits/${headSha}/status -q '.statuses[] | select(.target_url | contains("actions/runs/")) | .target_url'`);
    if (statusOutput) {
      for (const url of statusOutput.split('\n')) {
        const match = url.match(/actions\/runs\/(\d+)/);
        const runId = match ? parseInt(match[1], 10) : null;
        if (runId && !runIds.includes(runId)) {
          runIds.push(runId);
        }
      }
    }
  } catch (e) {
    // Ignore if branch/SHA not found or API fails
  }

  return runIds;
}

/**
 * Adds the failures of one failed job to the report map.
 *
 * Each non-noise line from the job's filtered log is filed under the test
 * file it mentions, or under 'Lint Error' / 'Build Error' / 'Unknown File'.
 * When the log yields no usable line (nothing matched, or every match was
 * noise), the job is filed under a category derived from its failing step,
 * so the report never omits a failed job.
 *
 * @param {{databaseId: number, name: string, steps?: Array<{name: string, conclusion: string}>}} job
 * @param {Map<string, Set<string>>} fileToTests - Report map, mutated in place.
 */
function recordJobFailures(job, fileToTests) {
  const addEntry = (key, value) => {
    if (!fileToTests.has(key)) fileToTests.set(key, new Set());
    fileToTests.get(key).add(value);
  };

  let recorded = 0;
  const failures = fetchFailuresViaApi(job.databaseId);
  if (failures.trim()) {
    for (const line of failures.split('\n')) {
      if (!line.trim() || isNoise(line)) continue;
      const lower = line.toLowerCase();
      const filePath =
        extractTestFile(line) ||
        (lower.includes('lint') ? 'Lint Error' : (lower.includes('build') ? 'Build Error' : 'Unknown File'));
      // For "file > suite > test" lines keep only the part after the file.
      const testName = line.includes(' > ') ? line.split(' > ').slice(1).join(' > ').trim() : line;
      addEntry(filePath, testName);
      recorded++;
    }
  }
  if (recorded > 0) return;

  const step = job.steps?.find((s) => s.conclusion === 'failure')?.name || 'unknown';
  const stepLower = step.toLowerCase();
  const category = stepLower.includes('lint') ? 'Lint Error' : (stepLower.includes('build') ? 'Build Error' : 'Job Error');
  addEntry(category, `${job.name}: Failed at step "${step}"`);
}

/**
 * Prints the structured failure report and the suggested local command.
 *
 * @param {Map<string, Set<string>>} fileToTests - Failures grouped by file or category.
 * @param {number} failedJobCount - Number of jobs whose conclusion was 'failure'.
 */
function printFailureReport(fileToTests, failedJobCount) {
  console.log(`\n\n❌ Failures detected across ${failedJobCount} job(s). Stopping monitor...`);
  console.log('\n--- Structured Failure Report (Noise Filtered) ---');
  for (const [file, tests] of fileToTests.entries()) {
    console.log(`\nCategory/File: ${file}`);
    // Limit output per file if it's too large
    const testsArr = Array.from(tests).map((t) => (t.length > 500 ? t.substring(0, 500) + "... [TRUNCATED]" : t));
    testsArr.slice(0, 10).forEach((t) => console.log(` - ${t}`));
    if (testsArr.length > 10) console.log(` ... and ${testsArr.length - 10} more`);
  }
  const testCmd = generateTestCommand(fileToTests);
  if (testCmd) {
    console.log('\n🚀 Run this to verify fixes:');
    console.log(testCmd);
  } else if (Array.from(fileToTests.keys()).some((k) => k.includes('Lint'))) {
    console.log('\n🚀 Run this to verify lint fixes:\nnpm run lint:all');
  }
  console.log('---------------------------------');
}

/**
 * Polls the target workflow runs every 15 seconds until an outcome is known,
 * then exits the process:
 *  - exit 1 with a structured report as soon as any job has failed;
 *  - exit 1 when all runs completed but at least one did not conclude
 *    successfully (e.g. cancelled or timed out without a failed job);
 *  - exit 1 when run data cannot be fetched for several consecutive polls;
 *  - exit 0 when there is nothing to monitor or every run passed.
 *
 * Runs are taken from RUN_ID_OVERRIDE when given, otherwise discovered for
 * BRANCH.
 *
 * @returns {Promise<void>} Does not resolve normally; the process exits.
 */
async function monitor() {
  // Run conclusions that count as a pass once a run has completed.
  const passingConclusions = new Set(['success', 'skipped', 'neutral']);
  // Consecutive polls with unreadable run data tolerated before giving up.
  const maxFetchFailures = 3;

  let targetRunIds = [];
  if (RUN_ID_OVERRIDE) {
    targetRunIds = [RUN_ID_OVERRIDE];
  } else {
    targetRunIds = discoverRunIds();
    if (targetRunIds.length > 0) {
      const runNames = [];
      for (const runId of targetRunIds) {
        const runInfo = runGh(`run view "${runId}" --json workflowName`);
        if (runInfo) {
          runNames.push(JSON.parse(runInfo).workflowName);
        }
      }
      console.log(`Monitoring workflows: ${[...new Set(runNames)].join(', ')}`);
    }
  }

  if (targetRunIds.length === 0) {
    console.log(`No runs found for branch ${BRANCH}.`);
    process.exit(0);
  }

  let consecutiveFetchFailures = 0;
  while (true) {
    let allPassed = 0;
    let allFailed = 0;
    let allRunning = 0;
    let allQueued = 0;
    let totalJobs = 0;
    let anyRunInProgress = false;
    let failuresFoundInLoop = false;
    let unreadableRuns = 0;
    const unsuccessfulRuns = [];
    const fileToTests = new Map();

    for (const runId of targetRunIds) {
      const runOutput = runGh(`run view "${runId}" --json databaseId,status,conclusion,workflowName`);
      if (!runOutput) {
        // Unknown state: must not be mistaken for "finished and passed".
        unreadableRuns++;
        continue;
      }
      const run = JSON.parse(runOutput);
      if (run.status !== 'completed') {
        anyRunInProgress = true;
      } else if (!passingConclusions.has(run.conclusion)) {
        unsuccessfulRuns.push(run);
      }

      const jobsOutput = runGh(`run view "${runId}" --json jobs`);
      if (!jobsOutput) continue;
      const { jobs } = JSON.parse(jobsOutput);
      totalJobs += jobs.length;

      const failedJobs = jobs.filter((j) => j.conclusion === 'failure');
      if (failedJobs.length > 0) {
        failuresFoundInLoop = true;
        for (const job of failedJobs) {
          recordJobFailures(job, fileToTests);
        }
      }

      for (const job of jobs) {
        if (job.status === "in_progress") allRunning++;
        else if (job.status === "queued") allQueued++;
        else if (job.conclusion === "success") allPassed++;
        else if (job.conclusion === "failure") allFailed++;
      }
    }

    // Fail fast: report on the first poll that sees a failed job.
    if (failuresFoundInLoop) {
      printFailureReport(fileToTests, allFailed);
      process.exit(1);
    }

    const completed = allPassed + allFailed;
    process.stdout.write(`\r⏳ Monitoring ${targetRunIds.length} runs... ${completed}/${totalJobs} jobs (${allPassed} passed, ${allFailed} failed, ${allRunning} running, ${allQueued} queued) `);

    if (unreadableRuns > 0) {
      consecutiveFetchFailures++;
      if (consecutiveFetchFailures >= maxFetchFailures) {
        console.error(`\nCould not read ${unreadableRuns} of ${targetRunIds.length} run(s) with gh after ${maxFetchFailures} attempts; CI status is unknown.`);
        process.exit(1);
      }
    } else {
      consecutiveFetchFailures = 0;
      if (!anyRunInProgress) {
        if (unsuccessfulRuns.length > 0) {
          console.log('\n❌ Workflows completed without passing:');
          for (const run of unsuccessfulRuns) {
            console.log(` - ${run.workflowName} (run ${run.databaseId}): ${run.conclusion}`);
          }
          process.exit(1);
        }
        console.log('\n✅ All workflows passed!');
        process.exit(0);
      }
    }

    await new Promise((r) => setTimeout(r, 15000));
  }
}
/**
 * Last-resort handler for a rejection escaping monitor(): prints the error
 * message and terminates the process with exit status 1.
 */
function reportFatalError(error) {
  console.error('\nMonitor error:', error.message);
  process.exit(1);
}

// Script entry point.
monitor().catch(reportFatalError);