feat(skills): add ci skill for automated failure replication (#23720)

2026-05-13 13:22:35 -07:00 · 2026-03-24 17:43:04 -07:00
parent 0552464eed
commit f74f2b0780
2 changed files with 290 additions and 0 deletions
@@ -0,0 +1,66 @@
+---
+name: ci
+description:
+  A specialized skill for Gemini CLI that provides high-performance, fail-fast
+  monitoring of GitHub Actions workflows and automated local verification of CI
+  failures. It handles run discovery automatically—simply provide the branch name.
+---
+
+# CI Replicate & Status
+
+This skill enables the agent to efficiently monitor GitHub Actions, triage
+failures, and bridge remote CI errors to local development. It defaults to
+**automatic replication** of failures to streamline the fix cycle.
+
+## Core Capabilities
+
+- **Automatic Replication**: Automatically monitors CI and immediately executes 
+  suggested test or lint commands locally upon failure.
+- **Real-time Monitoring**: Aggregated status line for all concurrent workflows
+  on the current branch.
+- **Fail-Fast Triage**: Immediately stops on the first job failure to provide a
+  structured report.
+
+## Workflow
+
+### 1. CI Replicate (`replicate`) - DEFAULT
+Use this as the primary path to monitor CI and **automatically** replicate 
+failures locally for immediate triage and fixing.
+- **Behavior**: When this workflow is triggered, the agent will monitor the CI
+  and **immediately and automatically execute** all suggested test or lint
+  commands (marked with 🚀) as soon as a failure is detected. 
+- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch]`
+- **Discovery**: The script **automatically** finds the latest active or recent
+  run for the branch. Do NOT manually search for run IDs.
+- **Goal**: Reproduce the failure locally without manual intervention, then
+  proceed to analyze and fix the code.
+
+### 2. CI Status (`status`)
+Use this when you have pushed changes and need to monitor the CI and reproduce
+any failures locally.
+- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch] [run_id]`
+- **Discovery**: The script **automatically** finds the latest active or recent
+  run for the branch. You should NOT manually search for `run_id` using `gh run list`
+  unless a specific historical run is requested. Simply provide the branch name.
+- **Step 1 (Monitor)**: Execute the tool with the branch name.
+- **Step 2 (Extract)**: Extract suggested `npm test` or `npm run lint` commands
+  from the output (marked with 🚀).
+- **Step 3 (Reproduce)**: Execute those commands locally to confirm the failure.
+- **Behavior**: It will poll every 15 seconds. If it detects a failure, it will
+  exit with a structured report and provide the exact commands to run locally.
+
+## Failure Categories & Actions
+
+- **Test Failures**: Agent should run the specific `npm test -w <pkg> -- <path>`
+  command suggested.
+- **Lint Errors**: Agent should run `npm run lint:all` or the specific package
+  lint command.
+- **Build Errors**: Agent should check `tsc` output or build logs to resolve
+  compilation issues.
+- **Job Errors**: Investigate `gh run view --job <job_id> --log` for
+  infrastructure or setup failures.
+
+## Noise Filtering
+The underlying scripts automatically filter noise (Git logs, NPM warnings, stack
+trace overhead). The agent should focus on the "Structured Failure Report"
+provided by the tool.
@@ -0,0 +1,224 @@
+#!/usr/bin/env node
+
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { execSync } from 'node:child_process';
+
+const BRANCH = process.argv[2] || execSync('git branch --show-current').toString().trim();
+const RUN_ID_OVERRIDE = process.argv[3];
+
+let REPO;
+try {
+  const remoteUrl = execSync('git remote get-url origin').toString().trim();
+  REPO = remoteUrl.replace(/.*github\.com[\/:]/, '').replace(/\.git$/, '').trim();
+} catch (e) {
+  REPO = 'google-gemini/gemini-cli';
+}
+
+const FAILED_FILES = new Set();
+
+function runGh(args) {
+  try {
+    return execSync(`gh ${args}`, { stdio: ['ignore', 'pipe', 'ignore'] }).toString();
+  } catch (e) {
+    return null;
+  }
+}
+
+function fetchFailuresViaApi(jobId) {
+  try {
+    const cmd = `gh api repos/${REPO}/actions/jobs/${jobId}/logs | grep -iE " FAIL |❌|ERROR|Lint failed|Build failed|Exception|failed with exit code"`;
+    return execSync(cmd, { stdio: ['ignore', 'pipe', 'ignore'], maxBuffer: 10 * 1024 * 1024 }).toString();
+  } catch (e) {
+    return "";
+  }
+}
+
+function isNoise(line) {
+  const lower = line.toLowerCase();
+  return (
+    lower.includes('* [new branch]') ||
+    lower.includes('npm warn') ||
+    lower.includes('fetching updates') ||
+    lower.includes('node:internal/errors') ||
+    lower.includes('at ') || // Stack traces
+    lower.includes('checkexecsyncerror') ||
+    lower.includes('node_modules')
+  );
+}
+
+function extractTestFile(failureText) {
+  const cleanLine = failureText.replace(/[|#\[\]()]/g, " ").replace(/<[^>]*>/g, " ").trim();
+  const fileMatch = cleanLine.match(/([\w\/._-]+\.test\.[jt]sx?)/);
+  if (fileMatch) return fileMatch[1];
+  return null;
+}
+
+function generateTestCommand(failedFilesMap) {
+  const workspaceToFiles = new Map();
+  for (const [file, info] of failedFilesMap.entries()) {
+    if (["Job Error", "Unknown File", "Build Error", "Lint Error"].includes(file)) continue;
+    let workspace = "@google/gemini-cli";
+    let relPath = file;
+    if (file.startsWith("packages/core/")) {
+      workspace = "@google/gemini-cli-core";
+      relPath = file.replace("packages/core/", "");
+    } else if (file.startsWith("packages/cli/")) {
+      workspace = "@google/gemini-cli";
+      relPath = file.replace("packages/cli/", "");
+    }
+    relPath = relPath.replace(/^.*packages\/[^\/]+\//, "");
+    if (!workspaceToFiles.has(workspace)) workspaceToFiles.set(workspace, new Set());
+    workspaceToFiles.get(workspace).add(relPath);
+  }
+  const commands = [];
+  for (const [workspace, files] of workspaceToFiles.entries()) {
+    commands.push(`npm test -w ${workspace} -- ${Array.from(files).join(" ")}`);
+  }
+  return commands.join(" && ");
+}
+
+async function monitor() {
+  let targetRunIds = [];
+  if (RUN_ID_OVERRIDE) {
+    targetRunIds = [RUN_ID_OVERRIDE];
+  } else {
+    // 1. Get runs directly associated with the branch
+    const runListOutput = runGh(`run list --branch "${BRANCH}" --limit 10 --json databaseId,status,workflowName,createdAt`);
+    if (runListOutput) {
+      const runs = JSON.parse(runListOutput);
+      const activeRuns = runs.filter(r => r.status !== 'completed');
+      if (activeRuns.length > 0) {
+        targetRunIds = activeRuns.map(r => r.databaseId);
+      } else if (runs.length > 0) {
+        const latestTime = new Date(runs[0].createdAt).getTime();
+        targetRunIds = runs.filter(r => (latestTime - new Date(r.createdAt).getTime()) < 60000).map(r => r.databaseId);
+      }
+    }
+
+    // 2. Get runs associated with commit statuses (handles chained/indirect runs)
+    try {
+      const headSha = execSync(`git rev-parse "${BRANCH}"`).toString().trim();
+      const statusOutput = runGh(`api repos/${REPO}/commits/${headSha}/status -q '.statuses[] | select(.target_url | contains("actions/runs/")) | .target_url'`);
+      if (statusOutput) {
+        const statusRunIds = statusOutput.split('\n').filter(Boolean).map(url => {
+          const match = url.match(/actions\/runs\/(\d+)/);
+          return match ? parseInt(match[1], 10) : null;
+        }).filter(Boolean);
+        
+        for (const runId of statusRunIds) {
+          if (!targetRunIds.includes(runId)) {
+            targetRunIds.push(runId);
+          }
+        }
+      }
+    } catch (e) {
+      // Ignore if branch/SHA not found or API fails
+    }
+
+    if (targetRunIds.length > 0) {
+      const runNames = [];
+      for (const runId of targetRunIds) {
+        const runInfo = runGh(`run view "${runId}" --json workflowName`);
+        if (runInfo) {
+          runNames.push(JSON.parse(runInfo).workflowName);
+        }
+      }
+      console.log(`Monitoring workflows: ${[...new Set(runNames)].join(', ')}`);
+    }
+  }
+
+  if (targetRunIds.length === 0) {
+    console.log(`No runs found for branch ${BRANCH}.`);
+    process.exit(0);
+  }
+
+  while (true) {
+    let allPassed = 0, allFailed = 0, allRunning = 0, allQueued = 0, totalJobs = 0;
+    let anyRunInProgress = false;
+    const fileToTests = new Map();
+    let failuresFoundInLoop = false;
+
+    for (const runId of targetRunIds) {
+      const runOutput = runGh(`run view "${runId}" --json databaseId,status,conclusion,workflowName`);
+      if (!runOutput) continue;
+      const run = JSON.parse(runOutput);
+      if (run.status !== 'completed') anyRunInProgress = true;
+
+      const jobsOutput = runGh(`run view "${runId}" --json jobs`);
+      if (jobsOutput) {
+        const { jobs } = JSON.parse(jobsOutput);
+        totalJobs += jobs.length;
+        const failedJobs = jobs.filter(j => j.conclusion === 'failure');
+        if (failedJobs.length > 0) {
+          failuresFoundInLoop = true;
+          for (const job of failedJobs) {
+            const failures = fetchFailuresViaApi(job.databaseId);
+            if (failures.trim()) {
+              failures.split('\n').forEach(line => {
+                if (!line.trim() || isNoise(line)) return;
+                const file = extractTestFile(line);
+                const filePath = file || (line.toLowerCase().includes('lint') ? 'Lint Error' : (line.toLowerCase().includes('build') ? 'Build Error' : 'Unknown File'));
+                let testName = line;
+                if (line.includes(' > ')) {
+                   testName = line.split(' > ').slice(1).join(' > ').trim();
+                }
+                if (!fileToTests.has(filePath)) fileToTests.set(filePath, new Set());
+                fileToTests.get(filePath).add(testName);
+              });
+            } else {
+              const step = job.steps?.find(s => s.conclusion === 'failure')?.name || 'unknown';
+              const category = step.toLowerCase().includes('lint') ? 'Lint Error' : (step.toLowerCase().includes('build') ? 'Build Error' : 'Job Error');
+              if (!fileToTests.has(category)) fileToTests.set(category, new Set());
+              fileToTests.get(category).add(`${job.name}: Failed at step "${step}"`);
+            }
+          }
+        }
+        for (const job of jobs) {
+          if (job.status === "in_progress") allRunning++;
+          else if (job.status === "queued") allQueued++;
+          else if (job.conclusion === "success") allPassed++;
+          else if (job.conclusion === "failure") allFailed++;
+        }
+      }
+    }
+
+    if (failuresFoundInLoop) {
+      console.log(`\n\n❌ Failures detected across ${allFailed} job(s). Stopping monitor...`);
+      console.log('\n--- Structured Failure Report (Noise Filtered) ---');
+      for (const [file, tests] of fileToTests.entries()) {
+        console.log(`\nCategory/File: ${file}`);
+        // Limit output per file if it's too large
+        const testsArr = Array.from(tests).map(t => t.length > 500 ? t.substring(0, 500) + "... [TRUNCATED]" : t);
+        testsArr.slice(0, 10).forEach(t => console.log(`  - ${t}`));
+        if (testsArr.length > 10) console.log(`  ... and ${testsArr.length - 10} more`);
+      }
+      const testCmd = generateTestCommand(fileToTests);
+      if (testCmd) {
+        console.log('\n🚀 Run this to verify fixes:');
+        console.log(testCmd);
+      } else if (Array.from(fileToTests.keys()).some(k => k.includes('Lint'))) {
+         console.log('\n🚀 Run this to verify lint fixes:\nnpm run lint:all');
+      }
+      console.log('---------------------------------');
+      process.exit(1);
+    }
+
+    const completed = allPassed + allFailed;
+    process.stdout.write(`\r⏳ Monitoring ${targetRunIds.length} runs... ${completed}/${totalJobs} jobs (${allPassed} passed, ${allFailed} failed, ${allRunning} running, ${allQueued} queued)          `);
+    if (!anyRunInProgress) {
+      console.log('\n✅ All workflows passed!');
+      process.exit(0);
+    }
+    await new Promise(r => setTimeout(r, 15000));
+  }
+}
+
+monitor().catch(err => {
+  console.error('\nMonitor error:', err.message);
+  process.exit(1);
+});