mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-12 12:54:07 -07:00
feat(skills): add ci skill for automated failure replication (#23720)
This commit is contained in:
@@ -0,0 +1,66 @@
|
|||||||
|
---
|
||||||
|
name: ci
|
||||||
|
description:
|
||||||
|
A specialized skill for Gemini CLI that provides high-performance, fail-fast
|
||||||
|
monitoring of GitHub Actions workflows and automated local verification of CI
|
||||||
|
failures. It handles run discovery automatically—simply provide the branch name.
|
||||||
|
---
|
||||||
|
|
||||||
|
# CI Replicate & Status
|
||||||
|
|
||||||
|
This skill enables the agent to efficiently monitor GitHub Actions, triage
|
||||||
|
failures, and bridge remote CI errors to local development. It defaults to
|
||||||
|
**automatic replication** of failures to streamline the fix cycle.
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
- **Automatic Replication**: Automatically monitors CI and immediately executes
|
||||||
|
suggested test or lint commands locally upon failure.
|
||||||
|
- **Real-time Monitoring**: Aggregated status line for all concurrent workflows
|
||||||
|
on the current branch.
|
||||||
|
- **Fail-Fast Triage**: Immediately stops on the first job failure to provide a
|
||||||
|
structured report.
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### 1. CI Replicate (`replicate`) - DEFAULT
|
||||||
|
Use this as the primary path to monitor CI and **automatically** replicate
|
||||||
|
failures locally for immediate triage and fixing.
|
||||||
|
- **Behavior**: When this workflow is triggered, the agent will monitor the CI
|
||||||
|
and **immediately and automatically execute** all suggested test or lint
|
||||||
|
commands (marked with 🚀) as soon as a failure is detected.
|
||||||
|
- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch]`
|
||||||
|
- **Discovery**: The script **automatically** finds the latest active or recent
|
||||||
|
run for the branch. Do NOT manually search for run IDs.
|
||||||
|
- **Goal**: Reproduce the failure locally without manual intervention, then
|
||||||
|
proceed to analyze and fix the code.
|
||||||
|
|
||||||
|
### 2. CI Status (`status`)
|
||||||
|
Use this when you have pushed changes and need to monitor the CI and reproduce
|
||||||
|
any failures locally.
|
||||||
|
- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch] [run_id]`
|
||||||
|
- **Discovery**: The script **automatically** finds the latest active or recent
|
||||||
|
run for the branch. You should NOT manually search for `run_id` using `gh run list`
|
||||||
|
unless a specific historical run is requested. Simply provide the branch name.
|
||||||
|
- **Step 1 (Monitor)**: Execute the tool with the branch name.
|
||||||
|
- **Step 2 (Extract)**: Extract suggested `npm test` or `npm run lint` commands
|
||||||
|
from the output (marked with 🚀).
|
||||||
|
- **Step 3 (Reproduce)**: Execute those commands locally to confirm the failure.
|
||||||
|
- **Behavior**: It will poll every 15 seconds. If it detects a failure, it will
|
||||||
|
exit with a structured report and provide the exact commands to run locally.
|
||||||
|
|
||||||
|
## Failure Categories & Actions
|
||||||
|
|
||||||
|
- **Test Failures**: Agent should run the specific `npm test -w <pkg> -- <path>`
|
||||||
|
command suggested.
|
||||||
|
- **Lint Errors**: Agent should run `npm run lint:all` or the specific package
|
||||||
|
lint command.
|
||||||
|
- **Build Errors**: Agent should check `tsc` output or build logs to resolve
|
||||||
|
compilation issues.
|
||||||
|
- **Job Errors**: Investigate `gh run view --job <job_id> --log` for
|
||||||
|
infrastructure or setup failures.
|
||||||
|
|
||||||
|
## Noise Filtering
|
||||||
|
The underlying scripts automatically filter noise (Git logs, NPM warnings, stack
|
||||||
|
trace overhead). The agent should focus on the "Structured Failure Report"
|
||||||
|
provided by the tool.
|
||||||
Executable
+224
@@ -0,0 +1,224 @@
|
|||||||
|
#!/usr/bin/env node

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * CI monitor & failure-replication helper for the `ci` skill.
 *
 * Usage: node ci.mjs [branch] [run_id]
 *   branch  - defaults to the currently checked-out git branch
 *   run_id  - optional explicit workflow run to monitor (skips discovery)
 */

import { execSync } from 'node:child_process';

// Branch to monitor: first CLI argument, falling back to the checked-out branch.
const BRANCH = process.argv[2] || execSync('git branch --show-current').toString().trim();
// Optional explicit run id; when present, run discovery is skipped entirely.
const RUN_ID_OVERRIDE = process.argv[3];

// Resolve "owner/repo" from the `origin` remote; fall back to the canonical
// repository when no remote is configured (e.g. a detached CI checkout).
// Handles both SSH (git@github.com:owner/repo.git) and HTTPS remote URLs.
let REPO;
try {
  const remoteUrl = execSync('git remote get-url origin').toString().trim();
  REPO = remoteUrl.replace(/.*github\.com[\/:]/, '').replace(/\.git$/, '').trim();
} catch (e) {
  REPO = 'google-gemini/gemini-cli';
}
|
||||||
|
|
||||||
|
/**
 * Runs a GitHub CLI command and captures its stdout.
 *
 * stderr is suppressed; any failure (gh binary missing, non-zero exit)
 * yields `null` so callers can treat "no data" uniformly.
 *
 * @param {string} args - Arguments appended after `gh `.
 * @returns {string|null} Captured stdout as text, or null on any error.
 */
function runGh(args) {
  const captureOptions = { stdio: ['ignore', 'pipe', 'ignore'] };
  try {
    const stdout = execSync(`gh ${args}`, captureOptions);
    return stdout.toString();
  } catch (ghError) {
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Downloads the raw log for a single workflow job and keeps only lines that
 * look like failures (test FAILs, lint/build errors, exceptions).
 *
 * Filtering is delegated to `grep` so huge logs never enter the JS heap in
 * full; the 10 MiB maxBuffer caps what execSync will capture.
 *
 * @param {string|number} jobId - GitHub Actions job database id.
 * @returns {string} Matching log lines, or "" when nothing matched or the
 *   API call failed (grep exits non-zero on zero matches, which lands here).
 */
function fetchFailuresViaApi(jobId) {
  const captureOptions = { stdio: ['ignore', 'pipe', 'ignore'], maxBuffer: 10 * 1024 * 1024 };
  try {
    const cmd = `gh api repos/${REPO}/actions/jobs/${jobId}/logs | grep -iE " FAIL |❌|ERROR|Lint failed|Build failed|Exception|failed with exit code"`;
    return execSync(cmd, captureOptions).toString();
  } catch (apiError) {
    return "";
  }
}
|
||||||
|
|
||||||
|
/**
 * Returns true when a log line is known noise (git fetch chatter, npm
 * warnings, Node internals, stack-trace frames, node_modules paths) that
 * should be excluded from the structured failure report.
 *
 * @param {string} line - A single raw log line.
 * @returns {boolean} True if the line should be filtered out.
 */
function isNoise(line) {
  const lower = line.toLowerCase();
  return (
    lower.includes('* [new branch]') ||
    lower.includes('npm warn') ||
    lower.includes('fetching updates') ||
    lower.includes('node:internal/errors') ||
    // Stack-trace frames start with "at " after optional indentation.
    // (A plain substring test for 'at ' would also drop genuine failure
    // lines such as `Failed at step "lint"`.)
    /^\s*at /.test(lower) ||
    lower.includes('checkexecsyncerror') ||
    lower.includes('node_modules')
  );
}
|
||||||
|
|
||||||
|
/**
 * Pulls the first test-file path (e.g. "src/foo.test.ts") out of a raw
 * failure line.
 *
 * @param {string} failureText - One line from a CI failure log.
 * @returns {string|null} The matched *.test.{js,jsx,ts,tsx} path, or null.
 */
function extractTestFile(failureText) {
  // Drop table pipes, brackets, and HTML-ish tags first so surrounding
  // markup cannot confuse the path regex.
  const stripped = failureText
    .replace(/[|#\[\]()]/g, " ")
    .replace(/<[^>]*>/g, " ")
    .trim();
  const match = stripped.match(/([\w\/._-]+\.test\.[jt]sx?)/);
  return match ? match[1] : null;
}
|
||||||
|
|
||||||
|
/**
 * Builds a single shell command that re-runs every failed test file locally,
 * grouping files per npm workspace and chaining workspaces with " && ".
 *
 * Category pseudo-keys ("Job Error", "Unknown File", "Build Error",
 * "Lint Error") carry no runnable file and are skipped.
 *
 * @param {Map<string, Set<string>>} failedFilesMap - File path (or category)
 *   mapped to the set of failing test names.
 * @returns {string} Chained `npm test -w ...` commands, or "" when empty.
 */
function generateTestCommand(failedFilesMap) {
  const categoryKeys = ["Job Error", "Unknown File", "Build Error", "Lint Error"];
  const filesByWorkspace = new Map();

  for (const [file] of failedFilesMap.entries()) {
    if (categoryKeys.includes(file)) continue;

    // Default workspace; core package files get their own workspace name.
    let workspace = "@google/gemini-cli";
    let relPath = file;
    if (file.startsWith("packages/core/")) {
      workspace = "@google/gemini-cli-core";
      relPath = file.replace("packages/core/", "");
    } else if (file.startsWith("packages/cli/")) {
      workspace = "@google/gemini-cli";
      relPath = file.replace("packages/cli/", "");
    }
    // Strip any remaining "<prefix>packages/<pkg>/" left in the path.
    relPath = relPath.replace(/^.*packages\/[^\/]+\//, "");

    const bucket = filesByWorkspace.get(workspace) ?? new Set();
    bucket.add(relPath);
    filesByWorkspace.set(workspace, bucket);
  }

  const commands = [];
  for (const [workspace, files] of filesByWorkspace.entries()) {
    commands.push(`npm test -w ${workspace} -- ${Array.from(files).join(" ")}`);
  }
  return commands.join(" && ");
}
|
||||||
|
|
||||||
|
/**
 * Discovers GitHub Actions runs for BRANCH (or uses RUN_ID_OVERRIDE) and
 * polls them until every run completes or a job fails.
 *
 * On failure: prints a noise-filtered structured report plus suggested local
 * replication commands, then exits with code 1. On success (or no runs
 * found): exits 0. Never returns normally — all paths end in process.exit().
 */
async function monitor() {
  let targetRunIds = [];
  if (RUN_ID_OVERRIDE) {
    // Explicit run id supplied on the command line — skip discovery.
    targetRunIds = [RUN_ID_OVERRIDE];
  } else {
    // 1. Get runs directly associated with the branch
    const runListOutput = runGh(`run list --branch "${BRANCH}" --limit 10 --json databaseId,status,workflowName,createdAt`);
    if (runListOutput) {
      const runs = JSON.parse(runListOutput);
      const activeRuns = runs.filter(r => r.status !== 'completed');
      if (activeRuns.length > 0) {
        // Prefer anything still in flight.
        targetRunIds = activeRuns.map(r => r.databaseId);
      } else if (runs.length > 0) {
        // All runs finished: take the most recent "batch" — every run that
        // started within 60s of the newest one (gh lists newest first).
        const latestTime = new Date(runs[0].createdAt).getTime();
        targetRunIds = runs.filter(r => (latestTime - new Date(r.createdAt).getTime()) < 60000).map(r => r.databaseId);
      }
    }

    // 2. Get runs associated with commit statuses (handles chained/indirect runs)
    try {
      const headSha = execSync(`git rev-parse "${BRANCH}"`).toString().trim();
      // jq filter keeps only statuses whose target URL points at an Actions run.
      const statusOutput = runGh(`api repos/${REPO}/commits/${headSha}/status -q '.statuses[] | select(.target_url | contains("actions/runs/")) | .target_url'`);
      if (statusOutput) {
        const statusRunIds = statusOutput.split('\n').filter(Boolean).map(url => {
          const match = url.match(/actions\/runs\/(\d+)/);
          return match ? parseInt(match[1], 10) : null;
        }).filter(Boolean);

        // Merge, de-duplicating against the branch-derived run ids.
        for (const runId of statusRunIds) {
          if (!targetRunIds.includes(runId)) {
            targetRunIds.push(runId);
          }
        }
      }
    } catch (e) {
      // Ignore if branch/SHA not found or API fails
    }

    if (targetRunIds.length > 0) {
      // Announce which workflows are about to be watched (deduped by name).
      const runNames = [];
      for (const runId of targetRunIds) {
        const runInfo = runGh(`run view "${runId}" --json workflowName`);
        if (runInfo) {
          runNames.push(JSON.parse(runInfo).workflowName);
        }
      }
      console.log(`Monitoring workflows: ${[...new Set(runNames)].join(', ')}`);
    }
  }

  if (targetRunIds.length === 0) {
    console.log(`No runs found for branch ${BRANCH}.`);
    process.exit(0);
  }

  // Poll loop: re-reads every run's job list each iteration (15s period)
  // and fails fast on the first detected job failure.
  while (true) {
    let allPassed = 0, allFailed = 0, allRunning = 0, allQueued = 0, totalJobs = 0;
    let anyRunInProgress = false;
    // file path (or failure category) -> set of failing test names/messages
    const fileToTests = new Map();
    let failuresFoundInLoop = false;

    for (const runId of targetRunIds) {
      const runOutput = runGh(`run view "${runId}" --json databaseId,status,conclusion,workflowName`);
      if (!runOutput) continue;
      const run = JSON.parse(runOutput);
      if (run.status !== 'completed') anyRunInProgress = true;

      const jobsOutput = runGh(`run view "${runId}" --json jobs`);
      if (jobsOutput) {
        const { jobs } = JSON.parse(jobsOutput);
        totalJobs += jobs.length;
        const failedJobs = jobs.filter(j => j.conclusion === 'failure');
        if (failedJobs.length > 0) {
          failuresFoundInLoop = true;
          for (const job of failedJobs) {
            // Pull grep-filtered failure lines from the job's raw log.
            const failures = fetchFailuresViaApi(job.databaseId);
            if (failures.trim()) {
              failures.split('\n').forEach(line => {
                if (!line.trim() || isNoise(line)) return;
                const file = extractTestFile(line);
                // No file path found -> bucket under a coarse category.
                const filePath = file || (line.toLowerCase().includes('lint') ? 'Lint Error' : (line.toLowerCase().includes('build') ? 'Build Error' : 'Unknown File'));
                let testName = line;
                // Vitest-style "suite > test" lines: keep only the test part.
                if (line.includes(' > ')) {
                  testName = line.split(' > ').slice(1).join(' > ').trim();
                }
                if (!fileToTests.has(filePath)) fileToTests.set(filePath, new Set());
                fileToTests.get(filePath).add(testName);
              });
            } else {
              // Log yielded nothing useful — fall back to the failed step name.
              const step = job.steps?.find(s => s.conclusion === 'failure')?.name || 'unknown';
              const category = step.toLowerCase().includes('lint') ? 'Lint Error' : (step.toLowerCase().includes('build') ? 'Build Error' : 'Job Error');
              if (!fileToTests.has(category)) fileToTests.set(category, new Set());
              fileToTests.get(category).add(`${job.name}: Failed at step "${step}"`);
            }
          }
        }
        // Aggregate counters for the single-line status display.
        for (const job of jobs) {
          if (job.status === "in_progress") allRunning++;
          else if (job.status === "queued") allQueued++;
          else if (job.conclusion === "success") allPassed++;
          else if (job.conclusion === "failure") allFailed++;
        }
      }
    }

    if (failuresFoundInLoop) {
      // Fail fast: emit the structured report plus replication commands.
      console.log(`\n\n❌ Failures detected across ${allFailed} job(s). Stopping monitor...`);
      console.log('\n--- Structured Failure Report (Noise Filtered) ---');
      for (const [file, tests] of fileToTests.entries()) {
        console.log(`\nCategory/File: ${file}`);
        // Limit output per file if it's too large
        const testsArr = Array.from(tests).map(t => t.length > 500 ? t.substring(0, 500) + "... [TRUNCATED]" : t);
        testsArr.slice(0, 10).forEach(t => console.log(` - ${t}`));
        if (testsArr.length > 10) console.log(` ... and ${testsArr.length - 10} more`);
      }
      const testCmd = generateTestCommand(fileToTests);
      if (testCmd) {
        console.log('\n🚀 Run this to verify fixes:');
        console.log(testCmd);
      } else if (Array.from(fileToTests.keys()).some(k => k.includes('Lint'))) {
        console.log('\n🚀 Run this to verify lint fixes:\nnpm run lint:all');
      }
      console.log('---------------------------------');
      process.exit(1);
    }

    // Progress indicator: carriage return overwrites the line in place.
    const completed = allPassed + allFailed;
    process.stdout.write(`\r⏳ Monitoring ${targetRunIds.length} runs... ${completed}/${totalJobs} jobs (${allPassed} passed, ${allFailed} failed, ${allRunning} running, ${allQueued} queued) `);
    if (!anyRunInProgress) {
      console.log('\n✅ All workflows passed!');
      process.exit(0);
    }
    await new Promise(r => setTimeout(r, 15000));
  }
}
|
||||||
|
|
||||||
|
// Entry point: run the monitor loop and surface any unexpected crash as a
// non-zero exit so callers (agents, shells) can detect it.
void monitor().catch((err) => {
  console.error('\nMonitor error:', err.message);
  process.exit(1);
});
|
||||||
Reference in New Issue
Block a user