From 295831105b72941c85217588e8b646237f2b1490 Mon Sep 17 00:00:00 2001 From: mkorwel Date: Fri, 13 Mar 2026 19:39:41 -0700 Subject: [PATCH] feat: add 'review-pr' skill and sunset 'async-pr-review' --- .gemini/skills/async-pr-review/SKILL.md | 45 ---- .gemini/skills/async-pr-review/policy.toml | 148 ----------- .../async-pr-review/scripts/async-review.sh | 241 ------------------ .../scripts/check-async-review.sh | 65 ----- .gemini/skills/offload/README.md | 13 + .gemini/skills/offload/policy.toml | 25 +- .../offload/scripts/playbooks/review.ts | 3 +- .gemini/skills/review-pr/SKILL.md | 59 +++++ .../review-pr/scripts/extract-failures.ts | 59 +++++ 9 files changed, 156 insertions(+), 502 deletions(-) delete mode 100644 .gemini/skills/async-pr-review/SKILL.md delete mode 100644 .gemini/skills/async-pr-review/policy.toml delete mode 100755 .gemini/skills/async-pr-review/scripts/async-review.sh delete mode 100755 .gemini/skills/async-pr-review/scripts/check-async-review.sh create mode 100644 .gemini/skills/review-pr/SKILL.md create mode 100644 .gemini/skills/review-pr/scripts/extract-failures.ts diff --git a/.gemini/skills/async-pr-review/SKILL.md b/.gemini/skills/async-pr-review/SKILL.md deleted file mode 100644 index 74bc469b56..0000000000 --- a/.gemini/skills/async-pr-review/SKILL.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -name: async-pr-review -description: Trigger this skill when the user wants to start an asynchronous PR review, run background checks on a PR, or check the status of a previously started async PR review. ---- - -# Async PR Review - -This skill provides a set of tools to asynchronously review a Pull Request. It will create a background job to run the project's preflight checks, execute Gemini-powered test plans, and perform a comprehensive code review using custom prompts. - -This skill is designed to showcase an advanced "Agentic Asynchronous Pattern": -1. **Native Background Shells vs Headless Inference**: While Gemini CLI can natively spawn and detach background shell commands (using the `run_shell_command` tool with `is_background: true`), a standard bash background job cannot perform LLM inference. To conduct AI-driven code reviews and test generation in the background, the shell script *must* invoke the `gemini` executable headlessly using `-p`. This offloads the AI tasks to independent worker agents. -2. **Dynamic Git Scoping**: The review scripts avoid hardcoded paths. They use `git rev-parse --show-toplevel` to automatically resolve the root of the user's current project. -3. **Ephemeral Worktrees**: Instead of checking out branches in the user's main workspace, the skill provisions temporary git worktrees in `.gemini/tmp/async-reviews/pr-`. This prevents git lock conflicts and namespace pollution. -4. **Agentic Evaluation (`check-async-review.sh`)**: The check script outputs clean JSON/text statuses for the main agent to parse. The interactive agent itself synthesizes the final assessment dynamically from the generated log files. - -## Workflow - -1. **Determine Action**: Establish whether the user wants to start a new async review or check the status of an existing one. - * If the user says "start an async review for PR #123" or similar, proceed to **Start Review**. - * If the user says "check the status of my async review for PR #123" or similar, proceed to **Check Status**. - -### Start Review - -If the user wants to start a new async PR review: - -1. Ask the user for the PR number if they haven't provided it. -2. Execute the `async-review.sh` script, passing the PR number as the first argument. Be sure to run it with the `is_background` flag set to true to ensure it immediately detaches. - ```bash - .gemini/skills/async-pr-review/scripts/async-review.sh - ``` -3. Inform the user that the tasks have started successfully and they can check the status later. - -### Check Status - -If the user wants to check the status or view the final assessment of a previously started async review: - -1. Ask the user for the PR number if they haven't provided it. -2. Execute the `check-async-review.sh` script, passing the PR number as the first argument: - ```bash - .gemini/skills/async-pr-review/scripts/check-async-review.sh - ``` -3. **Evaluate Output**: Read the output from the script. - * If the output contains `STATUS: IN_PROGRESS`, tell the user which tasks are still running. - * If the output contains `STATUS: COMPLETE`, use your file reading tools (`read_file`) to retrieve the contents of `final-assessment.md`, `review.md`, `pr-diff.diff`, `npm-test.log`, and `test-execution.log` files from the `LOG_DIR` specified in the output. - * **Final Assessment**: Read those files, synthesize their results, and give the user a concise recommendation on whether the PR builds successfully, passes tests, and if you recommend they approve it based on the review. \ No newline at end of file diff --git a/.gemini/skills/async-pr-review/policy.toml b/.gemini/skills/async-pr-review/policy.toml deleted file mode 100644 index dd26fd772c..0000000000 --- a/.gemini/skills/async-pr-review/policy.toml +++ /dev/null @@ -1,148 +0,0 @@ -# --- CORE TOOLS --- -[[rule]] -toolName = "read_file" -decision = "allow" -priority = 100 - -[[rule]] -toolName = "write_file" -decision = "allow" -priority = 100 - -[[rule]] -toolName = "grep_search" -decision = "allow" -priority = 100 - -[[rule]] -toolName = "glob" -decision = "allow" -priority = 100 - -[[rule]] -toolName = "list_directory" -decision = "allow" -priority = 100 - -[[rule]] -toolName = "codebase_investigator" -decision = "allow" -priority = 100 - -# --- SHELL COMMANDS --- - -# Git (Safe/Read-only) -[[rule]] -toolName = "run_shell_command" -commandPrefix = [ - "git blame", - "git show", - "git grep", - "git show-ref", - "git ls-tree", - "git ls-remote", - "git reflog", - "git remote -v", - "git diff", - "git rev-list", - "git rev-parse", - "git merge-base", - "git cherry", - "git fetch", - "git status", - "git st", - "git branch", - "git br", - "git log", - "git --version" -] -decision = "allow" -priority = 100 - -# GitHub CLI (Read-only) -[[rule]] -toolName = "run_shell_command" -commandPrefix = [ - "gh workflow list", - "gh auth status", - "gh checkout view", - "gh run view", - "gh run job view", - "gh run list", - "gh run --help", - "gh issue view", - "gh issue list", - "gh label list", - "gh pr diff", - "gh pr check", - "gh pr checks", - "gh pr view", - "gh pr list", - "gh pr status", - "gh repo view", - "gh job view", - "gh api", - "gh log" -] -decision = "allow" -priority = 100 - -# Node.js/NPM (Generic Tests, Checks, and Build) -[[rule]] -toolName = "run_shell_command" -commandPrefix = [ - "npm run start", - "npm install", - "npm run", - "npm test", - "npm ci", - "npm list", - "npm --version" -] -decision = "allow" -priority = 100 - -# Core Utilities (Safe) -[[rule]] -toolName = "run_shell_command" -commandPrefix = [ - "sleep", - "env", - "break", - "xargs", - "base64", - "uniq", - "sort", - "echo", - "which", - "ls", - "find", - "tail", - "head", - "cat", - "cd", - "grep", - "ps", - "pwd", - "wc", - "file", - "stat", - "diff", - "lsof", - "date", - "whoami", - "uname", - "du", - "cut", - "true", - "false", - "readlink", - "awk", - "jq", - "rg", - "less", - "more", - "tree" -] -decision = "allow" -priority = 100 diff --git a/.gemini/skills/async-pr-review/scripts/async-review.sh b/.gemini/skills/async-pr-review/scripts/async-review.sh deleted file mode 100755 index d408c5f2f1..0000000000 --- a/.gemini/skills/async-pr-review/scripts/async-review.sh +++ /dev/null @@ -1,241 +0,0 @@ -#!/bin/bash - -notify() { - local title="$1" - local message="$2" - local pr="$3" - # Terminal escape sequence - printf "\e]9;%s | PR #%s | %s\a" "$title" "$pr" "$message" - # Native macOS notification - if [[ "$(uname)" == "Darwin" ]]; then - osascript -e "display notification \"$message\" with title \"$title\" subtitle \"PR #$pr\"" - fi -} - -pr_number=$1 -if [[ -z "$pr_number" ]]; then - echo "Usage: async-review " - exit 1 -fi - -base_dir=$(git rev-parse --show-toplevel 2>/dev/null) -if [[ -z "$base_dir" ]]; then - echo "โŒ Must be run from within a git repository." - exit 1 -fi - -# Use the repository's local .gemini/tmp directory for ephemeral worktrees and logs -pr_dir="$base_dir/.gemini/tmp/async-reviews/pr-$pr_number" -target_dir="$pr_dir/worktree" -log_dir="$pr_dir/logs" - -cd "$base_dir" || exit 1 - -mkdir -p "$log_dir" -rm -f "$log_dir/setup.exit" "$log_dir/final-assessment.exit" "$log_dir/final-assessment.md" - -echo "๐Ÿงน Cleaning up previous worktree if it exists..." | tee -a "$log_dir/setup.log" -git worktree remove -f "$target_dir" >> "$log_dir/setup.log" 2>&1 || true -git branch -D "gemini-async-pr-$pr_number" >> "$log_dir/setup.log" 2>&1 || true -git worktree prune >> "$log_dir/setup.log" 2>&1 || true - -echo "๐Ÿ“ก Fetching PR #$pr_number..." | tee -a "$log_dir/setup.log" -if ! git fetch origin -f "pull/$pr_number/head:gemini-async-pr-$pr_number" >> "$log_dir/setup.log" 2>&1; then - echo 1 > "$log_dir/setup.exit" - echo "โŒ Fetch failed. Check $log_dir/setup.log" - notify "Async Review Failed" "Fetch failed." "$pr_number" - exit 1 -fi - -if [[ ! -d "$target_dir" ]]; then - echo "๐Ÿงน Pruning missing worktrees..." | tee -a "$log_dir/setup.log" - git worktree prune >> "$log_dir/setup.log" 2>&1 - echo "๐ŸŒฟ Creating worktree in $target_dir..." | tee -a "$log_dir/setup.log" - if ! git worktree add "$target_dir" "gemini-async-pr-$pr_number" >> "$log_dir/setup.log" 2>&1; then - echo 1 > "$log_dir/setup.exit" - echo "โŒ Worktree creation failed. Check $log_dir/setup.log" - notify "Async Review Failed" "Worktree creation failed." "$pr_number" - exit 1 - fi -else - echo "๐ŸŒฟ Worktree already exists." | tee -a "$log_dir/setup.log" -fi -echo 0 > "$log_dir/setup.exit" - -cd "$target_dir" || exit 1 - -echo "๐Ÿš€ Launching background tasks. Logs saving to: $log_dir" - -echo " โ†ณ [1/5] Grabbing PR diff..." -rm -f "$log_dir/pr-diff.exit" -{ gh pr diff "$pr_number" > "$log_dir/pr-diff.diff" 2>&1; echo $? > "$log_dir/pr-diff.exit"; } & - -echo " โ†ณ [2/5] Starting build and lint..." -rm -f "$log_dir/build-and-lint.exit" -{ { npm run clean && npm ci && npm run format && npm run build && npm run lint:ci && npm run typecheck; } > "$log_dir/build-and-lint.log" 2>&1; echo $? > "$log_dir/build-and-lint.exit"; } & - -# Dynamically resolve gemini binary (fallback to your nightly path) -GEMINI_CMD=$(which gemini || echo "$HOME/.gcli/nightly/node_modules/.bin/gemini") -POLICY_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/policy.toml" - -echo " โ†ณ [3/5] Starting Gemini code review..." -rm -f "$log_dir/review.exit" -{ "$GEMINI_CMD" --policy "$POLICY_PATH" -p "/review-frontend $pr_number" > "$log_dir/review.md" 2>&1; echo $? > "$log_dir/review.exit"; } & - -echo " โ†ณ [4/5] Starting automated tests (waiting for build and lint)..." -rm -f "$log_dir/npm-test.exit" -{ - while [ ! -f "$log_dir/build-and-lint.exit" ]; do sleep 1; done - if [ "$(cat "$log_dir/build-and-lint.exit")" == "0" ]; then - gh pr checks "$pr_number" > "$log_dir/ci-checks.log" 2>&1 - ci_status=$? - - if [ "$ci_status" -eq 0 ]; then - echo "CI checks passed. Skipping local npm tests." > "$log_dir/npm-test.log" - echo 0 > "$log_dir/npm-test.exit" - elif [ "$ci_status" -eq 8 ]; then - echo "CI checks are still pending. Skipping local npm tests to avoid duplicate work. Please check GitHub for final results." > "$log_dir/npm-test.log" - echo 0 > "$log_dir/npm-test.exit" - else - echo "CI checks failed. Failing checks:" > "$log_dir/npm-test.log" - gh pr checks "$pr_number" --json name,bucket -q '.[] | select(.bucket=="fail") | .name' >> "$log_dir/npm-test.log" 2>&1 - - echo "Attempting to extract failing test files from CI logs..." >> "$log_dir/npm-test.log" - pr_branch=$(gh pr view "$pr_number" --json headRefName -q '.headRefName' 2>/dev/null) - run_id=$(gh run list --branch "$pr_branch" --workflow ci.yml --json databaseId -q '.[0].databaseId' 2>/dev/null) - - failed_files="" - if [[ -n "$run_id" ]]; then - failed_files=$(gh run view "$run_id" --log-failed 2>/dev/null | grep -o -E '(packages/[a-zA-Z0-9_-]+|integration-tests|evals)/[a-zA-Z0-9_/-]+\.test\.ts(x)?' | sort | uniq) - fi - - if [[ -n "$failed_files" ]]; then - echo "Found failing test files from CI:" >> "$log_dir/npm-test.log" - for f in $failed_files; do echo " - $f" >> "$log_dir/npm-test.log"; done - echo "Running ONLY failing tests locally..." >> "$log_dir/npm-test.log" - - exit_code=0 - for file in $failed_files; do - if [[ "$file" == packages/* ]]; then - ws_dir=$(echo "$file" | cut -d'/' -f1,2) - else - ws_dir=$(echo "$file" | cut -d'/' -f1) - fi - rel_file=${file#$ws_dir/} - - echo "--- Running $rel_file in workspace $ws_dir ---" >> "$log_dir/npm-test.log" - if ! npm run test:ci -w "$ws_dir" -- "$rel_file" >> "$log_dir/npm-test.log" 2>&1; then - exit_code=1 - fi - done - echo $exit_code > "$log_dir/npm-test.exit" - else - echo "Could not extract specific failing files. Skipping full local test suite as it takes too long. Please check CI logs manually." >> "$log_dir/npm-test.log" - echo 1 > "$log_dir/npm-test.exit" - fi - fi - else - echo "Skipped due to build-and-lint failure" > "$log_dir/npm-test.log" - echo 1 > "$log_dir/npm-test.exit" - fi -} & - -echo " โ†ณ [5/5] Starting Gemini test execution (waiting for build and lint)..." -rm -f "$log_dir/test-execution.exit" -{ - while [ ! -f "$log_dir/build-and-lint.exit" ]; do sleep 1; done - if [ "$(cat "$log_dir/build-and-lint.exit")" == "0" ]; then - "$GEMINI_CMD" --policy "$POLICY_PATH" -p "Analyze the diff for PR $pr_number using 'gh pr diff $pr_number'. Instead of running the project's automated test suite (like 'npm test'), physically exercise the newly changed code in the terminal (e.g., by writing a temporary script to call the new functions, or testing the CLI command directly). Verify the feature's behavior works as expected. IMPORTANT: Do NOT modify any source code to fix errors. Just exercise the code and log the results, reporting any failures clearly. Do not ask for user confirmation." > "$log_dir/test-execution.log" 2>&1; echo $? > "$log_dir/test-execution.exit" - else - echo "Skipped due to build-and-lint failure" > "$log_dir/test-execution.log" - echo 1 > "$log_dir/test-execution.exit" - fi -} & - -echo "โœ… All tasks dispatched!" -echo "You can monitor progress with: tail -f $log_dir/*.log" -echo "Read your review later at: $log_dir/review.md" - -# Polling loop to wait for all background tasks to finish -tasks=("pr-diff" "build-and-lint" "review" "npm-test" "test-execution") -log_files=("pr-diff.diff" "build-and-lint.log" "review.md" "npm-test.log" "test-execution.log") - -declare -A task_done -for t in "${tasks[@]}"; do task_done[$t]=0; done - -all_done=0 -while [[ $all_done -eq 0 ]]; do - clear - echo "==================================================" - echo "๐Ÿš€ Async PR Review Status for PR #$pr_number" - echo "==================================================" - echo "" - - all_done=1 - for i in "${!tasks[@]}"; do - t="${tasks[$i]}" - - if [[ -f "$log_dir/$t.exit" ]]; then - exit_code=$(cat "$log_dir/$t.exit") - if [[ "$exit_code" == "0" ]]; then - echo " โœ… $t: SUCCESS" - else - echo " โŒ $t: FAILED (exit code $exit_code)" - fi - task_done[$t]=1 - else - echo " โณ $t: RUNNING" - all_done=0 - fi - done - - echo "" - echo "==================================================" - echo "๐Ÿ“ Live Logs (Last 5 lines of running tasks)" - echo "==================================================" - - for i in "${!tasks[@]}"; do - t="${tasks[$i]}" - log_file="${log_files[$i]}" - - if [[ ${task_done[$t]} -eq 0 ]]; then - if [[ -f "$log_dir/$log_file" ]]; then - echo "" - echo "--- $t ---" - tail -n 5 "$log_dir/$log_file" - fi - fi - done - - if [[ $all_done -eq 0 ]]; then - sleep 3 - fi -done - -clear -echo "==================================================" -echo "๐Ÿš€ Async PR Review Status for PR #$pr_number" -echo "==================================================" -echo "" -for t in "${tasks[@]}"; do - exit_code=$(cat "$log_dir/$t.exit") - if [[ "$exit_code" == "0" ]]; then - echo " โœ… $t: SUCCESS" - else - echo " โŒ $t: FAILED (exit code $exit_code)" - fi -done -echo "" - -echo "โณ Tasks complete! Synthesizing final assessment..." -if ! "$GEMINI_CMD" --policy "$POLICY_PATH" -p "Read the review at $log_dir/review.md, the automated test logs at $log_dir/npm-test.log, and the manual test execution logs at $log_dir/test-execution.log. Summarize the results, state whether the build and tests passed based on $log_dir/build-and-lint.exit and $log_dir/npm-test.exit, and give a final recommendation for PR $pr_number." > "$log_dir/final-assessment.md" 2>&1; then - echo $? > "$log_dir/final-assessment.exit" - echo "โŒ Final assessment synthesis failed!" - echo "Check $log_dir/final-assessment.md for details." - notify "Async Review Failed" "Final assessment synthesis failed." "$pr_number" - exit 1 -fi - -echo 0 > "$log_dir/final-assessment.exit" -echo "โœ… Final assessment complete! Check $log_dir/final-assessment.md" -notify "Async Review Complete" "Review and test execution finished successfully." "$pr_number" diff --git a/.gemini/skills/async-pr-review/scripts/check-async-review.sh b/.gemini/skills/async-pr-review/scripts/check-async-review.sh deleted file mode 100755 index fbb58c2b72..0000000000 --- a/.gemini/skills/async-pr-review/scripts/check-async-review.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash -pr_number=$1 - -if [[ -z "$pr_number" ]]; then - echo "Usage: check-async-review " - exit 1 -fi - -base_dir=$(git rev-parse --show-toplevel 2>/dev/null) -if [[ -z "$base_dir" ]]; then - echo "โŒ Must be run from within a git repository." - exit 1 -fi - -log_dir="$base_dir/.gemini/tmp/async-reviews/pr-$pr_number/logs" - -if [[ ! -d "$log_dir" ]]; then - echo "STATUS: NOT_FOUND" - echo "โŒ No logs found for PR #$pr_number in $log_dir" - exit 0 -fi - -tasks=( - "setup|setup.log" - "pr-diff|pr-diff.diff" - "build-and-lint|build-and-lint.log" - "review|review.md" - "npm-test|npm-test.log" - "test-execution|test-execution.log" - "final-assessment|final-assessment.md" -) - -all_done=true -echo "STATUS: CHECKING" - -for task_info in "${tasks[@]}"; do - IFS="|" read -r task_name log_file <<< "$task_info" - - file_path="$log_dir/$log_file" - exit_file="$log_dir/$task_name.exit" - - if [[ -f "$exit_file" ]]; then - exit_code=$(cat "$exit_file") - if [[ "$exit_code" == "0" ]]; then - echo "โœ… $task_name: SUCCESS" - else - echo "โŒ $task_name: FAILED (exit code $exit_code)" - echo " Last lines of $file_path:" - tail -n 3 "$file_path" | sed 's/^/ /' - fi - elif [[ -f "$file_path" ]]; then - echo "โณ $task_name: RUNNING" - all_done=false - else - echo "โž– $task_name: NOT STARTED" - all_done=false - fi -done - -if $all_done; then - echo "STATUS: COMPLETE" - echo "LOG_DIR: $log_dir" -else - echo "STATUS: IN_PROGRESS" -fi \ No newline at end of file diff --git a/.gemini/skills/offload/README.md b/.gemini/skills/offload/README.md index 91aba9da4e..fd05e3ae08 100644 --- a/.gemini/skills/offload/README.md +++ b/.gemini/skills/offload/README.md @@ -74,3 +74,16 @@ The orchestration logic for this skill is fully tested. To run the tests: npx vitest .gemini/skills/offload/tests/orchestration.test.ts ``` These tests mock the external environment (SSH, GitHub CLI, and the file system) to ensure that the orchestration scripts generate the correct commands and handle environment isolation accurately. + +## Future roadmap: Offload V2 (Fleet) + +The current version of `offload` focuses on a persistent SSH workstation. The long-term vision for this tool includes: + +- **Elastic compute**: Moving from a single machine to a fleet of ephemeral + remote containers (for example, using Cloud Run Jobs or GKE Pods). +- **Immutable base images**: Building a Docker container for every commit to + `main` to ensure a guaranteed, pristine verification environment. +- **Stateless execution**: Eliminating the need for manual cleanup by using + on-demand, disposable compute nodes for every offload task. +- **Massive parallelism**: Enabling dozens of simultaneous PR reviews and + implementations across the elastic fleet. diff --git a/.gemini/skills/offload/policy.toml b/.gemini/skills/offload/policy.toml index dd26fd772c..e0c4e8f54a 100644 --- a/.gemini/skills/offload/policy.toml +++ b/.gemini/skills/offload/policy.toml @@ -29,12 +29,35 @@ toolName = "codebase_investigator" decision = "allow" priority = 100 +# --- SKILLS --- +[[rule]] +toolName = "activate_skill" +decision = "allow" +priority = 100 +[rule.toolParameters] +name = "review-pr" + +[[rule]] +toolName = "activate_skill" +decision = "allow" +priority = 100 +[rule.toolParameters] +name = "fix-pr" + +[[rule]] +toolName = "activate_skill" +decision = "allow" +priority = 100 +[rule.toolParameters] +name = "pr-address-comments" + # --- SHELL COMMANDS --- -# Git (Safe/Read-only) +# Git (Safe/Read-only + Local State) [[rule]] toolName = "run_shell_command" commandPrefix = [ + "git checkout", "git blame", "git show", "git grep", diff --git a/.gemini/skills/offload/scripts/playbooks/review.ts b/.gemini/skills/offload/scripts/playbooks/review.ts index 2bf0366e97..5358132d4f 100644 --- a/.gemini/skills/offload/scripts/playbooks/review.ts +++ b/.gemini/skills/offload/scripts/playbooks/review.ts @@ -10,8 +10,7 @@ export async function runReviewPlaybook(prNumber: string, targetDir: string, pol runner.register([ { id: 'build', name: 'Fast Build', cmd: `cd ${targetDir} && npm ci && npm run build` }, { id: 'ci', name: 'CI Checks', cmd: `gh pr checks ${prNumber}` }, - { id: 'review', name: 'Gemini Analysis', cmd: `${geminiBin} --policy ${policyPath} --cwd ${targetDir} -p "/review-frontend ${prNumber}"` }, - { id: 'verify', name: 'Behavioral Proof', cmd: `${geminiBin} --policy ${policyPath} --cwd ${targetDir} -p "Analyze the code in ${targetDir} and exercise it to prove it works."`, dep: 'build' } + { id: 'review', name: 'Offloaded Review', cmd: `${geminiBin} --policy ${policyPath} --cwd ${targetDir} -p "Please activate the 'review-pr' skill and use it to conduct a behavioral review of PR #${prNumber}."` } ]); return runner.run(); diff --git a/.gemini/skills/review-pr/SKILL.md b/.gemini/skills/review-pr/SKILL.md new file mode 100644 index 0000000000..ff825b82b9 --- /dev/null +++ b/.gemini/skills/review-pr/SKILL.md @@ -0,0 +1,59 @@ +# Review PR Skill + +This skill enables the agent to conduct a high-fidelity, behavioral code review +for a Pull Request. It is designed to be run in a dedicated, offloaded remote +session where parallel infrastructure validation (build, CI check) is already +underway. + +## Objective + +Conduct a thorough review that goes beyond static analysis. The agent must +**physically prove** the feature works and that all regressions are caught by +exercising the code in a live terminal. + +## Workflow: Verify then Synthesize + +The agent must follow these steps to conduct the review: + +### 1. Context Acquisition +- Read the PR description: `gh pr view `. +- Read the intent from the description and map it against the diff. +- Run `/review-frontend ` to get an initial static analysis report. + +### 2. Infrastructure Validation +- Trust the parallel worker results in `.gemini/logs/offload-/`. +- Read `build.log` to ensure the environment is stable for testing. +- Read `ci-status.exit`. +- **If CI is failing**: Use the extractor to identify failing tests: + `npx tsx .gemini/skills/review-pr/scripts/extract-failures.ts ` +- Reproduce the failing tests locally and analyze the output. + +### 3. Behavioral Verification (The "Proof") +- Do NOT just trust that tests pass. +- Physically exercise the new code in the terminal. +- Examples of "Proof": + - If it's a new CLI command, run it with various flags. + - If it's a new service, write a small `.ts` script to call the functions. + - If it's a UI component, verify the Ink layout renders correctly. +- Log these proof steps and results in your conversation. + +### 4. Synthesis +- Combine all data points: + - **Static Analysis** (Code quality, style). + - **Infrastructure** (Build status, CI status). + - **Behavioral Proof** (Empirical verification). +- Check for merge conflicts with `main`. + +## Final Assessment + +Provide a final recommendation with three sections: + +1. **Summary**: High-level verdict (Approve / Needs Work / Reject). +2. **Verified Behavior**: Describe the scripts/commands you ran to prove the + feature works. +3. **Findings**: List Critical issues, Improvements, and Nitpicks. + +## Best Practices +- **Be Skeptical**: Just because CI is green doesn't mean the feature is complete. Look for missing edge cases. +- **Collaborate**: If you find a bug during verification, tell the user immediately in the main window. +- **Don't Fix**: Your role is to **review**, not to fix. If the PR is failing, summarize the causes and request changes. diff --git a/.gemini/skills/review-pr/scripts/extract-failures.ts b/.gemini/skills/review-pr/scripts/extract-failures.ts new file mode 100644 index 0000000000..77e7c1bfc4 --- /dev/null +++ b/.gemini/skills/review-pr/scripts/extract-failures.ts @@ -0,0 +1,59 @@ +/** + * CI Failure Extraction Utility + * + * Analyzes remote CI logs to identify specific failing test files. + * Used by the review-pr and fix-pr skills to minimize local repro noise. + */ +import { spawnSync } from 'child_process'; + +async function main() { + const prNumber = process.argv[2]; + if (!prNumber) { + console.error('Usage: npx tsx extract-failures.ts '); + process.exit(1); + } + + console.log(`๐Ÿ” Investigating CI failures for PR #${prNumber}...`); + + // 1. Get branch name + const branchView = spawnSync('gh', ['pr', 'view', prNumber, '--json', 'headRefName', '-q', '.headRefName'], { shell: true }); + const branchName = branchView.stdout.toString().trim(); + + // 2. Get latest CI run ID + const runList = spawnSync('gh', ['run', 'list', '--branch', branchName, '--workflow', 'ci.yml', '--json', 'databaseId', '-q', '.[0].databaseId'], { shell: true }); + const runId = runList.stdout.toString().trim(); + + if (!runId) { + console.log('โš ๏ธ No recent CI runs found for this branch.'); + process.exit(0); + } + + // 3. Extract failing files from logs + // Pattern matches common test locations in the monorepo + const logView = spawnSync('gh', ['run', 'view', runId, '--log-failed'], { shell: true }); + const logOutput = logView.stdout.toString(); + + const testPattern = /(packages\/[a-zA-Z0-9_-]+|integration-tests|evals)\/[a-zA-Z0-9_\/-]+\.test\.ts(x)?/g; + const matches = logOutput.match(testPattern); + + if (!matches || matches.length === 0) { + console.log('โœ… No specific failing test files detected in logs.'); + process.exit(0); + } + + const uniqueFiles = Array.from(new Set(matches)).sort(); + console.log('\nโŒ Found failing test files:'); + uniqueFiles.forEach(f => console.log(` - ${f}`)); + + console.log('\n๐Ÿ‘‰ Recommendation: Run these tests locally using:'); + uniqueFiles.forEach(file => { + let wsDir = file.split('/')[0]; + if (wsDir === 'packages') { + wsDir = file.split('/').slice(0, 2).join('/'); + } + const relFile = file.replace(`${wsDir}/`, ''); + console.log(` npm run test:ci -w ${wsDir} -- ${relFile}`); + }); +} + +main().catch(console.error);