mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-04 02:11:11 -07:00
feat: add 'review-pr' skill and sunset 'async-pr-review'
This commit is contained in:
@@ -1,45 +0,0 @@
|
||||
---
|
||||
name: async-pr-review
|
||||
description: Trigger this skill when the user wants to start an asynchronous PR review, run background checks on a PR, or check the status of a previously started async PR review.
|
||||
---
|
||||
|
||||
# Async PR Review
|
||||
|
||||
This skill provides a set of tools to asynchronously review a Pull Request. It will create a background job to run the project's preflight checks, execute Gemini-powered test plans, and perform a comprehensive code review using custom prompts.
|
||||
|
||||
This skill is designed to showcase an advanced "Agentic Asynchronous Pattern":
|
||||
1. **Native Background Shells vs Headless Inference**: While Gemini CLI can natively spawn and detach background shell commands (using the `run_shell_command` tool with `is_background: true`), a standard bash background job cannot perform LLM inference. To conduct AI-driven code reviews and test generation in the background, the shell script *must* invoke the `gemini` executable headlessly using `-p`. This offloads the AI tasks to independent worker agents.
|
||||
2. **Dynamic Git Scoping**: The review scripts avoid hardcoded paths. They use `git rev-parse --show-toplevel` to automatically resolve the root of the user's current project.
|
||||
3. **Ephemeral Worktrees**: Instead of checking out branches in the user's main workspace, the skill provisions temporary git worktrees in `.gemini/tmp/async-reviews/pr-<number>`. This prevents git lock conflicts and namespace pollution.
|
||||
4. **Agentic Evaluation (`check-async-review.sh`)**: The check script outputs clean JSON/text statuses for the main agent to parse. The interactive agent itself synthesizes the final assessment dynamically from the generated log files.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. **Determine Action**: Establish whether the user wants to start a new async review or check the status of an existing one.
|
||||
* If the user says "start an async review for PR #123" or similar, proceed to **Start Review**.
|
||||
* If the user says "check the status of my async review for PR #123" or similar, proceed to **Check Status**.
|
||||
|
||||
### Start Review
|
||||
|
||||
If the user wants to start a new async PR review:
|
||||
|
||||
1. Ask the user for the PR number if they haven't provided it.
|
||||
2. Execute the `async-review.sh` script, passing the PR number as the first argument. Be sure to run it with the `is_background` flag set to true to ensure it immediately detaches.
|
||||
```bash
|
||||
.gemini/skills/async-pr-review/scripts/async-review.sh <PR_NUMBER>
|
||||
```
|
||||
3. Inform the user that the tasks have started successfully and they can check the status later.
|
||||
|
||||
### Check Status
|
||||
|
||||
If the user wants to check the status or view the final assessment of a previously started async review:
|
||||
|
||||
1. Ask the user for the PR number if they haven't provided it.
|
||||
2. Execute the `check-async-review.sh` script, passing the PR number as the first argument:
|
||||
```bash
|
||||
.gemini/skills/async-pr-review/scripts/check-async-review.sh <PR_NUMBER>
|
||||
```
|
||||
3. **Evaluate Output**: Read the output from the script.
|
||||
* If the output contains `STATUS: IN_PROGRESS`, tell the user which tasks are still running.
|
||||
* If the output contains `STATUS: COMPLETE`, use your file reading tools (`read_file`) to retrieve the contents of `final-assessment.md`, `review.md`, `pr-diff.diff`, `npm-test.log`, and `test-execution.log` files from the `LOG_DIR` specified in the output.
|
||||
* **Final Assessment**: Read those files, synthesize their results, and give the user a concise recommendation on whether the PR builds successfully, passes tests, and if you recommend they approve it based on the review.
|
||||
@@ -1,148 +0,0 @@
|
||||
# --- CORE TOOLS ---
|
||||
[[rule]]
|
||||
toolName = "read_file"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
[[rule]]
|
||||
toolName = "write_file"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
[[rule]]
|
||||
toolName = "grep_search"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
[[rule]]
|
||||
toolName = "glob"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
[[rule]]
|
||||
toolName = "list_directory"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
[[rule]]
|
||||
toolName = "codebase_investigator"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
# --- SHELL COMMANDS ---
|
||||
|
||||
# Git (Safe/Read-only)
|
||||
[[rule]]
|
||||
toolName = "run_shell_command"
|
||||
commandPrefix = [
|
||||
"git blame",
|
||||
"git show",
|
||||
"git grep",
|
||||
"git show-ref",
|
||||
"git ls-tree",
|
||||
"git ls-remote",
|
||||
"git reflog",
|
||||
"git remote -v",
|
||||
"git diff",
|
||||
"git rev-list",
|
||||
"git rev-parse",
|
||||
"git merge-base",
|
||||
"git cherry",
|
||||
"git fetch",
|
||||
"git status",
|
||||
"git st",
|
||||
"git branch",
|
||||
"git br",
|
||||
"git log",
|
||||
"git --version"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
# GitHub CLI (Read-only)
|
||||
[[rule]]
|
||||
toolName = "run_shell_command"
|
||||
commandPrefix = [
|
||||
"gh workflow list",
|
||||
"gh auth status",
|
||||
"gh checkout view", # NOTE(review): not a valid gh subcommand — likely meant "gh pr checkout" or "gh pr view"; confirm intent
|
||||
"gh run view",
|
||||
"gh run job view",
|
||||
"gh run list",
|
||||
"gh run --help",
|
||||
"gh issue view",
|
||||
"gh issue list",
|
||||
"gh label list",
|
||||
"gh pr diff",
|
||||
"gh pr check",
|
||||
"gh pr checks",
|
||||
"gh pr view",
|
||||
"gh pr list",
|
||||
"gh pr status",
|
||||
"gh repo view",
|
||||
"gh job view",
|
||||
"gh api",
|
||||
"gh log"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
# Node.js/NPM (Generic Tests, Checks, and Build)
|
||||
[[rule]]
|
||||
toolName = "run_shell_command"
|
||||
commandPrefix = [
|
||||
"npm run start",
|
||||
"npm install",
|
||||
"npm run",
|
||||
"npm test",
|
||||
"npm ci",
|
||||
"npm list",
|
||||
"npm --version"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
# Core Utilities (Safe)
|
||||
[[rule]]
|
||||
toolName = "run_shell_command"
|
||||
commandPrefix = [
|
||||
"sleep",
|
||||
"env",
|
||||
"break",
|
||||
"xargs",
|
||||
"base64",
|
||||
"uniq",
|
||||
"sort",
|
||||
"echo",
|
||||
"which",
|
||||
"ls",
|
||||
"find",
|
||||
"tail",
|
||||
"head",
|
||||
"cat",
|
||||
"cd",
|
||||
"grep",
|
||||
"ps",
|
||||
"pwd",
|
||||
"wc",
|
||||
"file",
|
||||
"stat",
|
||||
"diff",
|
||||
"lsof",
|
||||
"date",
|
||||
"whoami",
|
||||
"uname",
|
||||
"du",
|
||||
"cut",
|
||||
"true",
|
||||
"false",
|
||||
"readlink",
|
||||
"awk",
|
||||
"jq",
|
||||
"rg",
|
||||
"less",
|
||||
"more",
|
||||
"tree"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
@@ -1,241 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# notify TITLE MESSAGE PR_NUMBER
# Emits a desktop/terminal notification about the async review's progress.
# Uses the OSC 9 terminal escape (supported by iTerm2/WezTerm and others),
# and additionally a native macOS notification via osascript when on Darwin.
notify() {
  local title="$1"
  local message="$2"
  local pr="$3"
  # Terminal escape sequence
  printf "\e]9;%s | PR #%s | %s\a" "$title" "$pr" "$message"
  # Native macOS notification
  if [[ "$(uname)" == "Darwin" ]]; then
    osascript -e "display notification \"$message\" with title \"$title\" subtitle \"PR #$pr\""
  fi
}
|
||||
|
||||
# --- Setup: validate arguments, resolve the repo root, and provision an
# --- ephemeral worktree for the PR under .gemini/tmp/async-reviews/.
pr_number=$1
if [[ -z "$pr_number" ]]; then
  echo "Usage: async-review <pr_number>"
  exit 1
fi

# Resolve the root of whatever git repository the user is currently in;
# no paths are hardcoded so the skill is portable across projects.
base_dir=$(git rev-parse --show-toplevel 2>/dev/null)
if [[ -z "$base_dir" ]]; then
  echo "❌ Must be run from within a git repository."
  exit 1
fi

# Use the repository's local .gemini/tmp directory for ephemeral worktrees and logs
pr_dir="$base_dir/.gemini/tmp/async-reviews/pr-$pr_number"
target_dir="$pr_dir/worktree"
log_dir="$pr_dir/logs"

cd "$base_dir" || exit 1

mkdir -p "$log_dir"
# Clear stale completion markers so a re-run is not misread as finished.
rm -f "$log_dir/setup.exit" "$log_dir/final-assessment.exit" "$log_dir/final-assessment.md"

echo "🧹 Cleaning up previous worktree if it exists..." | tee -a "$log_dir/setup.log"
# Best-effort cleanup: each step may legitimately fail on a fresh run.
git worktree remove -f "$target_dir" >> "$log_dir/setup.log" 2>&1 || true
git branch -D "gemini-async-pr-$pr_number" >> "$log_dir/setup.log" 2>&1 || true
git worktree prune >> "$log_dir/setup.log" 2>&1 || true

echo "📡 Fetching PR #$pr_number..." | tee -a "$log_dir/setup.log"
# Fetch the PR head into a dedicated local branch (GitHub refs/pull/<n>/head).
if ! git fetch origin -f "pull/$pr_number/head:gemini-async-pr-$pr_number" >> "$log_dir/setup.log" 2>&1; then
  echo 1 > "$log_dir/setup.exit"
  echo "❌ Fetch failed. Check $log_dir/setup.log"
  notify "Async Review Failed" "Fetch failed." "$pr_number"
  exit 1
fi

if [[ ! -d "$target_dir" ]]; then
  echo "🧹 Pruning missing worktrees..." | tee -a "$log_dir/setup.log"
  git worktree prune >> "$log_dir/setup.log" 2>&1
  echo "🌿 Creating worktree in $target_dir..." | tee -a "$log_dir/setup.log"
  if ! git worktree add "$target_dir" "gemini-async-pr-$pr_number" >> "$log_dir/setup.log" 2>&1; then
    echo 1 > "$log_dir/setup.exit"
    echo "❌ Worktree creation failed. Check $log_dir/setup.log"
    notify "Async Review Failed" "Worktree creation failed." "$pr_number"
    exit 1
  fi
else
  echo "🌿 Worktree already exists." | tee -a "$log_dir/setup.log"
fi
echo 0 > "$log_dir/setup.exit"

# All subsequent build/test work happens inside the isolated worktree.
cd "$target_dir" || exit 1
|
||||
|
||||
# --- Dispatch: launch the five review tasks as detached background jobs.
# Each job writes its output to $log_dir/<task>.{log,md,diff} and records its
# exit status in $log_dir/<task>.exit; the presence of the .exit file is the
# completion signal used by the polling loop and check-async-review.sh.
echo "🚀 Launching background tasks. Logs saving to: $log_dir"

echo " ↳ [1/5] Grabbing PR diff..."
rm -f "$log_dir/pr-diff.exit"
{ gh pr diff "$pr_number" > "$log_dir/pr-diff.diff" 2>&1; echo $? > "$log_dir/pr-diff.exit"; } &

echo " ↳ [2/5] Starting build and lint..."
rm -f "$log_dir/build-and-lint.exit"
{ { npm run clean && npm ci && npm run format && npm run build && npm run lint:ci && npm run typecheck; } > "$log_dir/build-and-lint.log" 2>&1; echo $? > "$log_dir/build-and-lint.exit"; } &

# Dynamically resolve gemini binary (fallback to your nightly path)
GEMINI_CMD=$(which gemini || echo "$HOME/.gcli/nightly/node_modules/.bin/gemini")
# policy.toml lives one directory above this script (the skill root).
POLICY_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/policy.toml"

echo " ↳ [3/5] Starting Gemini code review..."
rm -f "$log_dir/review.exit"
# Headless worker agent: a plain background shell cannot run LLM inference,
# so the review is delegated to a detached `gemini -p` invocation.
{ "$GEMINI_CMD" --policy "$POLICY_PATH" -p "/review-frontend $pr_number" > "$log_dir/review.md" 2>&1; echo $? > "$log_dir/review.exit"; } &

echo " ↳ [4/5] Starting automated tests (waiting for build and lint)..."
rm -f "$log_dir/npm-test.exit"
{
  # Block until the build-and-lint job has published its exit status.
  while [ ! -f "$log_dir/build-and-lint.exit" ]; do sleep 1; done
  if [ "$(cat "$log_dir/build-and-lint.exit")" == "0" ]; then
    gh pr checks "$pr_number" > "$log_dir/ci-checks.log" 2>&1
    ci_status=$?

    if [ "$ci_status" -eq 0 ]; then
      echo "CI checks passed. Skipping local npm tests." > "$log_dir/npm-test.log"
      echo 0 > "$log_dir/npm-test.exit"
    elif [ "$ci_status" -eq 8 ]; then
      # gh exits 8 when checks are still pending.
      echo "CI checks are still pending. Skipping local npm tests to avoid duplicate work. Please check GitHub for final results." > "$log_dir/npm-test.log"
      echo 0 > "$log_dir/npm-test.exit"
    else
      echo "CI checks failed. Failing checks:" > "$log_dir/npm-test.log"
      gh pr checks "$pr_number" --json name,bucket -q '.[] | select(.bucket=="fail") | .name' >> "$log_dir/npm-test.log" 2>&1

      echo "Attempting to extract failing test files from CI logs..." >> "$log_dir/npm-test.log"
      pr_branch=$(gh pr view "$pr_number" --json headRefName -q '.headRefName' 2>/dev/null)
      run_id=$(gh run list --branch "$pr_branch" --workflow ci.yml --json databaseId -q '.[0].databaseId' 2>/dev/null)

      failed_files=""
      if [[ -n "$run_id" ]]; then
        # Scrape failing *.test.ts(x) paths out of the failed-job logs.
        failed_files=$(gh run view "$run_id" --log-failed 2>/dev/null | grep -o -E '(packages/[a-zA-Z0-9_-]+|integration-tests|evals)/[a-zA-Z0-9_/-]+\.test\.ts(x)?' | sort | uniq)
      fi

      if [[ -n "$failed_files" ]]; then
        echo "Found failing test files from CI:" >> "$log_dir/npm-test.log"
        for f in $failed_files; do echo " - $f" >> "$log_dir/npm-test.log"; done
        echo "Running ONLY failing tests locally..." >> "$log_dir/npm-test.log"

        exit_code=0
        for file in $failed_files; do
          # Map each file to its npm workspace: packages/<name> keeps two
          # path segments; integration-tests/evals keep one.
          if [[ "$file" == packages/* ]]; then
            ws_dir=$(echo "$file" | cut -d'/' -f1,2)
          else
            ws_dir=$(echo "$file" | cut -d'/' -f1)
          fi
          rel_file=${file#$ws_dir/}

          echo "--- Running $rel_file in workspace $ws_dir ---" >> "$log_dir/npm-test.log"
          if ! npm run test:ci -w "$ws_dir" -- "$rel_file" >> "$log_dir/npm-test.log" 2>&1; then
            exit_code=1
          fi
        done
        echo $exit_code > "$log_dir/npm-test.exit"
      else
        echo "Could not extract specific failing files. Skipping full local test suite as it takes too long. Please check CI logs manually." >> "$log_dir/npm-test.log"
        echo 1 > "$log_dir/npm-test.exit"
      fi
    fi
  else
    echo "Skipped due to build-and-lint failure" > "$log_dir/npm-test.log"
    echo 1 > "$log_dir/npm-test.exit"
  fi
} &

echo " ↳ [5/5] Starting Gemini test execution (waiting for build and lint)..."
rm -f "$log_dir/test-execution.exit"
{
  while [ ! -f "$log_dir/build-and-lint.exit" ]; do sleep 1; done
  if [ "$(cat "$log_dir/build-and-lint.exit")" == "0" ]; then
    "$GEMINI_CMD" --policy "$POLICY_PATH" -p "Analyze the diff for PR $pr_number using 'gh pr diff $pr_number'. Instead of running the project's automated test suite (like 'npm test'), physically exercise the newly changed code in the terminal (e.g., by writing a temporary script to call the new functions, or testing the CLI command directly). Verify the feature's behavior works as expected. IMPORTANT: Do NOT modify any source code to fix errors. Just exercise the code and log the results, reporting any failures clearly. Do not ask for user confirmation." > "$log_dir/test-execution.log" 2>&1; echo $? > "$log_dir/test-execution.exit"
  else
    echo "Skipped due to build-and-lint failure" > "$log_dir/test-execution.log"
    echo 1 > "$log_dir/test-execution.exit"
  fi
} &

echo "✅ All tasks dispatched!"
echo "You can monitor progress with: tail -f $log_dir/*.log"
echo "Read your review later at: $log_dir/review.md"
|
||||
|
||||
# Polling loop to wait for all background tasks to finish
tasks=("pr-diff" "build-and-lint" "review" "npm-test" "test-execution")
log_files=("pr-diff.diff" "build-and-lint.log" "review.md" "npm-test.log" "test-execution.log")

declare -A task_done
for t in "${tasks[@]}"; do task_done[$t]=0; done

all_done=0
while [[ $all_done -eq 0 ]]; do
  clear
  echo "=================================================="
  echo "🚀 Async PR Review Status for PR #$pr_number"
  echo "=================================================="
  echo ""

  # A task is complete iff its .exit marker file exists.
  all_done=1
  for i in "${!tasks[@]}"; do
    t="${tasks[$i]}"

    if [[ -f "$log_dir/$t.exit" ]]; then
      exit_code=$(cat "$log_dir/$t.exit")
      if [[ "$exit_code" == "0" ]]; then
        echo " ✅ $t: SUCCESS"
      else
        echo " ❌ $t: FAILED (exit code $exit_code)"
      fi
      task_done[$t]=1
    else
      echo " ⏳ $t: RUNNING"
      all_done=0
    fi
  done

  echo ""
  echo "=================================================="
  echo "📝 Live Logs (Last 5 lines of running tasks)"
  echo "=================================================="

  # Show a live tail for every still-running task that has output so far.
  for i in "${!tasks[@]}"; do
    t="${tasks[$i]}"
    log_file="${log_files[$i]}"

    if [[ ${task_done[$t]} -eq 0 ]]; then
      if [[ -f "$log_dir/$log_file" ]]; then
        echo ""
        echo "--- $t ---"
        tail -n 5 "$log_dir/$log_file"
      fi
    fi
  done

  if [[ $all_done -eq 0 ]]; then
    sleep 3
  fi
done

# Final status summary once every task has published an exit code.
clear
echo "=================================================="
echo "🚀 Async PR Review Status for PR #$pr_number"
echo "=================================================="
echo ""
for t in "${tasks[@]}"; do
  exit_code=$(cat "$log_dir/$t.exit")
  if [[ "$exit_code" == "0" ]]; then
    echo " ✅ $t: SUCCESS"
  else
    echo " ❌ $t: FAILED (exit code $exit_code)"
  fi
done
echo ""

echo "⏳ Tasks complete! Synthesizing final assessment..."
# Capture the synthesis status explicitly. Inside `if ! cmd; then`, `$?`
# reflects the negated condition (0), so the previous `echo $?` recorded a
# failed synthesis as success in final-assessment.exit.
"$GEMINI_CMD" --policy "$POLICY_PATH" -p "Read the review at $log_dir/review.md, the automated test logs at $log_dir/npm-test.log, and the manual test execution logs at $log_dir/test-execution.log. Summarize the results, state whether the build and tests passed based on $log_dir/build-and-lint.exit and $log_dir/npm-test.exit, and give a final recommendation for PR $pr_number." > "$log_dir/final-assessment.md" 2>&1
assess_status=$?
if [[ "$assess_status" -ne 0 ]]; then
  echo "$assess_status" > "$log_dir/final-assessment.exit"
  echo "❌ Final assessment synthesis failed!"
  echo "Check $log_dir/final-assessment.md for details."
  notify "Async Review Failed" "Final assessment synthesis failed." "$pr_number"
  exit 1
fi

echo 0 > "$log_dir/final-assessment.exit"
echo "✅ Final assessment complete! Check $log_dir/final-assessment.md"
notify "Async Review Complete" "Review and test execution finished successfully." "$pr_number"
|
||||
@@ -1,65 +0,0 @@
|
||||
#!/bin/bash
# check-async-review.sh <pr_number>
# Reports the status of a previously started async PR review by inspecting the
# per-task marker files written by async-review.sh. Output is intentionally
# machine-parseable (STATUS:/LOG_DIR: lines) so the interactive agent can
# synthesize the final assessment itself.
pr_number=$1

if [[ -z "$pr_number" ]]; then
  echo "Usage: check-async-review <pr_number>"
  exit 1
fi

base_dir=$(git rev-parse --show-toplevel 2>/dev/null)
if [[ -z "$base_dir" ]]; then
  echo "❌ Must be run from within a git repository."
  exit 1
fi

log_dir="$base_dir/.gemini/tmp/async-reviews/pr-$pr_number/logs"

if [[ ! -d "$log_dir" ]]; then
  echo "STATUS: NOT_FOUND"
  echo "❌ No logs found for PR #$pr_number in $log_dir"
  exit 0
fi

# Each entry is "task-name|primary-log-file" for that task.
tasks=(
  "setup|setup.log"
  "pr-diff|pr-diff.diff"
  "build-and-lint|build-and-lint.log"
  "review|review.md"
  "npm-test|npm-test.log"
  "test-execution|test-execution.log"
  "final-assessment|final-assessment.md"
)

all_done=true
echo "STATUS: CHECKING"

for task_info in "${tasks[@]}"; do
  IFS="|" read -r task_name log_file <<< "$task_info"

  file_path="$log_dir/$log_file"
  exit_file="$log_dir/$task_name.exit"

  # .exit present => finished; log present without .exit => still running;
  # neither => the task has not started yet.
  if [[ -f "$exit_file" ]]; then
    exit_code=$(cat "$exit_file")
    if [[ "$exit_code" == "0" ]]; then
      echo "✅ $task_name: SUCCESS"
    else
      echo "❌ $task_name: FAILED (exit code $exit_code)"
      echo "   Last lines of $file_path:"
      tail -n 3 "$file_path" | sed 's/^/     /'
    fi
  elif [[ -f "$file_path" ]]; then
    echo "⏳ $task_name: RUNNING"
    all_done=false
  else
    echo "➖ $task_name: NOT STARTED"
    all_done=false
  fi
done

if $all_done; then
  echo "STATUS: COMPLETE"
  echo "LOG_DIR: $log_dir"
else
  echo "STATUS: IN_PROGRESS"
fi
|
||||
@@ -74,3 +74,16 @@ The orchestration logic for this skill is fully tested. To run the tests:
|
||||
npx vitest .gemini/skills/offload/tests/orchestration.test.ts
|
||||
```
|
||||
These tests mock the external environment (SSH, GitHub CLI, and the file system) to ensure that the orchestration scripts generate the correct commands and handle environment isolation accurately.
|
||||
|
||||
## Future roadmap: Offload V2 (Fleet)
|
||||
|
||||
The current version of `offload` focuses on a persistent SSH workstation. The long-term vision for this tool includes:
|
||||
|
||||
- **Elastic compute**: Moving from a single machine to a fleet of ephemeral
|
||||
remote containers (for example, using Cloud Run Jobs or GKE Pods).
|
||||
- **Immutable base images**: Building a Docker container for every commit to
|
||||
`main` to ensure a guaranteed, pristine verification environment.
|
||||
- **Stateless execution**: Eliminating the need for manual cleanup by using
|
||||
on-demand, disposable compute nodes for every offload task.
|
||||
- **Massive parallelism**: Enabling dozens of simultaneous PR reviews and
|
||||
implementations across the elastic fleet.
|
||||
|
||||
@@ -29,12 +29,35 @@ toolName = "codebase_investigator"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
|
||||
# --- SKILLS ---
|
||||
[[rule]]
|
||||
toolName = "activate_skill"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
[rule.toolParameters]
|
||||
name = "review-pr"
|
||||
|
||||
[[rule]]
|
||||
toolName = "activate_skill"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
[rule.toolParameters]
|
||||
name = "fix-pr"
|
||||
|
||||
[[rule]]
|
||||
toolName = "activate_skill"
|
||||
decision = "allow"
|
||||
priority = 100
|
||||
[rule.toolParameters]
|
||||
name = "pr-address-comments"
|
||||
|
||||
# --- SHELL COMMANDS ---
|
||||
|
||||
# Git (Safe/Read-only)
|
||||
# Git (Safe/Read-only + Local State)
|
||||
[[rule]]
|
||||
toolName = "run_shell_command"
|
||||
commandPrefix = [
|
||||
"git checkout",
|
||||
"git blame",
|
||||
"git show",
|
||||
"git grep",
|
||||
|
||||
@@ -10,8 +10,7 @@ export async function runReviewPlaybook(prNumber: string, targetDir: string, pol
|
||||
runner.register([
|
||||
{ id: 'build', name: 'Fast Build', cmd: `cd ${targetDir} && npm ci && npm run build` },
|
||||
{ id: 'ci', name: 'CI Checks', cmd: `gh pr checks ${prNumber}` },
|
||||
{ id: 'review', name: 'Gemini Analysis', cmd: `${geminiBin} --policy ${policyPath} --cwd ${targetDir} -p "/review-frontend ${prNumber}"` },
|
||||
{ id: 'verify', name: 'Behavioral Proof', cmd: `${geminiBin} --policy ${policyPath} --cwd ${targetDir} -p "Analyze the code in ${targetDir} and exercise it to prove it works."`, dep: 'build' }
|
||||
{ id: 'review', name: 'Offloaded Review', cmd: `${geminiBin} --policy ${policyPath} --cwd ${targetDir} -p "Please activate the 'review-pr' skill and use it to conduct a behavioral review of PR #${prNumber}."` }
|
||||
]);
|
||||
|
||||
return runner.run();
|
||||
|
||||
59
.gemini/skills/review-pr/SKILL.md
Normal file
59
.gemini/skills/review-pr/SKILL.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Review PR Skill
|
||||
|
||||
This skill enables the agent to conduct a high-fidelity, behavioral code review
|
||||
for a Pull Request. It is designed to be run in a dedicated, offloaded remote
|
||||
session where parallel infrastructure validation (build, CI check) is already
|
||||
underway.
|
||||
|
||||
## Objective
|
||||
|
||||
Conduct a thorough review that goes beyond static analysis. The agent must
|
||||
**physically prove** the feature works and that all regressions are caught by
|
||||
exercising the code in a live terminal.
|
||||
|
||||
## Workflow: Verify then Synthesize
|
||||
|
||||
The agent must follow these steps to conduct the review:
|
||||
|
||||
### 1. Context Acquisition
|
||||
- Read the PR description: `gh pr view <PR_NUMBER>`.
|
||||
- Read the intent from the description and map it against the diff.
|
||||
- Run `/review-frontend <PR_NUMBER>` to get an initial static analysis report.
|
||||
|
||||
### 2. Infrastructure Validation
|
||||
- Trust the parallel worker results in `.gemini/logs/offload-<PR_NUMBER>/`.
|
||||
- Read `build.log` to ensure the environment is stable for testing.
|
||||
- Read `ci-status.exit`.
|
||||
- **If CI is failing**: Use the extractor to identify failing tests:
|
||||
`npx tsx .gemini/skills/review-pr/scripts/extract-failures.ts <PR_NUMBER>`
|
||||
- Reproduce the failing tests locally and analyze the output.
|
||||
|
||||
### 3. Behavioral Verification (The "Proof")
|
||||
- Do NOT just trust that tests pass.
|
||||
- Physically exercise the new code in the terminal.
|
||||
- Examples of "Proof":
|
||||
- If it's a new CLI command, run it with various flags.
|
||||
- If it's a new service, write a small `.ts` script to call the functions.
|
||||
- If it's a UI component, verify the Ink layout renders correctly.
|
||||
- Log these proof steps and results in your conversation.
|
||||
|
||||
### 4. Synthesis
|
||||
- Combine all data points:
|
||||
- **Static Analysis** (Code quality, style).
|
||||
- **Infrastructure** (Build status, CI status).
|
||||
- **Behavioral Proof** (Empirical verification).
|
||||
- Check for merge conflicts with `main`.
|
||||
|
||||
## Final Assessment
|
||||
|
||||
Provide a final recommendation with three sections:
|
||||
|
||||
1. **Summary**: High-level verdict (Approve / Needs Work / Reject).
|
||||
2. **Verified Behavior**: Describe the scripts/commands you ran to prove the
|
||||
feature works.
|
||||
3. **Findings**: List Critical issues, Improvements, and Nitpicks.
|
||||
|
||||
## Best Practices
|
||||
- **Be Skeptical**: Just because CI is green doesn't mean the feature is complete. Look for missing edge cases.
|
||||
- **Collaborate**: If you find a bug during verification, tell the user immediately in the main window.
|
||||
- **Don't Fix**: Your role is to **review**, not to fix. If the PR is failing, summarize the causes and request changes.
|
||||
59
.gemini/skills/review-pr/scripts/extract-failures.ts
Normal file
59
.gemini/skills/review-pr/scripts/extract-failures.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* CI Failure Extraction Utility
|
||||
*
|
||||
* Analyzes remote CI logs to identify specific failing test files.
|
||||
* Used by the review-pr and fix-pr skills to minimize local repro noise.
|
||||
*/
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
async function main() {
|
||||
const prNumber = process.argv[2];
|
||||
if (!prNumber) {
|
||||
console.error('Usage: npx tsx extract-failures.ts <PR_NUMBER>');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`🔍 Investigating CI failures for PR #${prNumber}...`);
|
||||
|
||||
// 1. Get branch name
|
||||
const branchView = spawnSync('gh', ['pr', 'view', prNumber, '--json', 'headRefName', '-q', '.headRefName'], { shell: true });
|
||||
const branchName = branchView.stdout.toString().trim();
|
||||
|
||||
// 2. Get latest CI run ID
|
||||
const runList = spawnSync('gh', ['run', 'list', '--branch', branchName, '--workflow', 'ci.yml', '--json', 'databaseId', '-q', '.[0].databaseId'], { shell: true });
|
||||
const runId = runList.stdout.toString().trim();
|
||||
|
||||
if (!runId) {
|
||||
console.log('⚠️ No recent CI runs found for this branch.');
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
// 3. Extract failing files from logs
|
||||
// Pattern matches common test locations in the monorepo
|
||||
const logView = spawnSync('gh', ['run', 'view', runId, '--log-failed'], { shell: true });
|
||||
const logOutput = logView.stdout.toString();
|
||||
|
||||
const testPattern = /(packages\/[a-zA-Z0-9_-]+|integration-tests|evals)\/[a-zA-Z0-9_\/-]+\.test\.ts(x)?/g;
|
||||
const matches = logOutput.match(testPattern);
|
||||
|
||||
if (!matches || matches.length === 0) {
|
||||
console.log('✅ No specific failing test files detected in logs.');
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const uniqueFiles = Array.from(new Set(matches)).sort();
|
||||
console.log('\n❌ Found failing test files:');
|
||||
uniqueFiles.forEach(f => console.log(` - ${f}`));
|
||||
|
||||
console.log('\n👉 Recommendation: Run these tests locally using:');
|
||||
uniqueFiles.forEach(file => {
|
||||
let wsDir = file.split('/')[0];
|
||||
if (wsDir === 'packages') {
|
||||
wsDir = file.split('/').slice(0, 2).join('/');
|
||||
}
|
||||
const relFile = file.replace(`${wsDir}/`, '');
|
||||
console.log(` npm run test:ci -w ${wsDir} -- ${relFile}`);
|
||||
});
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
Reference in New Issue
Block a user