mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
feat: merge steps 2-5 into a single unified pipeline script run_pipeline.sh
This commit is contained in:
@@ -11,8 +11,8 @@ and determining implementation effort levels for the Gemini CLI project.
|
||||
validation (e.g., `validate_effort.py`, `inject_manual_fixes.py`).
|
||||
- `*.py`: Core analysis and export scripts (e.g., `bug_analyzer_final.py`,
|
||||
`generate_bugs_csv.py`).
|
||||
- `loop_analyzer.sh`: A shell script for running iterative analysis until all
|
||||
issues are processed.
|
||||
- `run_pipeline.sh`: A shell script that orchestrates the entire effort analysis
|
||||
pipeline end-to-end.
|
||||
|
||||
## 📥 Prerequisites: Data Generation
|
||||
|
||||
@@ -51,41 +51,17 @@ gemini "Read data/uncategorized.json. For each issue, determine if it is a bug o
|
||||
_Note: Make sure your `gemini-cli` has permission to execute shell commands if
|
||||
you want it to apply the labels automatically via `gh`._
|
||||
|
||||
### 2. Initial Triage (Static)
|
||||
### 2. Full Effort Analysis Pipeline
|
||||
|
||||
Use this for a quick, first-pass estimation.
|
||||
Instead of running individual steps manually, you can run the entire analysis
|
||||
pipeline (Initial Triage -> Deep Agentic Analysis -> Iterative Recovery ->
|
||||
Validation -> CSV Export) with a single command.
|
||||
|
||||
```bash
|
||||
python3 analyze_bugs.py --api-key "YOUR_KEY"
|
||||
GEMINI_API_KEY="YOUR_KEY" ./run_pipeline.sh data/bugs.json ../../packages
|
||||
```
|
||||
|
||||
### 3. Deep Agentic Analysis
|
||||
|
||||
Uses Gemini as an agent with access to the codebase.
|
||||
|
||||
```bash
|
||||
python3 bug_analyzer_final.py --api-key "YOUR_KEY"
|
||||
```
|
||||
|
||||
### 4. Iterative Analysis
|
||||
|
||||
Runs the single-turn analyzer in a loop until all issues have a valid analysis.
|
||||
|
||||
```bash
|
||||
GEMINI_API_KEY="YOUR_KEY" ./loop_analyzer.sh
|
||||
```
|
||||
|
||||
### 5. Validation & Export
|
||||
|
||||
Run validation from the utils folder to ensure consistency, then generate a
|
||||
readable report.
|
||||
|
||||
```bash
|
||||
python3 utils/validate_effort.py
|
||||
python3 generate_bugs_csv.py
|
||||
```
|
||||
|
||||
### 6. Generic Issue Processing
|
||||
### 3. Generic Issue Processing
|
||||
|
||||
For any other backlog task (e.g., categorizing features, updating labels, or
|
||||
custom analysis), use the `generic_processor.py`. This script allows you to
|
||||
|
||||
@@ -2,24 +2,22 @@
|
||||
Purpose: Exports analyzed JSON issue data into a human-readable CSV format.
|
||||
This is typically the final step in the workflow, making the output suitable for sharing, spreadsheet import, or manual review.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import csv
|
||||
from datetime import datetime
|
||||
|
||||
BUGS_FILE = 'data/bugs.json'
|
||||
METADATA_FILE = 'data/metadata_bugs.json'
|
||||
CSV_FILE = 'data/bugs.csv'
|
||||
parser = argparse.ArgumentParser(description="Export JSON issues to CSV.")
|
||||
parser.add_argument("--input", default="data/bugs.json", help="Input JSON file")
|
||||
parser.add_argument("--output", default="data/bugs.csv", help="Output CSV file")
|
||||
args = parser.parse_args()
|
||||
|
||||
with open(BUGS_FILE, 'r') as f:
|
||||
bugs = json.load(f)
|
||||
with open(args.input, 'r') as f:
|
||||
issues = json.load(f)
|
||||
|
||||
with open(METADATA_FILE, 'r') as f:
|
||||
metadata_list = json.load(f)
|
||||
today = datetime.now().strftime("%Y-%m-%d")
|
||||
|
||||
metadata_map = {m['number']: m for m in metadata_list}
|
||||
today = "2026-04-21"
|
||||
|
||||
with open(CSV_FILE, 'w', newline='', encoding='utf-8') as f:
|
||||
with open(args.output, 'w', newline='', encoding='utf-8') as f:
|
||||
writer = csv.writer(f, delimiter='\t')
|
||||
writer.writerow([
|
||||
'Issue ID', 'Title', 'Status', 'Assignee', 'Labels',
|
||||
@@ -27,25 +25,31 @@ with open(CSV_FILE, 'w', newline='', encoding='utf-8') as f:
|
||||
'reasoning', 'recommended_implementation'
|
||||
])
|
||||
|
||||
for bug in bugs:
|
||||
num = bug.get('number')
|
||||
meta = metadata_map.get(num, {})
|
||||
for issue in issues:
|
||||
num = issue.get('number')
|
||||
|
||||
assignee = ", ".join([a['login'] for a in meta.get('assignees', [])])
|
||||
labels = ", ".join([l['name'] for l in meta.get('labels', [])])
|
||||
assignee_list = issue.get('assignees', [])
|
||||
if isinstance(assignee_list, dict) and 'nodes' in assignee_list:
|
||||
assignee_list = assignee_list['nodes']
|
||||
assignee = ", ".join([a.get('login', '') for a in assignee_list])
|
||||
|
||||
labels_list = issue.get('labels', [])
|
||||
if isinstance(labels_list, dict) and 'nodes' in labels_list:
|
||||
labels_list = labels_list['nodes']
|
||||
labels = ", ".join([l.get('name', '') for l in labels_list])
|
||||
|
||||
writer.writerow([
|
||||
num,
|
||||
bug.get('title', ''),
|
||||
meta.get('state', 'open'),
|
||||
issue.get('title', ''),
|
||||
issue.get('state', 'OPEN'),
|
||||
assignee,
|
||||
labels,
|
||||
today,
|
||||
bug.get('url', ''),
|
||||
bug.get('analysis', ''),
|
||||
bug.get('effort_level', ''),
|
||||
bug.get('reasoning', ''),
|
||||
bug.get('recommended_implementation', '')
|
||||
issue.get('url', ''),
|
||||
issue.get('analysis', ''),
|
||||
issue.get('effort_level', ''),
|
||||
issue.get('reasoning', ''),
|
||||
issue.get('recommended_implementation', '')
|
||||
])
|
||||
|
||||
print(f"Successfully generated {CSV_FILE}")
|
||||
print(f"Successfully generated {args.output}")
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Run from the project root or the scripts/backlog-analysis directory
|
||||
# This script assumes it's running in the same directory as the python scripts
|
||||
|
||||
if [ -z "$GEMINI_API_KEY" ]; then
|
||||
echo "Error: GEMINI_API_KEY environment variable is required."
|
||||
echo "Usage: GEMINI_API_KEY=your_key ./loop_analyzer.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
while true; do
|
||||
count=$(jq '[.[] | select(.analysis == "Failed to analyze autonomously" or .analysis == null or .analysis == "" or (.analysis | length) < 30)] | length' data/bugs.json)
|
||||
if [ "$count" -eq 0 ]; then
|
||||
echo "All bugs processed!"
|
||||
break
|
||||
fi
|
||||
echo "Remaining bugs: $count"
|
||||
python3 single_turn_bug_analyzer.py --api-key "$GEMINI_API_KEY"
|
||||
done
|
||||
python3 generate_bugs_csv.py
|
||||
Executable
+53
@@ -0,0 +1,53 @@
|
||||
#!/bin/bash
# run_pipeline.sh
# Purpose: Orchestrates the full effort analysis pipeline end-to-end.
#
# Usage: GEMINI_API_KEY=your_key ./run_pipeline.sh [INPUT_FILE] [PROJECT_DIR]
#   INPUT_FILE   JSON file holding the issues to analyze (default: data/bugs.json)
#   PROJECT_DIR  Codebase root handed to each analysis step (default: ../../packages)
#
# Optional environment:
#   MAX_RECOVERY_PASSES  Upper bound on Step 3 retry passes. 0 (the default)
#                        keeps the original behavior of retrying without limit.

if [ -z "$GEMINI_API_KEY" ]; then
  echo "Error: GEMINI_API_KEY environment variable is required."
  echo "Usage: GEMINI_API_KEY=your_key ./run_pipeline.sh [INPUT_FILE] [PROJECT_DIR]"
  exit 1
fi

INPUT_FILE=${1:-"data/bugs.json"}
PROJECT_DIR=${2:-"../../packages"}
# The CSV is written next to the input, with the .json suffix swapped for .csv.
OUTPUT_CSV="${INPUT_FILE%.json}.csv"
MAX_RECOVERY_PASSES=${MAX_RECOVERY_PASSES:-0}

# Step 3 counts unfinished issues with jq. Without it the count would be empty
# and the loop could not tell "nothing left" apart from "could not check".
if ! command -v jq >/dev/null 2>&1; then
  echo "Error: jq is required but was not found in PATH." >&2
  exit 1
fi

echo "=========================================="
echo "Step 1: Initial Triage (Static Pass)"
echo "=========================================="
python3 analyze_bugs.py --api-key "$GEMINI_API_KEY" --input "$INPUT_FILE" --project "$PROJECT_DIR"

echo ""
echo "=========================================="
echo "Step 2: Deep Agentic Analysis"
echo "=========================================="
python3 bug_analyzer_final.py --api-key "$GEMINI_API_KEY" --input "$INPUT_FILE" --project "$PROJECT_DIR"

echo ""
echo "=========================================="
echo "Step 3: Iterative Recovery Analysis"
echo "=========================================="
pass=0
while true; do
  # An issue still needs work when its analysis is the failure marker, null,
  # empty, or shorter than 30 characters.
  if ! count=$(jq '[.[] | select(.analysis == "Failed to analyze autonomously" or .analysis == null or .analysis == "" or (.analysis | length) < 30)] | length' "$INPUT_FILE"); then
    # A jq failure (missing or malformed input) must not be reported as success.
    echo "Error: could not count unanalyzed issues in $INPUT_FILE." >&2
    exit 1
  fi

  # Guard the numeric comparison below against empty or non-numeric output.
  case "$count" in
    '' | *[!0-9]*)
      echo "Error: unexpected issue count from jq: '$count'" >&2
      exit 1
      ;;
  esac

  if [ "$count" -eq 0 ]; then
    echo "All issues successfully processed!"
    break
  fi

  # Optional safety valve so a persistently failing analyzer cannot spin forever.
  if [ "$MAX_RECOVERY_PASSES" -gt 0 ] 2>/dev/null && [ "$pass" -ge "$MAX_RECOVERY_PASSES" ]; then
    echo "Warning: stopping recovery after $pass passes; $count issues remain unanalyzed." >&2
    break
  fi
  pass=$((pass + 1))

  echo "Remaining unanalyzed issues: $count"
  python3 single_turn_bug_analyzer.py --api-key "$GEMINI_API_KEY" --input "$INPUT_FILE" --project "$PROJECT_DIR"
done

echo ""
echo "=========================================="
echo "Step 4: Heuristic Validation"
echo "=========================================="
python3 utils/validate_effort.py --input "$INPUT_FILE" --project "$PROJECT_DIR"

echo ""
echo "=========================================="
echo "Step 5: Exporting to CSV"
echo "=========================================="
# Only announce completion when the export actually produced the CSV.
if ! python3 generate_bugs_csv.py --input "$INPUT_FILE" --output "$OUTPUT_CSV"; then
  echo "Error: CSV export failed; $OUTPUT_CSV was not generated." >&2
  exit 1
fi

echo ""
echo "✅ Pipeline Complete! Results saved to $OUTPUT_CSV"
|
||||
@@ -2,12 +2,18 @@
|
||||
Purpose: Runs heuristic post-analysis validation on the AI's effort estimations.
|
||||
Checks for keywords (like 'Windows', 'WSL', 'PTY') in the issue body to ensure the AI didn't underestimate platform-specific or architecturally complex bugs as 'small'.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
|
||||
ISSUES_FILE = 'backlog-analysis/issues.json'
|
||||
REPO_ROOT = '/Users/cocosheng/gemini-cli'
|
||||
parser = argparse.ArgumentParser(description="Validate effort levels using heuristics.")
|
||||
parser.add_argument("--input", default="data/bugs.json", help="Input JSON file containing analyzed issues")
|
||||
parser.add_argument("--project", default="../../packages", help="Project root for codebase validation")
|
||||
args = parser.parse_args()
|
||||
|
||||
ISSUES_FILE = args.input
|
||||
REPO_ROOT = args.project
|
||||
|
||||
with open(ISSUES_FILE, 'r') as f:
|
||||
issues = json.load(f)
|
||||
@@ -35,17 +41,13 @@ SMALL_KEYWORDS = [
|
||||
]
|
||||
|
||||
def find_files_in_text(text):
    """Return the set of file-like tokens mentioned in *text*.

    A token is a run of word characters, dots, slashes and dashes that ends
    in one of the recognized extensions (.ts, .tsx, .js, .json, .md), e.g.
    ``packages/cli/src/ui/components/Footer.tsx`` or just ``Footer.tsx``.

    Args:
        text: Free-form text to scan. ``None`` or an empty string yields an
            empty set.

    Returns:
        A set of matched path strings; tokens starting with ``http`` are
        dropped.
    """
    if not text:
        return set()

    # Regex alternation is leftmost-first, not longest-first: with "ts" listed
    # before "tsx", "Footer.tsx" was captured as "Footer.ts" (and "x.json" as
    # "x.js"). List the longer extensions first and require a word boundary so
    # the whole extension must match.
    matches = re.findall(r'([\w\.\/\-]+\.(?:tsx|ts|json|js|md)\b)', text)

    # Filter out URLs or common false positives.
    # NOTE(review): ':' is not in the character class, so a full URL is never
    # captured with its scheme; this filter mostly drops bare tokens that
    # begin with "http" -- confirm that is the intended behavior.
    return {m for m in matches if not m.startswith('http')}
|
||||
|
||||
def resolve_file(filename):
|
||||
if os.path.exists(os.path.join(REPO_ROOT, filename)):
|
||||
return os.path.join(REPO_ROOT, filename)
|
||||
|
||||
# Try searching the repo for the basename
|
||||
basename = os.path.basename(filename)
|
||||
for root, dirs, files in os.walk(REPO_ROOT):
|
||||
if '.git' in root or 'node_modules' in root:
|
||||
@@ -82,7 +84,6 @@ def analyze_issue(issue):
|
||||
effort = "small"
|
||||
validation_msg = ""
|
||||
|
||||
# Keyword analysis
|
||||
keyword_effort = "small"
|
||||
for kw in LARGE_KEYWORDS:
|
||||
if re.search(r'\b' + re.escape(kw) + r'\b', combined_text):
|
||||
@@ -95,9 +96,7 @@ def analyze_issue(issue):
|
||||
keyword_effort = "medium"
|
||||
break
|
||||
|
||||
# Codebase heuristic
|
||||
if num_files == 0:
|
||||
# If no files found, rely strictly on keywords, but default to medium to be safe
|
||||
effort = keyword_effort if keyword_effort in ['medium', 'large'] else 'medium'
|
||||
validation_msg = f"No specific files identified in codebase. Keyword heuristic: {keyword_effort}."
|
||||
else:
|
||||
@@ -121,9 +120,7 @@ for issue in issues:
|
||||
|
||||
issue['effort_level'] = new_effort
|
||||
|
||||
# Store the validation reason in the reasoning field
|
||||
existing_reasoning = issue.get('reasoning', '')
|
||||
# Strip any previous validation messages
|
||||
existing_reasoning = existing_reasoning.split(' | Codebase validation:')[0]
|
||||
existing_reasoning = existing_reasoning.split(' | No specific files identified')[0]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user