feat: add categorize_issues.py to automatically classify and update github issues, and update README

This commit is contained in:
Coco Sheng
2026-05-06 16:31:49 -04:00
parent 3579e5edde
commit 374e4713d8
2 changed files with 109 additions and 3 deletions
+5 -3
View File
@@ -22,11 +22,13 @@ If you have a raw list of uncategorized issues, the first step is to apply the
correct types (`bug` or `feature`) directly on GitHub, and then fetch the data
into a local JSON file for analysis.
**A) Auto-Categorize on GitHub:** Use the Gemini CLI directly in your terminal
to classify and label the issues on GitHub.
**A) Auto-Categorize on GitHub:** We provide a dedicated Python script that will
automatically fetch uncategorized issues matching your search query, classify
them using the Gemini API, and apply the correct labels and title prefixes
(`[Bug]` or `[Feature]`) directly on GitHub.
```bash
gemini "I have a list of issues (e.g. 123, 124). For each issue, determine if it is a bug or a feature request. Use the gh CLI tool to act on the GitHub issue: (a) Add the 'type/bug' or 'type/feature' label, and (b) Edit the issue body or title to explicitly denote the type."
python3 categorize_issues.py --api-key "YOUR_KEY" --search "repo:google-gemini/gemini-cli -label:type/bug -label:type/feature is:open" --limit 50
```
**B) Export to JSON:** Once the issues are correctly labeled on GitHub, fetch
@@ -0,0 +1,104 @@
"""
Purpose: Automatically categorizes GitHub issues as 'bug' or 'feature' and applies the corresponding label on GitHub.
It fetches issues matching a search query, uses the Gemini API to classify them, and runs 'gh issue edit' to update GitHub.
"""
import argparse
import urllib.request
import json
import subprocess
import sys
import concurrent.futures
# Gemini model identifier interpolated into the generateContent request URL.
MODEL = "gemini-3-flash-preview"
def fetch_issues(search_query, limit):
    """Fetch issues matching a GitHub search query through the ``gh`` CLI.

    Args:
        search_query: Search string passed to ``gh issue list --search``.
        limit: Maximum number of issues to request (passed to ``--limit``).

    Returns:
        The value parsed from gh's JSON output; the command requests the
        ``number``, ``title``, ``body`` and ``url`` fields for each issue.

    Exits:
        Prints a message to stderr and exits the process with status 1 when
        ``gh`` cannot be started, returns a non-zero status, or prints output
        that is not valid JSON.
    """
    cmd = ["gh", "issue", "list", "--search", search_query, "--limit", str(limit), "--json", "number,title,body,url"]
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except FileNotFoundError:
        # Raised when the gh executable is not on PATH; report it the same way
        # as any other fetch failure instead of dumping a traceback.
        print("Error fetching issues: the 'gh' CLI was not found on PATH.", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error fetching issues: {e.stderr}", file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(res.stdout)
    except json.JSONDecodeError as e:
        print(f"Error fetching issues: could not parse gh output: {e}", file=sys.stderr)
        sys.exit(1)
def categorize_issue(issue, api_key):
    """Ask the Gemini API whether a GitHub issue is a bug or a feature request.

    Args:
        issue: Issue dict; the 'number', 'title' and 'body' keys are read.
            Only the first 1500 characters of the body are sent.
        api_key: Gemini API key, sent as the ``key`` query parameter.

    Returns:
        The JSON object parsed from the model's reply (the prompt asks for
        'type' and 'reasoning' keys), or None when the request fails or the
        reply cannot be parsed as a JSON object. Failures are reported on
        stderr.
    """
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={api_key}"
    # The .get() default only covers a missing key; a body that is present
    # but null would make the slice below raise, so treat it as empty text.
    body = issue.get('body') or ''
    prompt = f"""
Analyze the following GitHub issue and determine if it is a bug or a feature request.
Reply ONLY with a valid JSON object matching exactly this schema, without Markdown formatting:
{{"type": "bug" | "feature", "reasoning": "brief justification"}}
Issue Title: {issue.get('title')}
Issue Body: {body[:1500]}
"""
    data = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0.1}
    }
    req = urllib.request.Request(url, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
    try:
        # Bound the request so a stalled connection cannot block a worker forever.
        with urllib.request.urlopen(req, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))
        # Strip whitespace BEFORE looking for fences: a reply that ends with
        # a newline after the closing ``` would otherwise keep the fence.
        text = result['candidates'][0]['content']['parts'][0]['text'].strip()
        if text.startswith('```json'):
            text = text[7:]
        elif text.startswith('```'):
            text = text[3:]
        if text.endswith('```'):
            text = text[:-3]
        parsed = json.loads(text.strip())
    except Exception as e:
        # Best-effort boundary: any network, HTTP, or parsing failure skips
        # this issue rather than aborting the whole run.
        print(f"Error processing issue {issue['number']}: {e}", file=sys.stderr)
        return None

    if not isinstance(parsed, dict):
        # Callers index the result by key, so a JSON list/string is unusable.
        print(
            f"Error processing issue {issue['number']}: expected a JSON object, got {type(parsed).__name__}",
            file=sys.stderr,
        )
        return None
    return parsed
def process_issue(issue, api_key):
    """Classify one issue and apply the matching label and title prefix on GitHub.

    Args:
        issue: Issue dict; the 'number' and 'title' keys are read here and the
            dict is passed on to ``categorize_issue``.
        api_key: Gemini API key forwarded to ``categorize_issue``.

    Returns:
        None. Progress is printed to stdout; ``gh`` failures go to stderr.
        The issue is left untouched when classification fails or yields a
        type other than 'bug' or 'feature'.
    """
    print(f"Categorizing Issue #{issue['number']}...")
    result = categorize_issue(issue, api_key)
    if not isinstance(result, dict) or 'type' not in result:
        print(f"Failed to categorize #{issue['number']}.")
        return

    # The type comes from model output: normalize it and accept only the two
    # known values so that no arbitrary 'type/...' label is ever applied.
    issue_type = str(result['type']).strip().lower()
    if issue_type not in ("bug", "feature"):
        print(f"Failed to categorize #{issue['number']}: unexpected type {result['type']!r}.")
        return

    label = f"type/{issue_type}"
    print(f"Issue #{issue['number']} is a {issue_type}. Applying label '{label}' on GitHub...")
    cmd = ["gh", "issue", "edit", str(issue['number']), "--add-label", label]

    # Prepend the type to the title if it's not already there
    title = issue.get('title') or ''
    needs_prefix = not title.lower().startswith((f"[{issue_type}]", f"{issue_type}:"))
    if needs_prefix:
        cmd.extend(["--title", f"[{issue_type.capitalize()}] {title}"])

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error labeling #{issue['number']}: {e.stderr}", file=sys.stderr)
    except OSError as e:
        # e.g. the gh executable is missing; report it instead of raising
        # inside a worker thread.
        print(f"Error labeling #{issue['number']}: {e}", file=sys.stderr)
    else:
        # Only claim a title update when one was actually requested.
        if needs_prefix:
            print(f"Successfully labeled and updated title for #{issue['number']}.")
        else:
            print(f"Successfully labeled #{issue['number']}.")
def main():
    """Parse command-line arguments, fetch matching issues and categorize them.

    Command-line options:
        --api-key: Gemini API key (required).
        --search:  GitHub search query selecting the issues (required).
        --limit:   Maximum number of issues to process (default 50).

    Issues are processed by a pool of four worker threads. An exception
    raised while processing one issue is reported on stderr and does not stop
    the remaining issues.
    """
    parser = argparse.ArgumentParser(description="Auto-categorize GitHub issues (bug vs feature) and apply labels on GitHub.")
    parser.add_argument("--api-key", required=True, help="Gemini API Key")
    parser.add_argument("--search", required=True, help="GitHub search query for issues to categorize")
    parser.add_argument("--limit", type=int, default=50, help="Maximum number of issues to process")
    args = parser.parse_args()

    print(f"Fetching issues matching: '{args.search}'")
    issues = fetch_issues(args.search, args.limit)
    if not issues:
        print("No issues found matching the query.")
        return

    print(f"Found {len(issues)} issues to categorize.")
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        futures = {
            executor.submit(process_issue, issue, args.api_key): issue
            for issue in issues
        }
        # Waiting alone never re-raises worker exceptions; result() must be
        # called on each future or failures vanish silently.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                number = futures[future].get('number', '?')
                print(f"Error processing issue #{number}: {e}", file=sys.stderr)
    print("Done categorizing issues.")
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()