# gemini-cli/scripts/backlog-analysis/categorize_issues.py
"""
Purpose: Automatically categorizes GitHub issues as 'bug' or 'feature' and applies the corresponding label on GitHub.
It fetches issues matching a search query, uses the Gemini API to classify them, and runs 'gh issue edit' to update GitHub.
"""
import argparse
import urllib.request
import json
import subprocess
import sys
import concurrent.futures
MODEL = "gemini-3-flash-preview"
def fetch_issues(search_query, limit):
    """Fetch issues matching *search_query* via the GitHub CLI.

    Returns a list of dicts with keys number/title/body/url (parsed from
    `gh --json` output). Exits the process with status 1 if `gh` fails.
    """
    gh_args = [
        "gh", "issue", "list",
        "--search", search_query,
        "--limit", str(limit),
        "--json", "number,title,body,url",
    ]
    try:
        completed = subprocess.run(gh_args, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error fetching issues: {e.stderr}", file=sys.stderr)
        sys.exit(1)
    return json.loads(completed.stdout)
def categorize_issue(issue, api_key):
    """Classify a GitHub issue as a bug or feature request using the Gemini API.

    Args:
        issue: dict with at least 'number', 'title', and optionally 'body'.
        api_key: Gemini API key, passed as a URL query parameter.

    Returns:
        The parsed dict ({"type": ..., "reasoning": ...}) on success, or
        None on any failure (network error, malformed response, bad JSON).
    """
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={api_key}"
    # 'body' may be present but None in gh's JSON output; `.get('body', '')`
    # only covers a *missing* key, so guard with `or ''` before slicing.
    body = (issue.get('body') or '')[:1500]
    prompt = f"""
Analyze the following GitHub issue and determine if it is a bug or a feature request.
Reply ONLY with a valid JSON object matching exactly this schema, without Markdown formatting:
{{"type": "bug" | "feature", "reasoning": "brief justification"}}
Issue Title: {issue.get('title')}
Issue Body: {body}
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0.1},
    }
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
    )
    try:
        # Explicit timeout so a stalled API call cannot hang a worker
        # thread (and thus the whole thread pool) forever.
        with urllib.request.urlopen(req, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))
        text = result['candidates'][0]['content']['parts'][0]['text'].strip()
        # Strip a Markdown code fence if the model added one despite instructions.
        if text.startswith('```json'):
            text = text[7:]
        if text.startswith('```'):
            text = text[3:]
        if text.endswith('```'):
            text = text[:-3]
        return json.loads(text.strip())
    except Exception as e:
        # Deliberate best-effort: report and skip this issue rather than
        # aborting the whole batch on one bad response.
        print(f"Error processing issue {issue['number']}: {e}")
        return None
def process_issue(issue, api_key):
    """Categorize one issue and apply the matching label (and title prefix) via `gh`.

    Prints progress/errors to stdout/stderr; returns None in all cases.
    """
    print(f"Categorizing Issue #{issue['number']}...")
    result = categorize_issue(issue, api_key)
    if not result or 'type' not in result:
        print(f"Failed to categorize #{issue['number']}.")
        return
    issue_type = result['type']
    # The model's output is untrusted; only act on the two expected values so
    # a malformed response can never create an arbitrary label or title prefix.
    if issue_type not in ('bug', 'feature'):
        print(f"Unexpected type '{issue_type}' for #{issue['number']}; skipping.")
        return
    label = f"type/{issue_type}"
    print(f"Issue #{issue['number']} is a {issue_type}. Applying label '{label}' on GitHub...")
    cmd = ["gh", "issue", "edit", str(issue['number']), "--add-label", label]
    # Prepend the type to the title if it's not already there
    title = issue.get('title', '')
    if not title.lower().startswith(f"[{issue_type}]") and not title.lower().startswith(f"{issue_type}:"):
        new_title = f"[{issue_type.capitalize()}] {title}"
        cmd.extend(["--title", new_title])
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
        print(f"Successfully labeled and updated title for #{issue['number']}.")
    except subprocess.CalledProcessError as e:
        print(f"Error labeling #{issue['number']}: {e.stderr}", file=sys.stderr)
def main():
    """Parse CLI arguments, fetch matching issues, and categorize them in parallel."""
    parser = argparse.ArgumentParser(description="Auto-categorize GitHub issues (bug vs feature) and apply labels on GitHub.")
    parser.add_argument("--api-key", required=True, help="Gemini API Key")
    parser.add_argument("--search", required=True, help="GitHub search query for issues to categorize")
    parser.add_argument("--limit", type=int, default=50, help="Maximum number of issues to process")
    args = parser.parse_args()

    print(f"Fetching issues matching: '{args.search}'")
    issues = fetch_issues(args.search, args.limit)
    if not issues:
        print("No issues found matching the query.")
        return
    print(f"Found {len(issues)} issues to categorize.")

    # The work is I/O-bound (HTTP calls + gh subprocesses), so a small
    # thread pool overlaps the waits nicely.
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
        pending = [pool.submit(process_issue, issue, args.api_key) for issue in issues]
        concurrent.futures.wait(pending)
    print("Done categorizing issues.")


if __name__ == '__main__':
    main()