feat: categorize_issues.py now accepts a full GitHub search URL as input instead of a raw query string

2026-05-14 22:02:59 -07:00 · 2026-05-06 16:32:59 -04:00
parent 374e4713d8
commit fb236ea0e2
2 changed files with 25 additions and 9 deletions
@@ -23,12 +23,12 @@ correct types (`bug` or `feature`) directly on GitHub, and then fetch the data
 into a local JSON file for analysis.

 **A) Auto-Categorize on GitHub:** We provide a dedicated Python script that will
-automatically fetch uncategorized issues matching your search query, classify
-them using the Gemini API, and apply the correct labels and title prefixes
-(`[Bug]` or `[Feature]`) directly on GitHub.
+automatically fetch uncategorized issues matching your search URL, classify them
+using the Gemini API, and apply the correct labels and title prefixes (`[Bug]`
+or `[Feature]`) directly on GitHub.

 ```bash
-python3 categorize_issues.py --api-key "YOUR_KEY" --search "repo:google-gemini/gemini-cli -label:type/bug -label:type/feature is:open" --limit 50
+python3 categorize_issues.py "https://github.com/google-gemini/gemini-cli/issues/?q=-label:type/bug+-label:type/feature+is:open" --api-key "YOUR_KEY" --limit 50
 ```

 **B) Export to JSON:** Once the issues are correctly labeled on GitHub, fetch
@@ -1,8 +1,9 @@
 """
 Purpose: Automatically categorizes GitHub issues as 'bug' or 'feature' and applies the corresponding label on GitHub.
-It fetches issues matching a search query, uses the Gemini API to classify them, and runs 'gh issue edit' to update GitHub.
+It fetches issues matching a search URL, uses the Gemini API to classify them, and runs 'gh issue edit' to update GitHub.
 """
 import argparse
+import urllib.parse
 import urllib.request
 import json
 import subprocess
@@ -80,14 +81,29 @@ def process_issue(issue, api_key):
        print(f"Error labeling #{issue['number']}: {e.stderr}", file=sys.stderr)

 def main():
-    parser = argparse.ArgumentParser(description="Auto-categorize GitHub issues (bug vs feature) and apply labels on GitHub.")
+    parser = argparse.ArgumentParser(description="Auto-categorize GitHub issues (bug vs feature) from a GitHub URL and apply labels on GitHub.")
+    parser.add_argument("url", help="The full GitHub Issues search URL (e.g., https://github.com/.../issues/?q=...)")
    parser.add_argument("--api-key", required=True, help="Gemini API Key")
-    parser.add_argument("--search", required=True, help="GitHub search query for issues to categorize")
    parser.add_argument("--limit", type=int, default=50, help="Maximum number of issues to process")
    args = parser.parse_args()

-    print(f"Fetching issues matching: '{args.search}'")
-    issues = fetch_issues(args.search, args.limit)
+    parsed_url = urllib.parse.urlparse(args.url)
+    query_params = urllib.parse.parse_qs(parsed_url.query)
+    
+    if 'q' in query_params:
+        search_query = query_params['q'][0]
+    else:
+        print("Warning: No 'q=' search parameter found in URL. Fetching default open issues.")
+        search_query = "is:issue is:open"
+
+    if 'repo:' not in search_query:
+        path_parts = [p for p in parsed_url.path.split('/') if p]
+        if len(path_parts) >= 2:
+            repo = f"{path_parts[0]}/{path_parts[1]}"
+            search_query = f"repo:{repo} {search_query}"
+
+    print(f"Fetching issues matching: '{search_query}'")
+    issues = fetch_issues(search_query, args.limit)
    if not issues:
        print("No issues found matching the query.")
        return