feat: consolidate backlog analysis tools into scripts/backlog-analysis

2026-06-01 22:52:43 -07:00 · 2026-05-06 15:50:06 -04:00
parent a9629df449
commit c76ea08eb5
33 changed files with 16145 additions and 0 deletions
@@ -0,0 +1,135 @@
+import json
+import urllib.request
+import urllib.error
+import os
+import subprocess
+import sys
+
+API_KEY = "REDACTED_API_KEY"
+MODEL = "gemini-3-flash-preview"
+URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
+
+BUGS_FILE = 'data/bugs.json'
+
+with open(BUGS_FILE, 'r') as f:
+    bugs = json.load(f)
+
+tools = [
+    {
+        "functionDeclarations": [
+            {
+                "name": "search_codebase",
+                "description": "Search the gemini-cli packages directory for a string using grep.",
+                "parameters": {
+                    "type": "OBJECT",
+                    "properties": {"pattern": {"type": "STRING"}},
+                    "required": ["pattern"]
+                }
+            },
+            {
+                "name": "read_file",
+                "description": "Read a specific file.",
+                "parameters": {
+                    "type": "OBJECT",
+                    "properties": {"filepath": {"type": "STRING"}},
+                    "required": ["filepath"]
+                }
+            }
+        ]
+    }
+]
+
+def call_gemini(messages):
+    data = {"contents": messages, "tools": tools, "generationConfig": {"temperature": 0.1}}
+    req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
+    with urllib.request.urlopen(req) as response:
+        return json.loads(response.read().decode('utf-8'))
+
+def execute_tool(call):
+    name = call['name']
+    args = call.get('args', {})
+    if name == 'search_codebase':
+        p = args.get('pattern', '').replace('"', '\\"')
+        cmd = f'grep -rn "{p}" ../../packages | grep -vE "node_modules|dist|build|\\.test\\." | head -n 30'
+        try:
+            return subprocess.check_output(cmd, shell=True, text=True, stderr=subprocess.STDOUT) or "No results."
+        except: return "No results."
+    elif name == 'read_file':
+        f = args.get('filepath', '')
+        if not f.startswith('/'): f = os.path.join('../../packages', f)
+        if not os.path.exists(f):
+            basename = os.path.basename(f)
+            find_cmd = f'find ../../packages -name "{basename}" | head -n 1'
+            try:
+                f = subprocess.check_output(find_cmd, shell=True, text=True).strip()
+            except: return "File not found."
+        if not f or not os.path.exists(f): return "File not found."
+        try:
+            return subprocess.check_output(f'head -n 300 "{f}"', shell=True, text=True)
+        except: return "Error reading file."
+    return "Unknown tool"
+
+def analyze_issue(issue):
+    system_instruction = """You are a senior software engineer analyzing bug reports for the gemini-cli codebase. 
+You MUST use the provided tools to investigate the codebase and pinpoint exactly which files and logic are responsible for the bug. 
+DO NOT GUESS.
+
+Rating Effort Level:
+- small (1 day): Bug is easy to reproduce, localized fix (1-2 files).
+- medium (2-3 days): Harder to reproduce, touches multiple components, or requires tracing.
+- large (>3 days): Architectural issues, core protocol changes, or complex multi-package bugs.
+
+REPRODUCTION RULE:
+If a bug is hard to reproduce (specific OS, complex setup, intermittent/flickering), it MUST NOT be rated as small.
+
+Output format (ONLY valid JSON, NO markdown):
+{
+  "analysis": "technical analysis of root cause and fix",
+  "effort_level": "small|medium|large",
+  "reasoning": "justification with specific files/lines found using tools"
+}
+"""
+    prompt = f"{system_instruction}\n\nBug Title: {issue.get('title')}\nBug Body: {issue.get('body', '')[:1200]}"
+    messages = [{"role": "user", "parts": [{"text": prompt}]}]
+    
+    for _ in range(25):
+        try:
+            res = call_gemini(messages)
+            candidate = res['candidates'][0]['content']
+            if 'role' not in candidate: candidate['role'] = 'model'
+            messages.append(candidate)
+            fcalls = [p['functionCall'] for p in candidate.get('parts', []) if 'functionCall' in p]
+            if fcalls:
+                responses = []
+                for fc in fcalls:
+                    out = execute_tool(fc)
+                    responses.append({"functionResponse": {"name": fc['name'], "response": {"result": out}}})
+                messages.append({"role": "user", "parts": responses})
+            else:
+                txt = candidate['parts'][0].get('text', '').replace('```json', '').replace('```', '').strip()
+                return json.loads(txt)
+        except Exception: break
+    return None
+
+print(f"Starting sequential re-analysis for {len(bugs)} bugs...")
+for i, bug in enumerate(bugs):
+    # Only re-analyze if it's missing a real analysis
+    analysis = bug.get('analysis', '')
+    if analysis and analysis != "Failed to analyze autonomously" and len(analysis) > 50:
+        continue
+        
+    print(f"[{i+1}/{len(bugs)}] Analyzing #{bug['number']}...")
+    result = analyze_issue(bug)
+    if result:
+        bug['analysis'] = result.get('analysis', 'Failed to analyze')
+        bug['effort_level'] = result.get('effort_level', 'medium')
+        bug['reasoning'] = result.get('reasoning', 'Could not determine')
+        print(f"  > Success: {bug['effort_level']}")
+    else:
+        print(f"  > FAILED")
+        
+    # Save after each bug to ensure no loss
+    with open(BUGS_FILE, 'w') as f:
+        json.dump(bugs, f, indent=2)
+
+print("Done.")