# gemini-cli/scripts/backlog-analysis/analyze_bugs.py
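"""Rate the implementation effort of GitHub bug reports using the Gemini API.

Loads bugs from data/bugs.json (each record is expected to carry at least
'number', 'title', and 'body'), asks the model to classify each fix as
small/medium/large with reasoning, and writes the enriched records back to
the same file after every batch. Relative paths assume the script is run
from its own directory.
"""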

import json
import urllib.request
import urllib.error
import os
import concurrent.futures
from pathlib import Path
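# Gemini API configuration. The key below was redacted before sharing;
# supply your own (for example via an environment variable) before running.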
API_KEY = "REDACTED_API_KEY"
MODEL = "gemini-3-flash-preview"
URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
BUGS_FILE = 'data/bugs.json'
with open(BUGS_FILE, 'r') as f:
    bugs = json.load(f)
# Collect basic directory structure to provide as context
def get_tree(path, max_depth=3):
    tree = []
    base_path = Path(path)
    if not base_path.exists():
        return ""
    for root, dirs, files in os.walk(base_path):
        # Prune vendored and generated directories that add noise.
        dirs[:] = [d for d in dirs if d not in ('.git', 'node_modules', 'dist', 'build', 'coverage')]
        depth = len(Path(root).relative_to(base_path).parts)
        if depth >= max_depth:
            dirs.clear()
            continue
        indent = ' ' * depth
        tree.append(f"{indent}{Path(root).name}/")
        for fname in files:
            # Only surface source and config files relevant to the analysis.
            if fname.endswith(('.ts', '.tsx', '.js', '.json', '.toml', '.md')):
                tree.append(f"{indent} {fname}")
    return "\n".join(tree)
tree_context = get_tree('../../packages')
def analyze_bug(bug):
    prompt = f"""
You are analyzing bugs for the google-gemini/gemini-cli codebase.
Here is the directory structure of the 'packages' directory:
{tree_context[:4000]}
Analyze the following GitHub bug report to determine the implementation effort.
Rate the effort level with reasoning (small as in 1 day, medium as in 2-3 days, else large).
Look at the directory structure above to pinpoint which packages and files need modification.
Issue Title: {bug.get('title')}
Issue Body: {bug.get('body', '')[:1000]}
Reply with ONLY a valid JSON object matching exactly this schema, without Markdown formatting:
{{"analysis": "short technical analysis of the root cause and required fix", "effort_level": "small|medium|large", "reasoning": "brief justification mapping the effort to the files/components involved", "recommended_implementation": "concise code change instructions (only if small effort)"}}
"""
    data = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            # Low temperature keeps the JSON output close to deterministic.
            "temperature": 0.1,
        },
    }
    req = urllib.request.Request(
        URL,
        data=json.dumps(data).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
    )
    try:
        with urllib.request.urlopen(req) as response:
            result = json.loads(response.read().decode('utf-8'))
        text = result['candidates'][0]['content']['parts'][0]['text'].strip()
        # Strip a Markdown code fence if the model added one despite instructions.
        if text.startswith('```json'):
            text = text[7:]
        elif text.startswith('```'):
            text = text[3:]
        if text.endswith('```'):
            text = text[:-3]
        return json.loads(text.strip())
    except Exception as e:
        print(f"Error processing bug {bug['number']}: {e}")
        return {"analysis": "Failed to analyze", "effort_level": "medium", "reasoning": "Error calling Gemini API"}
def process_bug(bug):
    print(f"Analyzing Bug #{bug['number']}...")
    result = analyze_bug(bug)
    # Mutate the bug dict in place; main() relies on this when it re-saves
    # the shared `bugs` list after each batch.
    bug['analysis'] = result.get('analysis', '')
    bug['effort_level'] = result.get('effort_level', 'medium')
    bug['reasoning'] = result.get('reasoning', '')
    if 'recommended_implementation' in result:
        bug['recommended_implementation'] = result['recommended_implementation']
    return bug
def main():
    print(f"Starting analysis of {len(bugs)} bugs...")
    # Process in batches to save incrementally
    batch_size = 10
    for i in range(0, len(bugs), batch_size):
        batch = bugs[i:i+batch_size]
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            # Drain the iterator so worker exceptions surface instead of
            # being silently discarded.
            list(executor.map(process_bug, batch))
        with open(BUGS_FILE, 'w') as f:
            json.dump(bugs, f, indent=2)
        print(f"Saved batch {i//batch_size + 1}")
    print("Done analyzing all bugs!")
if __name__ == '__main__':
    main()