# NOTE(review): the lines below replace non-Python residue from a git web-UI
# scrape (file-listing chrome), preserved here only as provenance:
# gemini-cli/scripts/backlog-analysis/analyze_pipeline.py — 396 lines, 17 KiB, Python

"""
Purpose: A unified pipeline that performs end-to-end effort analysis on a dataset of GitHub issues.
It combines agentic deep analysis, single-turn fallbacks, heuristic validation, and CSV export in a single, efficient execution, avoiding redundant file operations.
"""
import argparse
import json
import urllib.request
import os
import subprocess
import re
import concurrent.futures
import threading
import csv
from datetime import datetime
from pathlib import Path
# Gemini model used for every generateContent call in this pipeline.
MODEL = "gemini-3-flash-preview"
# Serializes writes of the shared issues JSON across worker threads.
file_lock = threading.Lock()
# --- VALIDATION HEURISTICS ---
# Terms whose presence in an issue (title/body/analysis/reasoning) suggests a
# "large" effort: platform/terminal quirks, concurrency, performance,
# protocol or architectural work.  Matched case-insensitively as whole words
# by validate_effort().
LARGE_KEYWORDS = [
    'windows', 'win32', 'wsl', 'wsl2', 'pty', 'pseudo-terminal', 'child_process', 'spawn', 'sigint', 'sigterm',
    'memory leak', 'performance', 'boot time', 'infinite loop', 'hangs', 'freezes', 'crashes', 'race condition',
    'intermittent', 'sometimes', 'flickering', 'a2a', 'mcp protocol', 'scheduler', 'event loop', 'websocket',
    'stream', 'throughput', 'concurrency', 'deadlock', 'file descriptor', 'architecture', 'refactor'
]
# Terms suggesting a "medium" effort: UI/React state, parsing, async plumbing,
# configuration and rendering concerns.  Only consulted when no LARGE keyword hit.
MEDIUM_KEYWORDS = [
    'react', 'hook', 'useeffect', 'usestate', 'usememo', 'ink', 'tui', 'ui state', 'parser', 'markdown',
    'regex', 'regular expression', 'ansi', 'escape sequence', 'toml', 'schema', 'validation', 'zod',
    'promise', 'async', 'await', 'unhandled', 'rejection', 'config', 'settings', 'env', 'environment',
    'path resolution', 'symlink', 'git', 'telemetry', 'logging', 'format', 'display', 'rendering',
    'clipboard', 'copy', 'paste', 'bracketed', 'interactive', 'dialog', 'modal', 'focus'
]
# Gemini function-calling declarations: the two tools the agentic loop offers
# the model.  When the model emits a functionCall for one of these, it is
# executed locally by execute_tool() and the result fed back as a turn.
tools_decl = [
    {
        "functionDeclarations": [
            {
                "name": "search_codebase",
                "description": "Search the project directory for a string using grep. Returns matching lines and file paths.",
                "parameters": {
                    "type": "OBJECT",
                    "properties": {
                        "pattern": {"type": "STRING", "description": "The text pattern to search for"}
                    },
                    "required": ["pattern"]
                }
            },
            {
                "name": "read_file",
                "description": "Read a specific file to understand its context.",
                "parameters": {
                    "type": "OBJECT",
                    "properties": {
                        "filepath": {"type": "STRING", "description": "The path to the file"}
                    },
                    "required": ["filepath"]
                }
            }
        ]
    }
]
def call_gemini(messages, url):
    """POST the conversation to a Gemini generateContent endpoint.

    Sends the full message history together with the module-level tool
    declarations at a low temperature, and returns the decoded JSON body.

    Raises whatever urllib raises on network/HTTP failure (callers catch it).
    """
    payload = json.dumps({
        "contents": messages,
        "tools": tools_decl,
        "generationConfig": {"temperature": 0.1},
    }).encode('utf-8')
    request = urllib.request.Request(
        url,
        data=payload,
        headers={'Content-Type': 'application/json'},
    )
    with urllib.request.urlopen(request, timeout=120) as resp:
        body = resp.read().decode('utf-8')
    return json.loads(body)
def execute_tool(call, project_path):
    """Execute a model-requested tool call and return its textual result.

    Supported tools (see tools_decl): 'search_codebase' greps the project for
    a pattern, filtering out build artefacts/tests and keeping 20 lines;
    'read_file' returns the first 300 lines of a file, falling back to a
    basename search when the given path does not exist.

    Security fix: the original built shell command strings with f-strings and
    shell=True; escaping only double quotes still allowed `$()`/backtick
    injection from model-supplied arguments.  All subprocess use is now
    shell=False with list arguments, and file reading is pure Python.

    Args:
        call: dict with 'name' and optional 'args' from the functionCall.
        project_path: root directory tools are allowed to operate on.

    Returns:
        A result string (possibly an error description); "Unknown tool" for
        unrecognized tool names.
    """
    name = call['name']
    args = call.get('args', {})
    if name == 'search_codebase':
        pattern = args.get('pattern', '')
        try:
            # '--' stops grep from treating a leading '-' in the pattern as a flag.
            proc = subprocess.run(
                ['grep', '-rn', '--', pattern, project_path],
                capture_output=True, text=True)
        except OSError as e:
            return str(e)
        noise = re.compile(r'node_modules|dist|build|\.test\.')
        hits = [ln for ln in proc.stdout.splitlines() if not noise.search(ln)][:20]
        return '\n'.join(hits) if hits else "No matches found."
    elif name == 'read_file':
        filepath = args.get('filepath', '')
        if not filepath.startswith('/'):
            filepath = os.path.join(project_path, filepath)
        try:
            if not os.path.exists(filepath):
                # Model often guesses wrong directories: search by basename.
                basename = os.path.basename(filepath)
                found = None
                for root, _dirs, files in os.walk(project_path):
                    if basename in files:
                        found = os.path.join(root, basename)
                        break
                if found:
                    filepath = found
                else:
                    return f"File {filepath} not found."
            # Cap at 300 lines (same budget as the original `head -n 300`).
            collected = []
            with open(filepath, 'r', encoding='utf-8', errors='replace') as fh:
                for i, line in enumerate(fh):
                    if i >= 300:
                        break
                    collected.append(line)
            return ''.join(collected)
        except Exception as e:
            return str(e)
    return "Unknown tool"
def analyze_issue_agentic(issue, url, project_path):
    """Analyze one issue with a multi-turn, tool-using Gemini conversation.

    The model gets the search_codebase/read_file tools (see tools_decl) and
    up to 15 turns to investigate the codebase before emitting a JSON verdict.

    Args:
        issue: dict with at least 'title' and optionally 'body'.
        url: full generateContent endpoint URL (includes the API key).
        project_path: root directory the tools operate on.

    Returns:
        Parsed verdict dict ('analysis', 'effort_level', 'reasoning', and
        optionally 'recommended_implementation'); on any error or when the
        turn cap is hit, a fallback dict with effort_level='medium'.
    """
    system_instruction = """You are a senior software engineer analyzing bug/feature reports.
You MUST use the provided tools to investigate the codebase and pinpoint exactly which files and logic are responsible.
DO NOT GUESS.
Rating Effort Level:
- small (1 day): Localized fix/change (1-2 files), clear logic.
- medium (2-3 days): Harder to trace, state management, touches multiple components.
- large (>3 days): Architectural issues, core protocol changes, or very complex multi-package bugs.
REPRODUCTION RULE:
If a bug is hard to reproduce (specific OS, complex setup, intermittent/flickering), it MUST NOT be rated as small.
Output format (ONLY valid JSON, NO markdown):
{
"analysis": "technical analysis of root cause and fix",
"effort_level": "small|medium|large",
"reasoning": "justification with specific files/lines/logic you found",
"recommended_implementation": "code snippets or specific logic changes (only if small)"
}
"""
    # Issue body is truncated to 1500 chars to bound prompt size.
    prompt = f"{system_instruction}\n\nIssue Title: {issue.get('title')}\nIssue Body: {issue.get('body', '')[:1500]}"
    messages = [{"role": "user", "parts": [{"text": prompt}]}]
    for turn in range(15): # Limit turns to 15 for efficiency in unified loop
        try:
            res = call_gemini(messages, url)
            candidate = res['candidates'][0]['content']
            parts = candidate.get('parts', [])
            # The API may omit 'role'; it is required when echoing history back.
            if 'role' not in candidate: candidate['role'] = 'model'
            messages.append(candidate)
            function_calls = [p for p in parts if 'functionCall' in p]
            if function_calls:
                # Execute every requested tool and feed all results back as one turn.
                tool_responses = []
                for fcall in function_calls:
                    call_data = fcall['functionCall']
                    result = execute_tool(call_data, project_path)
                    tool_responses.append({
                        "functionResponse": {
                            "name": call_data['name'],
                            # Truncate tool output to keep the context window bounded.
                            "response": {"result": result[:5000]}
                        }
                    })
                messages.append({"role": "user", "parts": tool_responses})
            else:
                # No tool call: expect the final JSON verdict as plain text.
                text = parts[0].get('text', '')
                if not text: continue
                # Strip markdown fences the model sometimes adds despite instructions.
                text = text.replace('```json', '').replace('```', '').strip()
                return json.loads(text)
        except Exception as e:
            # Any API/parse failure aborts the loop; caller gets the fallback below,
            # which process_pipeline_task treats as a signal to try the single-turn path.
            break
    return {"analysis": "Failed to analyze autonomously", "effort_level": "medium", "reasoning": "Agent loop exceeded limit or errored."}
def extract_keywords(text):
    """Pull likely code identifiers out of issue text for grep seeding.

    Matches CapitalizedWords, *.ts/*.tsx filenames, and *Service/*Command
    identifiers; drops tokens of 4 chars or fewer; de-duplicates; caps at 8.

    Fix: the original used list(set(...)), which made the keyword order (and
    therefore which 8 survive the cap) non-deterministic between runs.
    dict.fromkeys de-duplicates while preserving first-occurrence order.

    Returns:
        list[str] of at most 8 keywords, in order of first appearance.
    """
    words = re.findall(r'\b[A-Z][a-zA-Z0-9]+\b|\b\w+\.tsx?\b|\b\w+Service\b|\b\w+Command\b', text)
    unique = list(dict.fromkeys(w for w in words if len(w) > 4))
    return unique[:8]
def search_codebase_static(keywords, project_path):
    """Grep the project for each keyword and build a labelled context blob.

    For every keyword, runs grep with list arguments, filters out build
    artefacts (node_modules/dist/build) and .test. files, and keeps at most
    8 matching lines per keyword.

    Security fix: the original interpolated keywords into a shell=True command
    string (quote-escaping alone does not stop `$()`/backtick injection); the
    bare `except:` is also narrowed to OSError so real bugs are not swallowed.

    Returns:
        Concatenated "--- Matches for <kw> ---" sections, or "" when nothing
        matched (or grep is unavailable).
    """
    noise = re.compile(r'node_modules|dist|build|\.test\.')
    context = ""
    for kw in keywords:
        try:
            proc = subprocess.run(
                ['grep', '-rn', '--', kw, project_path],
                capture_output=True, text=True)
        except OSError:
            # grep missing/unusable: skip this keyword, best-effort like before.
            continue
        hits = [ln for ln in proc.stdout.splitlines() if not noise.search(ln)][:8]
        if hits:
            context += f"\n--- Matches for {kw} ---\n" + "\n".join(hits) + "\n\n"
    return context
def analyze_issue_single_turn(issue, url, project_path):
    """Fallback analysis: one Gemini call seeded with static grep context.

    Used when the agentic loop fails.  Extracts likely identifiers from the
    issue text, greps the project for them, and sends a single prompt (no
    tools declared) asking for the JSON verdict directly.

    Args:
        issue: dict with 'title' and optionally 'body'.
        url: full generateContent endpoint URL (includes the API key).
        project_path: root directory for the static grep search.

    Returns:
        Parsed verdict dict, or a 'medium' fallback dict whose reasoning
        carries the error string when the call or JSON parse fails.
    """
    title = issue.get('title', '')
    body = issue.get('body', '')[:1500]
    keywords = extract_keywords(title + " " + body)
    code_context = search_codebase_static(keywords, project_path)
    prompt = f"""You are a senior software engineer analyzing issues.
Based on the description and codebase search context, pinpoint exactly which files and logic are responsible.
DO NOT GUESS. If the context isn't enough, provide your best technical hypothesis.
Rating Effort Level:
- small (1 day): Localized fix (1-2 files), clear cause.
- medium (2-3 days): Touches multiple components or hard to trace.
- large (>3 days): Architectural issues, Windows/WSL-specific, core protocols.
Issue Title: {title}
Issue Body: {body}
Codebase Search Context:
{code_context[:8000]}
Output ONLY valid JSON (no markdown block):
{{
"analysis": "technical analysis of root cause and fix",
"effort_level": "small|medium|large",
"reasoning": "justification with specific files/lines found"
}}
"""
    data = {
        # Single-shot request: no tools, low temperature for determinism.
        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0.1}
    }
    try:
        req = urllib.request.Request(url, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
        with urllib.request.urlopen(req, timeout=60) as response:
            res = json.loads(response.read().decode('utf-8'))
        txt = res['candidates'][0]['content']['parts'][0]['text']
        # Strip markdown fences the model sometimes adds despite instructions.
        txt = txt.replace('```json', '').replace('```', '').strip()
        return json.loads(txt)
    except Exception as e:
        # Best-effort fallback: report the error in 'reasoning' rather than crash a worker.
        return {"analysis": "Failed to analyze autonomously", "effort_level": "medium", "reasoning": str(e)}
# --- VALIDATION ---
def find_files_in_text(text):
    """Extract candidate source-file paths (.ts/.tsx/.js/.json/.md) from text.

    Fix: the original alternation order `ts|tsx|js|json|md` made the regex
    engine take the first matching alternative, truncating "App.tsx" to
    "App.ts" and "data.json" to "data.js".  The alternation is now ordered
    longest-first and anchored with \\b so extensions match exactly.

    Returns:
        set[str] of matched paths, excluding anything starting with 'http'.
    """
    matches = re.findall(r'([\w\.\/\-]+\.(?:tsx|ts|json|js|md))\b', text)
    return set(m for m in matches if not m.startswith('http'))
def resolve_file(filename, project_path):
    """Map a file path mentioned in text to a real path under project_path.

    Tries the path relative to the project root first; otherwise walks the
    tree (skipping .git and node_modules) looking for the basename.

    Returns:
        The resolved absolute/joined path, or None when nothing matches.
    """
    direct = os.path.join(project_path, filename)
    if os.path.exists(direct):
        return direct
    target = os.path.basename(filename)
    for root, _dirs, files in os.walk(project_path):
        # Skip version-control and dependency directories entirely.
        if '.git' in root or 'node_modules' in root:
            continue
        if target in files:
            return os.path.join(root, target)
    return None
def validate_effort(issue, project_path):
    """Cross-check the model's effort rating against the actual codebase.

    Resolves every file path mentioned in the issue text + model output,
    counts their lines, and combines that with a keyword heuristic
    (LARGE_KEYWORDS / MEDIUM_KEYWORDS) to produce a final effort level.

    Returns:
        (effort_level, validation_message) where effort_level is
        'small' | 'medium' | 'large'.
    """
    combined = " ".join([
        issue.get('title', '').lower(),
        issue.get('body', '').lower(),
        issue.get('analysis', '').lower(),
        issue.get('reasoning', '').lower(),
    ])

    # Resolve mentioned files and tally their sizes (deduplicated by path).
    resolved_files = []  # list of (path, line_count)
    total_lines = 0
    for candidate in find_files_in_text(combined):
        path = resolve_file(candidate, project_path)
        if path is None or any(path == p for p, _ in resolved_files):
            continue
        try:
            with open(path, 'r', encoding='utf-8') as fh:
                count = sum(1 for _ in fh)
        except Exception:
            continue  # unreadable file: ignore, best-effort
        resolved_files.append((path, count))
        total_lines += count

    # Keyword heuristic: large wins over medium; default small.
    hint = "small"
    for kw in LARGE_KEYWORDS:
        if re.search(r'\b' + re.escape(kw) + r'\b', combined):
            hint = "large"
            break
    if hint != "large":
        for kw in MEDIUM_KEYWORDS:
            if re.search(r'\b' + re.escape(kw) + r'\b', combined):
                hint = "medium"
                break

    num_files = len(resolved_files)
    if num_files == 0:
        # Nothing verifiable in the codebase: fall back to the keyword hint,
        # never rating lower than medium.
        effort = hint if hint in ('medium', 'large') else 'medium'
        return effort, f"No specific files identified in codebase. Keyword heuristic: {hint}."

    details = ", ".join(f"{os.path.basename(p)} ({n} lines)" for p, n in resolved_files)
    if num_files > 3 or total_lines > 1500 or hint == "large":
        return "large", f"Codebase validation: {num_files} files ({details}), {total_lines} total lines. Keyword hint: {hint}."
    if num_files >= 2 or total_lines > 500 or hint == "medium":
        return "medium", f"Codebase validation: {num_files} files ({details}), {total_lines} total lines. Keyword hint: {hint}."
    return "small", f"Codebase validation: {num_files} files ({details}), {total_lines} total lines. Appears highly localized."
def process_pipeline_task(args_tuple):
    """Worker body: analyze one issue (if needed), validate it, persist state.

    args_tuple: (issue, url, project_path, input_file, all_issues).  `issue`
    is a reference into `all_issues`, so mutating it updates the shared list
    that gets re-serialized to input_file under file_lock.

    Returns:
        The (mutated) issue dict.
    """
    issue, url, project_path, input_file, all_issues = args_tuple
    # Re-analyze when the stored analysis is missing, previously failed,
    # or suspiciously short (< 30 chars).
    needs_analysis = not issue.get('analysis') or issue.get('analysis') == "Failed to analyze autonomously" or len(issue.get('analysis', '')) < 30
    if needs_analysis:
        print(f"[{issue.get('number', 'unknown')}] Starting Agentic Analysis...")
        result = analyze_issue_agentic(issue, url, project_path)
        if result.get('analysis') == "Failed to analyze autonomously":
            # Agentic loop gave up: fall back to the cheaper single-call path.
            print(f"[{issue.get('number', 'unknown')}] Agentic failed. Falling back to Single-Turn Context Analysis...")
            result = analyze_issue_single_turn(issue, url, project_path)
        issue['analysis'] = result.get('analysis', 'Failed to analyze')
        issue['effort_level'] = result.get('effort_level', 'medium')
        issue['reasoning'] = result.get('reasoning', 'Could not determine')
        if 'recommended_implementation' in result:
            issue['recommended_implementation'] = result['recommended_implementation']
        # Checkpoint the whole dataset so progress survives interruption.
        with file_lock:
            with open(input_file, 'w') as f:
                json.dump(all_issues, f, indent=2)
    # Validation
    old_effort = issue.get('effort_level')
    new_effort, validation_reason = validate_effort(issue, project_path)
    issue['effort_level'] = new_effort
    existing_reasoning = issue.get('reasoning', '')
    # Strip any validation suffix left by a previous run before appending a fresh one.
    existing_reasoning = existing_reasoning.split(' | Codebase validation:')[0]
    existing_reasoning = existing_reasoning.split(' | No specific files identified')[0]
    issue['reasoning'] = f"{existing_reasoning} | {validation_reason}".strip(' |')
    # Second checkpoint only when something actually changed.
    if needs_analysis or old_effort != new_effort:
        with file_lock:
            with open(input_file, 'w') as f:
                json.dump(all_issues, f, indent=2)
    print(f"[{issue.get('number', 'unknown')}] Completed -> {issue.get('effort_level')}")
    return issue
def export_csv(issues, output_csv):
    """Write the analyzed issues to a tab-separated export file.

    Handles both plain-list and GraphQL-style ({'nodes': [...]}) shapes for
    assignees/labels.  Note: the delimiter is a tab despite the .csv name.
    """
    sync_date = datetime.now().strftime("%Y-%m-%d")
    header = [
        'Issue ID', 'Title', 'Status', 'Assignee', 'Labels',
        'Last Sync', 'Link', 'analysis', 'effort_level',
        'reasoning', 'recommended_implementation'
    ]
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(header)
        for issue in issues:
            # Unwrap GraphQL connection objects to plain lists when present.
            assignees = issue.get('assignees', [])
            if isinstance(assignees, dict) and 'nodes' in assignees:
                assignees = assignees['nodes']
            labels = issue.get('labels', [])
            if isinstance(labels, dict) and 'nodes' in labels:
                labels = labels['nodes']
            writer.writerow([
                issue.get('number'),
                issue.get('title', ''),
                issue.get('state', 'OPEN'),
                ", ".join(a.get('login', '') for a in assignees),
                ", ".join(l.get('name', '') for l in labels),
                sync_date,
                issue.get('url', ''),
                issue.get('analysis', ''),
                issue.get('effort_level', ''),
                issue.get('reasoning', ''),
                issue.get('recommended_implementation', ''),
            ])
    print(f"Exported successfully to {output_csv}")
def main():
    """CLI entry point: load issues, fan analysis out to workers, export CSV."""
    parser = argparse.ArgumentParser(description="Unified Effort Analysis Pipeline.")
    parser.add_argument("--api-key", required=True, help="Gemini API Key")
    parser.add_argument("--input", default="data/bugs.json", help="Input JSON file")
    parser.add_argument("--project", default="../../packages", help="Project root to analyze")
    parser.add_argument("--workers", type=int, default=4, help="Number of concurrent workers")
    args = parser.parse_args()

    endpoint = (
        "https://generativelanguage.googleapis.com/v1beta/models/"
        f"{MODEL}:generateContent?key={args.api_key}"
    )
    with open(args.input, 'r') as f:
        issues = json.load(f)
    print(f"Starting unified analysis pipeline on {len(issues)} issues...")

    # Each task carries a reference to the shared issues list so workers
    # can checkpoint the whole dataset as they go.
    work = [(issue, endpoint, args.project, args.input, issues) for issue in issues]
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as pool:
        # Drain the iterator so worker exceptions propagate here.
        list(pool.map(process_pipeline_task, work))

    export_csv(issues, args.input.replace('.json', '.csv'))
    print("Pipeline fully complete!")


if __name__ == '__main__':
    main()