mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-31 06:02:47 -07:00
feat: consolidate multi-stage analysis pipeline into a single, efficient python script
This commit is contained in:
@@ -1,135 +0,0 @@
|
||||
"""
|
||||
Purpose: Runs heuristic post-analysis validation on the AI's effort estimations.
|
||||
Checks for keywords (like 'Windows', 'WSL', 'PTY') in the issue body to ensure the AI didn't underestimate platform-specific or architecturally complex bugs as 'small'.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
|
||||
# Command-line interface: which JSON file of analyzed issues to validate and
# which source tree to check file references against.
parser = argparse.ArgumentParser(description="Validate effort levels using heuristics.")
parser.add_argument("--input", default="data/bugs.json", help="Input JSON file containing analyzed issues")
parser.add_argument("--project", default="../../packages", help="Project root for codebase validation")
args = parser.parse_args()

# Module-level settings read by the functions below.
ISSUES_FILE = args.input
REPO_ROOT = args.project

# Pin the encoding: without it open() falls back to the platform locale, which
# mis-decodes (or fails on) UTF-8 JSON containing non-ASCII text on systems
# whose default encoding is not UTF-8.
with open(ISSUES_FILE, 'r', encoding='utf-8') as f:
    issues = json.load(f)
|
||||
|
||||
# Keyword tables for the stricter effort heuristic. Entries are lowercase
# because they are matched against lowercased issue text.

# Any hit here marks the issue as "large".
LARGE_KEYWORDS = [
    # platform / process handling
    'windows', 'win32', 'wsl', 'wsl2', 'pty',
    'pseudo-terminal', 'child_process', 'spawn', 'sigint', 'sigterm',
    # performance and stability symptoms
    'memory leak', 'performance', 'boot time', 'infinite loop',
    'hangs', 'freezes', 'crashes', 'race condition',
    # flaky behavior and protocol / scheduling layers
    'intermittent', 'sometimes', 'flickering', 'a2a',
    'mcp protocol', 'scheduler', 'event loop', 'websocket',
    # throughput / structural work
    'stream', 'throughput', 'concurrency', 'deadlock',
    'file descriptor', 'architecture', 'refactor',
]

# Checked only when no large keyword matched; a hit marks the issue "medium".
MEDIUM_KEYWORDS = [
    # UI framework and parsing
    'react', 'hook', 'useeffect', 'usestate', 'usememo',
    'ink', 'tui', 'ui state', 'parser', 'markdown',
    # text formats and schemas
    'regex', 'regular expression', 'ansi', 'escape sequence',
    'toml', 'schema', 'validation', 'zod',
    # async handling and configuration
    'promise', 'async', 'await', 'unhandled', 'rejection',
    'config', 'settings', 'env', 'environment',
    # paths, tooling and output
    'path resolution', 'symlink', 'git', 'telemetry',
    'logging', 'format', 'display', 'rendering',
    # interactive input
    'clipboard', 'copy', 'paste', 'bracketed',
    'interactive', 'dialog', 'modal', 'focus',
]

# Keywords associated with small, localized changes.
SMALL_KEYWORDS = [
    'typo', 'spelling', 'rename', 'string', 'constant',
    'css', 'color', 'theme.status', 'padding', 'margin',
    'error message', 'econnreset', 'enotdir', 'etimedout',
    'documentation', 'jsdoc', 'readme', 'help text',
    'flag', 'version string', 'static value',
]
|
||||
|
||||
def find_files_in_text(text):
    """Return the set of file-like tokens mentioned in *text*.

    A token is a run of word characters, dots, slashes and dashes that ends
    in one of the extensions ``.tsx``, ``.ts``, ``.json``, ``.js`` or ``.md``.

    Args:
        text: Free-form text (issue title, body, analysis, ...).

    Returns:
        A set of the matched path strings, excluding any that start with
        ``http``.
    """
    # Longer extensions come first and a trailing \b is required; otherwise
    # regex alternation stops at the first hit and "app.tsx" is reported as
    # "app.ts", "package.json" as "package.js" and "notes.mdx" as "notes.md".
    matches = re.findall(r'([\w\.\/\-]+\.(?:tsx|ts|json|js|md)\b)', text)

    # NOTE(review): ':' is not part of the character class above, so a match
    # taken from a URL begins with '//' rather than 'http'. This filter
    # therefore only drops file names that themselves start with "http"
    # (e.g. "http.ts") -- confirm whether that is the intent.
    return {m for m in matches if not m.startswith('http')}
|
||||
|
||||
def resolve_file(filename, repo_root=None):
    """Locate *filename* inside the project tree.

    The name is first tried as a path relative to the project root. If that
    fails, the tree is walked looking for a file with the same basename.

    Args:
        filename: A path or bare file name, typically extracted from issue
            text.
        repo_root: Directory to search. Defaults to the module-level
            ``REPO_ROOT`` when omitted.

    Returns:
        The path of the first match (joined onto the search root), or
        ``None`` when nothing matches.
    """
    root_dir = REPO_ROOT if repo_root is None else repo_root

    # Direct hit: must be a regular file, and must stay inside the root.
    # The name comes from issue text, so an absolute path or "../" segments
    # must not be able to point outside the project.
    direct = os.path.join(root_dir, filename)
    if os.path.isfile(direct):
        abs_root = os.path.abspath(root_dir)
        try:
            contained = os.path.commonpath([abs_root, os.path.abspath(direct)]) == abs_root
        except ValueError:
            # Raised e.g. when the two paths live on different drives.
            contained = False
        if contained:
            return direct

    basename = os.path.basename(filename)
    for root, dirs, files in os.walk(root_dir):
        # Prune by exact directory name, in place, so os.walk never descends
        # into these trees. A substring test on the whole path would also
        # skip ".github" and would not stop the traversal of node_modules.
        # Sorting makes the choice among duplicate basenames deterministic.
        dirs[:] = sorted(d for d in dirs if d not in ('.git', 'node_modules'))
        if basename in files:
            return os.path.join(root, basename)
    return None
|
||||
|
||||
def analyze_issue(issue):
    """Re-estimate the effort level of one issue.

    Two signals are combined: keyword hits in the issue text, and the number
    and size of the project files the text mentions.

    Args:
        issue: Mapping with optional ``title``, ``body``, ``analysis`` and
            ``reasoning`` entries. Missing or null entries count as empty.

    Returns:
        A ``(effort, validation_msg)`` tuple where ``effort`` is ``"small"``,
        ``"medium"`` or ``"large"`` and ``validation_msg`` explains the
        decision.
    """
    def _field(key):
        # JSON null arrives as None, which .get(key, '') does not replace.
        return issue.get(key) or ''

    # Drop a validation message appended by an earlier run. Otherwise it
    # feeds back into the analysis ("Codebase validation" contains the
    # medium keyword "validation") and re-runs are not idempotent.
    reasoning = _field('reasoning')
    for marker in ('Codebase validation:', 'No specific files identified'):
        reasoning = reasoning.split(' | ' + marker)[0]
        if reasoning.startswith(marker):
            reasoning = ''

    raw_text = f"{_field('title')} {_field('body')} {_field('analysis')} {reasoning}"
    # Keywords are lowercase, so they are matched against lowercased text.
    combined_text = raw_text.lower()

    # File names are extracted from the original-case text: the basename
    # lookup in resolve_file is case-sensitive, so lowercasing first would
    # make names such as "AppContainer.tsx" unresolvable.
    # Sorting keeps the reported file order stable between runs.
    line_counts = {}  # resolved path -> number of lines, in insertion order
    for candidate in sorted(find_files_in_text(raw_text)):
        resolved = resolve_file(candidate)
        if not resolved or resolved in line_counts:
            continue
        try:
            with open(resolved, 'r', encoding='utf-8') as file_obj:
                line_counts[resolved] = sum(1 for _ in file_obj)
        except (OSError, ValueError):
            # Best effort: unreadable or non-UTF-8 files are ignored
            # (UnicodeDecodeError is a ValueError).
            continue

    num_files = len(line_counts)
    total_lines = sum(line_counts.values())

    def _mentions_any(keywords):
        # Whole-word (or whole-phrase) match of any keyword.
        return any(
            re.search(r'\b' + re.escape(kw) + r'\b', combined_text)
            for kw in keywords
        )

    if _mentions_any(LARGE_KEYWORDS):
        keyword_effort = "large"
    elif _mentions_any(MEDIUM_KEYWORDS):
        keyword_effort = "medium"
    else:
        keyword_effort = "small"

    if num_files == 0:
        # With no file evidence, never rate below "medium".
        effort = keyword_effort if keyword_effort in ['medium', 'large'] else 'medium'
        validation_msg = f"No specific files identified in codebase. Keyword heuristic: {keyword_effort}."
        return effort, validation_msg

    file_details = ", ".join(
        f"{os.path.basename(path)} ({count} lines)" for path, count in line_counts.items()
    )
    summary = f"Codebase validation: {num_files} files ({file_details}), {total_lines} total lines."

    if num_files > 3 or total_lines > 1500 or keyword_effort == "large":
        effort = "large"
        validation_msg = f"{summary} Keyword hint: {keyword_effort}."
    elif num_files >= 2 or total_lines > 500 or keyword_effort == "medium":
        effort = "medium"
        validation_msg = f"{summary} Keyword hint: {keyword_effort}."
    else:
        effort = "small"
        validation_msg = f"{summary} Appears highly localized."

    return effort, validation_msg
|
||||
|
||||
# Re-evaluate every issue, replace its effort level, and append the
# validation message to its reasoning.
updated_count = 0
for issue in issues:
    old_effort = issue.get('effort_level')
    new_effort, validation_reason = analyze_issue(issue)
    issue['effort_level'] = new_effort

    # `or ''` covers a JSON null, which .get(key, '') would pass through.
    existing_reasoning = issue.get('reasoning') or ''
    # Remove the message appended by a previous run. When the reasoning
    # consisted only of that message there is no leading " | ", so the
    # start-of-string case must be handled too or the stale text is kept
    # on every re-run.
    for marker in ('Codebase validation:', 'No specific files identified'):
        existing_reasoning = existing_reasoning.split(' | ' + marker)[0]
        if existing_reasoning.startswith(marker):
            existing_reasoning = ''

    # strip(' |') removes the dangling separator when either side is empty.
    issue['reasoning'] = f"{existing_reasoning} | {validation_reason}".strip(' |')

    if old_effort != new_effort:
        updated_count += 1

# Write the results back over the input file.
with open(ISSUES_FILE, 'w', encoding='utf-8') as f:
    json.dump(issues, f, indent=2)

print(f"Successfully re-evaluated and updated {updated_count} issues. Codebase validated.")
|
||||
Reference in New Issue
Block a user