mirror of https://github.com/google-gemini/gemini-cli.git
refactor: remove hardcoded api keys and paths to make core analyzers generic
--- a/scripts/backlog-analysis/README.md
+++ b/scripts/backlog-analysis/README.md
@@ -21,7 +21,7 @@ and determining implementation effort levels for the Gemini CLI project.
 Use this for a quick, first-pass estimation.
 
 ```bash
-python3 analyze_bugs.py
+python3 analyze_bugs.py --api-key "YOUR_KEY"
 ```
 
 ### 2. Deep Agentic Analysis
@@ -29,7 +29,7 @@ python3 analyze_bugs.py
 Uses Gemini as an agent with access to the codebase.
 
 ```bash
-python3 bug_analyzer_final.py
+python3 bug_analyzer_final.py --api-key "YOUR_KEY"
 ```
 
 ### 3. Iterative Analysis
@@ -37,7 +37,7 @@ python3 bug_analyzer_final.py
 Runs the single-turn analyzer in a loop until all issues have a valid analysis.
 
 ```bash
-./loop_analyzer.sh
+GEMINI_API_KEY="YOUR_KEY" ./loop_analyzer.sh
 ```
 
 ### 4. Validation & Export
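With the hard-coded keys gone, all three documented entry points take the key explicitly, so a full backlog pass can be scripted without touching any source. A minimal driver reproducing the README's flow, assuming it runs from the scripts' own directory with GEMINI_API_KEY set (this driver is illustrative, not part of the commit):

```python
import os
import subprocess

# Hypothetical end-to-end driver for the documented flow; not part of the repo.
api_key = os.environ["GEMINI_API_KEY"]

# 1. Quick, first-pass estimation
subprocess.run(["python3", "analyze_bugs.py", "--api-key", api_key], check=True)

# 2. Deep agentic analysis
subprocess.run(["python3", "bug_analyzer_final.py", "--api-key", api_key], check=True)

# 3. Iterate the single-turn analyzer until all issues have a valid analysis
subprocess.run(["./loop_analyzer.sh"], check=True)  # reads GEMINI_API_KEY from the environment
```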
--- a/scripts/backlog-analysis/analyze_bugs.py
+++ b/scripts/backlog-analysis/analyze_bugs.py
@@ -2,17 +2,11 @@ import json
 import urllib.request
 import urllib.error
 import os
+import argparse
 import concurrent.futures
 from pathlib import Path
 
-API_KEY = "REDACTED_API_KEY"
 MODEL = "gemini-3-flash-preview"
-URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
-
-BUGS_FILE = 'data/bugs.json'
-
-with open(BUGS_FILE, 'r') as f:
-    bugs = json.load(f)
 
 # Collect basic directory structure to provide as context
 def get_tree(path, max_depth=3):
@@ -28,16 +22,14 @@ def get_tree(path, max_depth=3):
         indent = ' ' * len(depth)
         tree.append(f"{indent}{Path(root).name}/")
         for f in files:
-            if f.endswith(('.ts', '.tsx', '.js', '.json', '.toml', '.md')):
+            if f.endswith(('.ts', '.tsx', '.js', '.json', '.toml', '.md', '.py', '.sh')):
                 tree.append(f"{indent}  {f}")
     return "\n".join(tree)
 
-tree_context = get_tree('../../packages')
-
-def analyze_bug(bug):
+def analyze_bug(bug, url, tree_context):
     prompt = f"""
-You are analyzing bugs for the google-gemini/gemini-cli codebase.
-Here is the directory structure of the 'packages' directory:
+You are analyzing bugs for the current codebase.
+Here is the directory structure of the project:
 {tree_context[:4000]}
 
 Analyze the following GitHub bug report to determine the implementation effort.
@@ -57,7 +49,7 @@ Reply with ONLY a valid JSON object matching exactly this schema, without Markdo
         }
     }
 
-    req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
+    req = urllib.request.Request(url, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
     try:
         with urllib.request.urlopen(req) as response:
             result = json.loads(response.read().decode('utf-8'))
@@ -74,12 +66,13 @@ Reply with ONLY a valid JSON object matching exactly this schema, without Markdo
             parsed = json.loads(text.strip())
             return parsed
     except Exception as e:
-        print(f"Error processing bug {bug['number']}: {e}")
+        print(f"Error processing bug {bug.get('number', 'unknown')}: {e}")
         return {"analysis": "Failed to analyze", "effort_level": "medium", "reasoning": "Error calling Gemini API"}
 
-def process_bug(bug):
-    print(f"Analyzing Bug #{bug['number']}...")
-    result = analyze_bug(bug)
+def process_bug_task(args):
+    bug, url, tree_context = args
+    print(f"Analyzing Bug #{bug.get('number', 'unknown')}...")
+    result = analyze_bug(bug, url, tree_context)
     bug['analysis'] = result.get('analysis', '')
     bug['effort_level'] = result.get('effort_level', 'medium')
     bug['reasoning'] = result.get('reasoning', '')
@@ -88,16 +81,31 @@ def process_bug(bug):
     return bug
 
 def main():
-    print(f"Starting analysis of {len(bugs)} bugs...")
+    parser = argparse.ArgumentParser(description="Static initial triage analyzer for bugs.")
+    parser.add_argument("--api-key", required=True, help="Gemini API Key")
+    parser.add_argument("--input", default="data/bugs.json", help="Input JSON file containing bugs")
+    parser.add_argument("--project", default="../../packages", help="Project root to analyze")
+    args = parser.parse_args()
+
+    url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={args.api_key}"
+
+    with open(args.input, 'r') as f:
+        bugs = json.load(f)
+
+    tree_context = get_tree(args.project)
+
+    print(f"Starting static analysis of {len(bugs)} bugs...")
 
     # Process in batches to save incrementally
    batch_size = 10
     for i in range(0, len(bugs), batch_size):
         batch = bugs[i:i+batch_size]
-        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
-            executor.map(process_bug, batch)
+        tasks = [(bug, url, tree_context) for bug in batch]
 
-        with open(BUGS_FILE, 'w') as f:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
+            list(executor.map(process_bug_task, tasks))
+
+        with open(args.input, 'w') as f:
             json.dump(bugs, f, indent=2)
         print(f"Saved batch {i//batch_size + 1}")
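The switch from `process_bug(bug)` to `process_bug_task(args)` exists because `ThreadPoolExecutor.map` hands each worker a single argument; packing `(bug, url, tree_context)` into a tuple threads the per-run state through without module-level globals. A minimal standalone sketch of the same pattern (names here are illustrative, not from the repo):

```python
import concurrent.futures

# Illustrative stand-ins for process_bug_task and its task tuples.
def process_task(args):
    item, url, context = args  # unpack the state that used to live in globals
    return f"{item} via {url} ({len(context)} chars of context)"

items = ["bug-1", "bug-2", "bug-3"]
tasks = [(item, "https://example.invalid/generate", "packages/\n  cli/") for item in items]

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # list() forces iteration, so a worker exception is re-raised here
    # instead of being silently discarded by the lazy map iterator.
    results = list(executor.map(process_task, tasks))

print(results)
```

The same reasoning explains the `list(executor.map(...))` change in the diff: a bare `executor.map` call swallows worker exceptions until the iterator is consumed.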
--- a/scripts/backlog-analysis/bug_analyzer_final.py
+++ b/scripts/backlog-analysis/bug_analyzer_final.py
@@ -2,27 +2,21 @@ import json
 import urllib.request
 import urllib.error
 import os
+import argparse
 import concurrent.futures
 import subprocess
 import sys
 import threading
 
-API_KEY = "REDACTED_API_KEY"
 MODEL = "gemini-3-flash-preview"
-URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
-
-BUGS_FILE = 'data/bugs.json'
 file_lock = threading.Lock()
 
-with open(BUGS_FILE, 'r') as f:
-    bugs = json.load(f)
-
-tools = [
+tools_decl = [
     {
         "functionDeclarations": [
             {
                 "name": "search_codebase",
-                "description": "Search the gemini-cli packages directory for a string using grep. Returns matching lines and file paths.",
+                "description": "Search the project directory for a string using grep. Returns matching lines and file paths.",
                 "parameters": {
                     "type": "OBJECT",
                     "properties": {
@@ -46,17 +40,17 @@ tools = [
     }
 ]
 
-def call_gemini(messages):
+def call_gemini(messages, url):
     data = {
         "contents": messages,
-        "tools": tools,
+        "tools": tools_decl,
         "generationConfig": {"temperature": 0.1}
     }
-    req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
+    req = urllib.request.Request(url, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
     with urllib.request.urlopen(req) as response:
         return json.loads(response.read().decode('utf-8'))
 
-def execute_tool(call):
+def execute_tool(call, project_path):
     name = call['name']
     args = call.get('args', {})
 
@@ -64,7 +58,7 @@ def execute_tool(call):
         pattern = args.get('pattern', '')
         pattern = pattern.replace('"', '\\"')
         try:
-            cmd = f'grep -rn "{pattern}" ../../packages | grep -vE "node_modules|dist|build|\\.test\\." | head -n 20'
+            cmd = f'grep -rn "{pattern}" "{project_path}" | grep -vE "node_modules|dist|build|\\.test\\." | head -n 20'
             res = subprocess.check_output(cmd, shell=True, text=True, stderr=subprocess.STDOUT)
             return res if res else "No matches found."
         except subprocess.CalledProcessError as e:
@@ -72,12 +66,12 @@ def execute_tool(call):
     elif name == 'read_file':
         filepath = args.get('filepath', '')
         if not filepath.startswith('/'):
-            filepath = os.path.join('../../packages', filepath)
+            filepath = os.path.join(project_path, filepath)
 
         try:
             if not os.path.exists(filepath):
                 basename = os.path.basename(filepath)
-                find_cmd = f'find ../../packages -name "{basename}" | head -n 1'
+                find_cmd = f'find "{project_path}" -name "{basename}" | head -n 1'
                 found_path = subprocess.check_output(find_cmd, shell=True, text=True).strip()
                 if found_path: filepath = found_path
                 else: return f"File {filepath} not found."
@@ -89,8 +83,8 @@ def execute_tool(call):
             return str(e)
     return "Unknown tool"
 
-def analyze_issue(issue):
-    system_instruction = """You are a senior software engineer analyzing bug reports for the gemini-cli codebase.
+def analyze_issue(issue, url, project_path):
+    system_instruction = """You are a senior software engineer analyzing bug reports.
 You MUST use the provided tools to investigate the codebase and pinpoint exactly which files and logic are responsible for the bug.
 DO NOT GUESS.
 
@@ -114,9 +108,9 @@ Output format (ONLY valid JSON, NO markdown):
     prompt = f"{system_instruction}\n\nBug Title: {issue.get('title')}\nBug Body: {issue.get('body', '')[:1200]}"
     messages = [{"role": "user", "parts": [{"text": prompt}]}]
 
-    for turn in range(30): # Significantly higher turn limit
+    for turn in range(30):
         try:
-            res = call_gemini(messages)
+            res = call_gemini(messages, url)
             candidate = res['candidates'][0]['content']
             parts = candidate.get('parts', [])
 
@@ -129,7 +123,7 @@ Output format (ONLY valid JSON, NO markdown):
             tool_responses = []
             for fcall in function_calls:
                 call_data = fcall['functionCall']
-                result = execute_tool(call_data)
+                result = execute_tool(call_data, project_path)
                 tool_responses.append({
                     "functionResponse": {
                         "name": call_data['name'],
@@ -146,14 +140,14 @@ Output format (ONLY valid JSON, NO markdown):
 
     return {"analysis": "Failed to analyze autonomously", "effort_level": "medium", "reasoning": "Agent loop exceeded 30 turns or errored."}
 
-def process_issue(issue):
-    # Re-analyze if empty, failed, or just a placeholder
+def process_issue_task(args_tuple):
+    issue, url, project_path, input_file, bugs = args_tuple
     current_analysis = issue.get('analysis', '')
     if current_analysis and current_analysis != "Failed to analyze autonomously" and len(current_analysis) > 50:
         return issue
 
-    print(f"Analyzing Bug #{issue['number']}...", flush=True)
-    result = analyze_issue(issue)
+    print(f"Analyzing Bug #{issue.get('number', 'unknown')}...", flush=True)
+    result = analyze_issue(issue, url, project_path)
 
     issue['analysis'] = result.get('analysis', 'Failed to analyze')
     issue['effort_level'] = result.get('effort_level', 'medium')
@@ -163,18 +157,32 @@ def process_issue(issue):
     else:
         issue.pop('recommended_implementation', None)
 
-    print(f"Completed Bug #{issue['number']} -> {issue['effort_level']}", flush=True)
+    print(f"Completed Bug #{issue.get('number', 'unknown')} -> {issue.get('effort_level', 'unknown')}", flush=True)
 
     with file_lock:
-        with open(BUGS_FILE, 'w') as f:
+        with open(input_file, 'w') as f:
             json.dump(bugs, f, indent=2)
     return issue
 
 def main():
+    parser = argparse.ArgumentParser(description="Deep agentic bug analyzer.")
+    parser.add_argument("--api-key", required=True, help="Gemini API Key")
+    parser.add_argument("--input", default="data/bugs.json", help="Input JSON file containing bugs")
+    parser.add_argument("--project", default="../../packages", help="Project root to analyze")
+    args = parser.parse_args()
+
+    url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={args.api_key}"
+
+    with open(args.input, 'r') as f:
+        bugs = json.load(f)
+
     print(f"Starting FINAL RE-ANALYSIS for {len(bugs)} bugs (Turn Limit: 30)...", flush=True)
 
+    tasks = [(b, url, args.project, args.input, bugs) for b in bugs]
     with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
-        list(executor.map(process_issue, bugs))
-    print("Agentic analysis complete. `bugs.json` is updated.", flush=True)
+        list(executor.map(process_issue_task, tasks))
+
+    print("Agentic analysis complete. JSON is updated.", flush=True)
 
 if __name__ == '__main__':
     main()
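For readers tracing `analyze_issue`'s agent loop: each turn posts the accumulated `messages`, executes any `functionCall` parts locally via `execute_tool`, and appends the results back as `functionResponse` parts before re-calling the model. A compressed sketch of one such turn with the network call stubbed out (the stub names, its canned reply, and the exact `functionResponse` payload key are assumptions; the hunk truncates mid-structure):

```python
# Sketch of one turn of the agent loop above; HTTP transport stubbed.
def stub_call_gemini(messages, url):
    # Stand-in for call_gemini: pretends the model requested one grep.
    return {"candidates": [{"content": {"parts": [
        {"functionCall": {"name": "search_codebase", "args": {"pattern": "useTerminal"}}}
    ]}}]}

def stub_execute_tool(call, project_path):
    # Stand-in dispatcher; the real one shells out to grep or reads files.
    return f"(grep matches for {call['args'].get('pattern')} under {project_path})"

messages = [{"role": "user", "parts": [{"text": "Bug Title: ..."}]}]
res = stub_call_gemini(messages, url="https://example.invalid")
candidate = res['candidates'][0]['content']
function_calls = [p for p in candidate.get('parts', []) if 'functionCall' in p]

messages.append(candidate)  # echo the model's turn back into the history
tool_responses = []
for fcall in function_calls:
    call_data = fcall['functionCall']
    result = stub_execute_tool(call_data, project_path=".")
    tool_responses.append({
        "functionResponse": {
            "name": call_data['name'],
            "response": {"content": result},  # payload key assumed; not shown in the hunk
        }
    })
messages.append({"role": "user", "parts": tool_responses})
# The real loop then re-calls call_gemini(messages, url), up to 30 turns.
```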
--- a/scripts/backlog-analysis/loop_analyzer.sh
+++ b/scripts/backlog-analysis/loop_analyzer.sh
@@ -2,6 +2,12 @@
 # Run from the project root or the scripts/backlog-analysis directory
 # This script assumes it's running in the same directory as the python scripts
 
+if [ -z "$GEMINI_API_KEY" ]; then
+  echo "Error: GEMINI_API_KEY environment variable is required."
+  echo "Usage: GEMINI_API_KEY=your_key ./loop_analyzer.sh"
+  exit 1
+fi
+
 while true; do
   count=$(jq '[.[] | select(.analysis == "Failed to analyze autonomously" or .analysis == null or .analysis == "" or (.analysis | length) < 30)] | length' data/bugs.json)
   if [ "$count" -eq 0 ]; then
@@ -9,6 +15,6 @@ while true; do
     break
   fi
   echo "Remaining bugs: $count"
-  python3 single_turn_bug_analyzer.py
+  python3 single_turn_bug_analyzer.py --api-key "$GEMINI_API_KEY"
 done
 python3 generate_bugs_csv.py
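The jq guard counts entries whose analysis is null, empty, the known failure marker, or shorter than 30 characters, and the loop exits only when that count reaches zero. The same predicate in Python, for checking a data/bugs.json by hand (a standalone sketch, not part of the commit):

```python
import json

# Mirrors the jq filter in loop_analyzer.sh: null, empty, the failure marker,
# or an analysis shorter than 30 characters all count as "still to do".
def needs_analysis(bug):
    analysis = bug.get('analysis')
    return (
        analysis is None
        or analysis == ""
        or analysis == "Failed to analyze autonomously"
        or len(analysis) < 30
    )

with open('data/bugs.json') as f:
    bugs = json.load(f)

print(sum(1 for bug in bugs if needs_analysis(bug)))
```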
--- a/scripts/backlog-analysis/single_turn_bug_analyzer.py
+++ b/scripts/backlog-analysis/single_turn_bug_analyzer.py
@@ -3,27 +3,24 @@ import urllib.request
 import os
 import subprocess
 import re
+import argparse
 import concurrent.futures
 import threading
 
-API_KEY = "REDACTED_API_KEY"
 MODEL = "gemini-3-flash-preview"
-URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
-BUGS_FILE = 'data/bugs.json'
-
-with open(BUGS_FILE, 'r') as f:
-    bugs = json.load(f)
 file_lock = threading.Lock()
 
 def extract_keywords(text):
     words = re.findall(r'\b[A-Z][a-zA-Z0-9]+\b|\b\w+\.tsx?\b|\b\w+Service\b|\b\w+Command\b', text)
     words = list(set([w for w in words if len(w) > 4]))
     return words[:8]
 
-def search_codebase(keywords):
+def search_codebase(keywords, project_path):
     context = ""
     for kw in keywords:
         try:
             kw_clean = kw.replace('"', '\\"')
-            cmd = f'grep -rn "{kw_clean}" ../../packages | grep -vE "node_modules|dist|build|\\.test\\." | head -n 8'
+            cmd = f'grep -rn "{kw_clean}" "{project_path}" | grep -vE "node_modules|dist|build|\\.test\\." | head -n 8'
             out = subprocess.check_output(cmd, shell=True, text=True, stderr=subprocess.STDOUT)
             if out:
                 context += f"\n--- Matches for {kw_clean} ---\n{out}\n"
@@ -31,7 +28,9 @@ def search_codebase(keywords):
             pass
     return context
 
-def process_issue(issue):
+def process_issue_task(args_tuple):
+    issue, url, project_path, input_file, bugs = args_tuple
+
     if issue.get('analysis') and issue['analysis'] != "Failed to analyze autonomously" and len(issue['analysis']) > 30:
         return issue
 
@@ -39,9 +38,9 @@ def process_issue(issue):
     body = issue.get('body', '')[:1500]
 
     keywords = extract_keywords(title + " " + body)
-    code_context = search_codebase(keywords)
+    code_context = search_codebase(keywords, project_path)
 
-    prompt = f"""You are a senior software engineer analyzing bug reports for the gemini-cli codebase.
+    prompt = f"""You are a senior software engineer analyzing bug reports.
 Based on the bug description and the provided codebase search context, pinpoint exactly which files and logic are responsible for the bug.
 DO NOT GUESS. If the context isn't enough, provide your best technical hypothesis.
 
@@ -69,7 +68,7 @@ Output ONLY valid JSON (no markdown block):
     }
 
     try:
-        req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
+        req = urllib.request.Request(url, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
         with urllib.request.urlopen(req, timeout=60) as response:
             res = json.loads(response.read().decode('utf-8'))
             txt = res['candidates'][0]['content']['parts'][0]['text']
@@ -79,27 +78,38 @@ Output ONLY valid JSON (no markdown block):
         issue['analysis'] = parsed.get('analysis', 'Failed to analyze')
         issue['effort_level'] = parsed.get('effort_level', 'medium')
         issue['reasoning'] = parsed.get('reasoning', 'Could not determine')
-        print(f"Completed {issue['number']} -> {issue['effort_level']}", flush=True)
+        print(f"Completed {issue.get('number', 'unknown')} -> {issue['effort_level']}", flush=True)
     except Exception as e:
-        print(f"Failed {issue['number']}: {e}", flush=True)
+        print(f"Failed {issue.get('number', 'unknown')}: {e}", flush=True)
 
+    with file_lock:
+        with open(input_file, 'w') as f:
+            json.dump(bugs, f, indent=2)
+
     return issue
 
 def main():
+    parser = argparse.ArgumentParser(description="Single turn code search bug analyzer.")
+    parser.add_argument("--api-key", required=True, help="Gemini API Key")
+    parser.add_argument("--input", default="data/bugs.json", help="Input JSON file containing bugs")
+    parser.add_argument("--project", default="../../packages", help="Project root to analyze")
+    args = parser.parse_args()
+
+    url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={args.api_key}"
+
+    with open(args.input, 'r') as f:
+        bugs = json.load(f)
+
     to_analyze = [b for b in bugs if b.get('analysis') == "Failed to analyze autonomously" or not b.get('analysis') or len(b.get('analysis', '')) < 30]
 
     # Only process 5 at a time
     to_analyze = to_analyze[:5]
 
     print(f"Starting single-turn analysis for {len(to_analyze)} bugs...", flush=True)
 
+    tasks = [(b, url, args.project, args.input, bugs) for b in to_analyze]
     with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
-        list(executor.map(process_issue, to_analyze))
-
-    # Final save
-    with open(BUGS_FILE, 'w') as f:
-        json.dump(bugs, f, indent=2)
+        list(executor.map(process_issue_task, tasks))
 
-    print("Done processing 5 bugs.", flush=True)
+    print("Done processing batch.", flush=True)
 
 if __name__ == '__main__':
     main()
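A note on `extract_keywords`, which drives the grep pass above: the regex pulls capitalized/CamelCase identifiers, `*.ts`/`*.tsx` filenames, and `*Service`/`*Command` names out of the issue text, drops anything under five characters, dedupes, and keeps at most eight. A quick standalone check of what it yields (the sample input is invented):

```python
import re

# Copy of extract_keywords from single_turn_bug_analyzer.py, run on a made-up title.
def extract_keywords(text):
    words = re.findall(r'\b[A-Z][a-zA-Z0-9]+\b|\b\w+\.tsx?\b|\b\w+Service\b|\b\w+Command\b', text)
    words = list(set([w for w in words if len(w) > 4]))
    return words[:8]

sample = "GeminiChat crashes when ShellCommand output is piped through useTerminal.ts"
print(sorted(extract_keywords(sample)))
# ['GeminiChat', 'ShellCommand', 'useTerminal.ts']
```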