mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-30 13:17:43 -07:00
chore: organize auxiliary and validation scripts into a utils directory
This commit is contained in:
@@ -0,0 +1,89 @@
|
||||
import json
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# Gemini REST endpoint configuration.
# The key is taken from the GEMINI_API_KEY environment variable so that a real
# credential never has to live in the source file. The original placeholder
# literal is kept as the fallback so the module still imports when the
# variable is unset.
API_KEY = os.environ.get("GEMINI_API_KEY", "REDACTED_API_KEY")
MODEL = "gemini-3-flash-preview"
URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
|
||||
|
||||
def call_gemini(messages, timeout=60):
    """POST a conversation to the Gemini ``generateContent`` endpoint.

    The request always advertises two function declarations to the model:
    ``search_codebase`` (takes ``pattern``) and ``read_file`` (takes
    ``filepath``), and uses a sampling temperature of 0.1.

    Args:
        messages: List of content dicts sent as the request's ``contents``.
        timeout: Seconds to wait on the HTTP call. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        urllib.error.URLError: On transport failures; ``HTTPError`` (a
            subclass) is raised for non-2xx responses.
    """
    tools = [{
        "functionDeclarations": [
            {
                "name": "search_codebase",
                "description": "Search the gemini-cli packages directory for a string using grep.",
                "parameters": {
                    "type": "OBJECT",
                    "properties": {"pattern": {"type": "STRING"}},
                    "required": ["pattern"]
                }
            },
            {
                "name": "read_file",
                "description": "Read a specific file.",
                "parameters": {
                    "type": "OBJECT",
                    "properties": {"filepath": {"type": "STRING"}},
                    "required": ["filepath"]
                }
            }
        ]
    }]
    payload = {
        "contents": messages,
        "tools": tools,
        "generationConfig": {"temperature": 0.1}
    }
    req = urllib.request.Request(
        URL,
        data=json.dumps(payload).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
    )
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return json.loads(response.read().decode('utf-8'))
|
||||
|
||||
def execute_tool(call):
    """Run one tool call requested by the model and return its text output.

    Args:
        call: A ``functionCall`` dict with a ``name`` and optional ``args``.

    Returns:
        For ``search_codebase``: up to 10 ``grep -rn`` result lines that do not
        contain ``node_modules``, ``dist`` or ``build``, or a short status
        string. For ``read_file``: the first 200 lines of the file, or a short
        status string. ``"Unknown tool"`` for any other name.
    """
    name = call['name']
    args = call.get('args') or {}
    print(f" > Executing: {name}({args})")

    if name == 'search_codebase':
        pattern = args.get('pattern', '')
        try:
            # The pattern comes from the model, so it is passed as an argv
            # element (never through a shell) and behind ``-e`` so that a
            # leading '-' cannot be parsed as a grep option.
            proc = subprocess.run(
                ['grep', '-rn', '-e', pattern, '../../packages'],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                errors='replace',
                check=False,
            )
        except OSError:
            return "Error or no results."
        # grep exits 0 on matches, 1 on no matches, >1 on real errors.
        if proc.returncode > 1:
            return "Error or no results."
        excluded = ('node_modules', 'dist', 'build')
        kept = [
            line for line in proc.stdout.splitlines(keepends=True)
            if not any(token in line for token in excluded)
        ]
        return "".join(kept[:10]) or "No results."

    if name == 'read_file':
        path = args.get('filepath', '')
        # NOTE(review): relative paths resolve against '../../../packages'
        # while search_codebase greps '../../packages' -- confirm which root
        # is intended; both are kept as they were.
        if not path.startswith('/'):
            path = os.path.join('../../../packages', path)
        if not os.path.exists(path):
            return "File not found."
        try:
            head = []
            with open(path, 'r', encoding='utf-8', errors='replace') as handle:
                for index, line in enumerate(handle):
                    if index >= 200:
                        break
                    head.append(line)
        except OSError:
            return "Error reading file."
        return "".join(head)

    return "Unknown tool"
|
||||
|
||||
def debug_one(issue_num):
    """Run an interactive tool-calling analysis of one issue and print the result.

    Loads ``../data/bugs.json``, finds the entry whose ``number`` equals
    ``issue_num``, and converses with the model for at most 10 turns. Each
    turn either executes the function calls the model asked for and feeds the
    results back, or prints the model's text as the final response.

    Args:
        issue_num: Issue number to look up in the bugs file.

    Returns:
        None. All output is printed.
    """
    with open('../data/bugs.json', 'r', encoding='utf-8') as f:
        bugs = json.load(f)

    issue = next((b for b in bugs if b.get('number') == issue_num), None)
    if issue is None:
        print(f"Issue #{issue_num} not found in ../data/bugs.json")
        return

    # Either field may be missing or null in the data file.
    title = issue.get('title') or ''
    body = (issue.get('body') or '')[:1000]

    prompt = f"Analyze this bug for gemini-cli codebase. pinpoint files/logic. rate effort (small/medium/large) with reasoning.\n\nTitle: {title}\nBody: {body}\n\nOutput ONLY a JSON object with: analysis, effort_level, reasoning, recommended_implementation."
    messages = [{"role": "user", "parts": [{"text": prompt}]}]

    for i in range(10):
        print(f"--- Turn {i} ---")
        res = call_gemini(messages)

        candidates = res.get('candidates') or []
        if not candidates or 'content' not in candidates[0]:
            # The response carried no usable candidate content.
            print("No usable candidate in response:", res)
            return

        candidate = candidates[0]['content']
        parts = candidate.get('parts', [])
        messages.append(candidate)

        fcalls = [p['functionCall'] for p in parts if 'functionCall' in p]
        if fcalls:
            responses = []
            for fc in fcalls:
                out = execute_tool(fc)
                responses.append({"functionResponse": {"name": fc['name'], "response": {"result": out}}})
            messages.append({"role": "user", "parts": responses})
            continue

        txt = parts[0].get('text', '') if parts else ''
        print("Final Response:", txt)
        return

    print("Stopped after 10 turns without a final response.")


if __name__ == '__main__':
    debug_one(23541)
|
||||
@@ -0,0 +1,100 @@
|
||||
import json
|
||||
import urllib.request
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import concurrent.futures
|
||||
|
||||
# Gemini REST endpoint configuration.
# The key is taken from the GEMINI_API_KEY environment variable so that a real
# credential never has to live in the source file. The original placeholder
# literal is kept as the fallback so the module still imports when the
# variable is unset.
API_KEY = os.environ.get("GEMINI_API_KEY", "REDACTED_API_KEY")
MODEL = "gemini-3-flash-preview"
URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
# JSON file that is read at import time and rewritten by main().
BUGS_FILE = 'data/issues.json'
|
||||
|
||||
# Load the issue records once at import time; main() iterates over this list.
with open(BUGS_FILE) as bugs_handle:
    bugs = json.loads(bugs_handle.read())
|
||||
|
||||
def extract_files(text):
    """Return the set of file-like references found in ``text``.

    A reference is a run of word characters, dots, slashes and dashes that
    ends in ``.ts``, ``.tsx``, ``.js``, ``.json`` or ``.md``.

    Args:
        text: Free-form text to scan.

    Returns:
        A set of matched strings, excluding the remnants of URLs.
    """
    # Longer extensions are listed first and a word boundary is required so
    # "Footer.tsx" is not cut to "Footer.ts" and "package.json" is not cut to
    # "package.js".
    matches = re.findall(r'[\w./\-]+\.(?:tsx|ts|json|js|md)\b', text)
    # ':' is not part of the character class, so a URL such as
    # "https://host/a.ts" is matched from its "//" onwards; dropping those
    # filters URLs without discarding real names like "httpClient.ts".
    return {m for m in matches if not m.startswith('//')}
|
||||
|
||||
def get_file_content(filepath, repo_root='/Users/cocosheng/gemini-cli'):
    """Return a labelled excerpt of the first file named like ``filepath``.

    Only the base name of ``filepath`` is used: the tree under ``repo_root``
    is searched (skipping ``node_modules`` directories) for a file with that
    name, and the first one found is read.

    Args:
        filepath: File reference as it appeared in the issue text; it is used
            verbatim in the excerpt header.
        repo_root: Directory to search. Defaults to the path the script was
            originally hard-wired to.

    Returns:
        ``"\\n--- <filepath> ---\\n"`` followed by at most 200 lines of the file
        and a trailing newline, or ``""`` when no file is found or it cannot
        be read.
    """
    filename = os.path.basename(filepath)
    if not filename:
        return ""

    actual_path = None
    for current_dir, subdirs, files in os.walk(repo_root):
        # Prune in place so node_modules is never descended into; sorting
        # makes the "first match" deterministic across runs.
        subdirs[:] = sorted(d for d in subdirs if d != 'node_modules')
        if filename in files:
            actual_path = os.path.join(current_dir, filename)
            break
    if actual_path is None:
        return ""

    try:
        with open(actual_path, 'r', encoding='utf-8') as handle:
            # zip stops after 200 lines, so large files are not read fully.
            head = [line.rstrip('\n') for _, line in zip(range(200), handle)]
    except (OSError, UnicodeDecodeError):
        # Best-effort context: an unreadable file simply adds nothing.
        return ""
    return f"\n--- {filepath} ---\n" + "\n".join(head) + "\n"
|
||||
|
||||
def process_bug(bug):
    """Ask the model for a ``recommended_implementation`` for one small bug.

    Bugs whose ``effort_level`` is not ``'small'`` and bugs that already have
    a non-blank ``recommended_implementation`` are returned untouched. For
    the rest, up to three files mentioned in the bug text are looked up for
    context, a prompt is sent to the model, and the parsed answer is stored
    on the bug. Any failure is printed and leaves the bug unchanged.

    Args:
        bug: Issue record (dict); mutated in place on success.

    Returns:
        The same ``bug`` dict.
    """
    if bug.get('effort_level') != 'small':
        return bug

    if (bug.get('recommended_implementation') or '').strip() != "":
        return bug

    # Fields can be present but null; an exception here would propagate out
    # of executor.map() in main() and discard every other worker's result.
    title = bug.get('title') or ''
    body = (bug.get('body') or '')[:1000]
    analysis = bug.get('analysis') or ''
    reasoning = bug.get('reasoning') or ''

    combined_text = f"{title} {body} {analysis} {reasoning}"
    files = extract_files(combined_text)
    # Sort before truncating: set order varies between runs, which would make
    # the prompt (and therefore the answer) non-reproducible.
    code_context = "".join(get_file_content(f) for f in sorted(files)[:3])

    prompt = f"""You are a senior software engineer working on the gemini-cli codebase.
This bug has been classified as a "small" effort bug. Please provide a concise, actionable `recommended_implementation` (or "recommended fix") for it.
It should be 1-3 sentences describing exactly what needs to be changed in the code (e.g., "In `file.ts`, change X to Y.").

Bug Title: {title}
Bug Body: {body}
Analysis: {analysis}
Reasoning: {reasoning}

Codebase Context:
{code_context[:8000]}

Output ONLY a JSON object (no markdown formatting, no codeblocks):
{{
"recommended_implementation": "your suggested fix"
}}
"""
    data = {
        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0.0, "response_mime_type": "application/json"}
    }

    number = bug.get('number')
    try:
        req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
        with urllib.request.urlopen(req, timeout=30) as response:
            res = json.loads(response.read().decode('utf-8'))
        txt = res['candidates'][0]['content']['parts'][0]['text']
        parsed = json.loads(txt)

        bug['recommended_implementation'] = parsed.get('recommended_implementation', '')
        print(f"Generated fix for #{number}", flush=True)
    except Exception as e:
        # Worker boundary: report and carry on so one bad response cannot
        # abort the whole batch.
        print(f"Failed #{number}: {e}", flush=True)

    return bug
|
||||
|
||||
def main():
    """Run process_bug over every loaded bug and write the results back.

    Prints how many bugs are small and still lack a recommended
    implementation, maps ``process_bug`` over the full ``bugs`` list with a
    10-thread pool, then overwrites ``BUGS_FILE`` with the returned records.
    """
    pending = 0
    for record in bugs:
        if record.get('effort_level') == 'small' and not record.get('recommended_implementation'):
            pending += 1
    print(f"Starting LLM generation for {pending} small bugs...", flush=True)

    pool = concurrent.futures.ThreadPoolExecutor(max_workers=10)
    with pool:
        # executor.map keeps input order, so the file keeps its ordering.
        results = [processed for processed in pool.map(process_bug, bugs)]

    with open(BUGS_FILE, 'w') as out_handle:
        json.dump(results, out_handle, indent=2)

    print("Done generating fixes.", flush=True)


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,35 @@
|
||||
import json
|
||||
|
||||
BUGS_FILE = '../data/bugs.json'

with open(BUGS_FILE, 'r', encoding='utf-8') as f:
    bugs = json.load(f)

# Manually verified high-quality analysis for problematic bugs
manual_updates = {
    19468: {
        "analysis": "The scroll jumping and flickering are caused by frequent re-renders of the `Static` history component in `MainContent.tsx`. This happens when background state updates (like telemetry or periodic model status checks) cause a context update that either increments `historyRemountKey` or forces a full component tree refresh, causing Ink to re-output the entire static history to the terminal buffer.",
        "effort_level": "medium",
        "reasoning": "TUI-specific bug involving complex state synchronization between background services and the React rendering loop in `packages/cli/src/ui`. Requires tracing high-frequency state changes in `UIStateContext.tsx` and ensuring `Static` is only remounted when absolutely necessary."
    },
    23541: {
        "analysis": "Autocomplete for subcommands (e.g. `/directory `) incorrectly prepends the main command name again, resulting in strings like `/directory /directory list`. This is caused by the completion logic in `useCommandCompletion.tsx` not correctly identifying that the command prefix is already present in the input buffer.",
        "effort_level": "medium",
        "reasoning": "Requires fixing the string slicing and matching logic in `packages/cli/src/ui/hooks/useCommandCompletion.tsx` (or `atCommandProcessor.ts`). Must correctly handle cursor position and existing buffer content when calculating the completion 'delta' to insert."
    }
}

# Issue numbers that were actually present in the file, so the final message
# reports what happened instead of assuming every entry matched.
applied = []
for bug in bugs:
    num = bug.get('number')
    if num in manual_updates:
        upd = manual_updates[num]
        bug['analysis'] = upd['analysis']
        bug['effort_level'] = upd['effort_level']
        bug['reasoning'] = upd['reasoning']
        if 'recommended_implementation' in upd:
            bug['recommended_implementation'] = upd['recommended_implementation']
        applied.append(num)

with open(BUGS_FILE, 'w', encoding='utf-8') as f:
    json.dump(bugs, f, indent=2)

if applied:
    print("Injected high-quality analysis for " + " and ".join(f"#{n}" for n in sorted(set(applied))) + ".")
missing = sorted(set(manual_updates) - set(applied))
if missing:
    print("Not found in " + BUGS_FILE + ": " + ", ".join(f"#{n}" for n in missing))
|
||||
@@ -0,0 +1,77 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
# Data file that this script reads here and rewrites at the end.
BUGS_FILE = '../data/bugs.json'

with open(BUGS_FILE) as bugs_handle:
    bugs = json.loads(bugs_handle.read())
|
||||
|
||||
# Stricter criteria keywords
# Order matters: within a tier the first listed keyword that matches is the
# one quoted in the classification reason.
LARGE_KEYWORDS = [
    'windows', 'win32', 'wsl', 'wsl2', 'pty', 'pseudo-terminal', 'child_process', 'spawn', 'sigint', 'sigterm',
    'memory leak', 'performance', 'boot time', 'infinite loop', 'hangs', 'freezes', 'crashes', 'race condition',
    'intermittent', 'sometimes', 'flickering', 'a2a', 'mcp protocol', 'scheduler', 'event loop', 'websocket',
    'stream', 'throughput', 'concurrency', 'deadlock', 'file descriptor'
]

MEDIUM_KEYWORDS = [
    'react', 'hook', 'useeffect', 'usestate', 'usememo', 'ink', 'tui', 'ui state', 'parser', 'markdown',
    'regex', 'regular expression', 'ansi', 'escape sequence', 'toml', 'schema', 'validation', 'zod',
    'promise', 'async', 'await', 'unhandled', 'rejection', 'config', 'settings', 'env', 'environment',
    'path resolution', 'symlink', 'git', 'telemetry', 'logging', 'format', 'display', 'rendering',
    'clipboard', 'copy', 'paste', 'bracketed', 'interactive', 'dialog', 'modal', 'focus'
]

SMALL_KEYWORDS = [
    'typo', 'spelling', 'rename', 'string', 'constant', 'css', 'color', 'theme.status', 'padding', 'margin',
    'error message', 'econnreset', 'enotdir', 'etimedout', 'documentation', 'jsdoc', 'readme', 'help text',
    'flag', 'version string'
]


def reevaluate_effort(bug):
    """Classify a bug's effort level from keywords in its text fields.

    The lower-cased ``title``, ``body``, ``analysis`` and ``reasoning`` are
    searched for whole-word keyword matches. Tiers are checked in the order
    large, medium, small; the first hit decides. With no hit at all the bug
    defaults to medium.

    Args:
        bug: Issue record (dict). Missing or null fields count as empty.

    Returns:
        A ``(effort_level, reason)`` tuple of strings.
    """
    # ``or ''`` covers keys that exist with a null value, which would
    # otherwise raise AttributeError on ``.lower()``.
    title = (bug.get('title') or '').lower()
    body = (bug.get('body') or '').lower()
    analysis = (bug.get('analysis') or '').lower()
    reasoning = (bug.get('reasoning') or '').lower()

    combined_text = f"{title} {body} {analysis} {reasoning}"

    tiers = (
        # 1. Check for Large criteria first
        ("large", LARGE_KEYWORDS,
         "Re-classified to LARGE due to presence of complex architectural/platform keyword: '{kw}'"),
        # 2. Check for Medium criteria
        ("medium", MEDIUM_KEYWORDS,
         "Re-classified to MEDIUM due to presence of logic/state keyword: '{kw}'"),
        # 3. Check for Small criteria
        ("small", SMALL_KEYWORDS,
         "Verified as SMALL due to presence of trivial/localized keyword: '{kw}'"),
    )
    for level, keywords, template in tiers:
        for kw in keywords:
            if re.search(r'\b' + re.escape(kw) + r'\b', combined_text):
                return level, template.format(kw=kw)

    # Default to medium if it doesn't match small criteria explicitly
    return "medium", "Defaulted to MEDIUM as it requires logic tracing and testing, not just a trivial string/constant update."
|
||||
|
||||
# Re-classify every bug and record why whenever the effort level changes.
updated_count = 0
for bug in bugs:
    old_effort = bug.get('effort_level')
    new_effort, classification_reason = reevaluate_effort(bug)

    if old_effort != new_effort:
        bug['effort_level'] = new_effort
        # Append the re-classification reason to the existing reasoning
        # (a null reasoning must not end up as the literal text "None").
        existing_reasoning = bug.get('reasoning') or ''
        bug['reasoning'] = f"{existing_reasoning} | {classification_reason}".strip(' |')
        updated_count += 1

        # If it's no longer small, we should probably remove the recommended implementation
        # as it was likely overly simplistic or incorrect.
        # NOTE(review): placed inside the "effort changed" branch to match
        # "no longer small" -- confirm against the original indentation.
        if new_effort != 'small' and 'recommended_implementation' in bug:
            del bug['recommended_implementation']

with open(BUGS_FILE, 'w', encoding='utf-8') as f:
    json.dump(bugs, f, indent=2)

print(f"Successfully re-evaluated and updated {updated_count} bugs based on stricter criteria.")
|
||||
@@ -0,0 +1,78 @@
|
||||
import json
|
||||
|
||||
BUGS_FILE = '../data/bugs.json'

with open(BUGS_FILE, 'r', encoding='utf-8') as f:
    bugs = json.load(f)

# Data from generalist analysis
updates = {
    23643: {
        "analysis": "YOLO mode performs rapid sequential tool calls. On Windows, `fs.writeFile` in `StandardFileSystemService` (packages/core/src/services/fileSystemService.ts) frequently fails due to file locks from IDE watchers or indexing services that trigger on the first write.",
        "effort_level": "medium",
        "reasoning": "Requires implementing a robust retry-with-backoff mechanism specifically for Windows in the core file system service to handle transient 'Resource busy' errors."
    },
    23528: {
        "analysis": "A variant of the file lock issue where the Node.js `fs` layer fails while native tools might succeed. It can also be caused by path normalization issues in `packages/core/src/tools/write-file.ts` where `getTargetDir()` doesn't align with the environment's path expectations.",
        "effort_level": "medium",
        "reasoning": "Requires cross-layer debugging of the path resolution and write verification logic on Windows."
    },
    23507: {
        "analysis": "In ACP mode (non-interactive), the shell tool (packages/core/src/tools/shell.ts) attempts to solicit user confirmation. Since no TTY is available, the request hangs or fails to return a result to the ACP stream.",
        "effort_level": "medium",
        "reasoning": "Logic needs to be added to `executeToolWithHooks` or the scheduler to auto-approve or return a specific error when confirmation is impossible."
    },
    23480: {
        "analysis": "When extensions from private repositories are checked for updates, the spawned `git fetch` process prompts for credentials, stealing stdin from the main CLI.",
        "effort_level": "small",
        "reasoning": "This is a standard process isolation issue.",
        "recommended_implementation": "Set `GIT_TERMINAL_PROMPT=0` in the environment of any `git` process spawned for background tasks in `McpClientManager` or the extension service."
    },
    23427: {
        "analysis": "The `executeToolWithHooks` function in `packages/core/src/core/coreToolHookTriggers.ts` processes blocking and stopping decisions but omits the `systemMessage` field from the `HookOutput` for successful turns.",
        "effort_level": "medium",
        "reasoning": "Requires updating the core client's event loop to yield a new `GeminiEventType.SystemMessage` and modifying the UI to render it."
    },
    23417: {
        "analysis": "`packages/cli/src/utils/readStdin.ts` sets UTF-8 encoding and then uses `chunk.length`, which measures UTF-16 code units, not actual bytes.",
        "effort_level": "small",
        "reasoning": "Multi-byte characters (like emojis) are undercounted, leading to inaccurate 8MB limit enforcement.",
        "recommended_implementation": "Replace `chunk.length` with `Buffer.byteLength(chunk, 'utf8')`."
    },
    23356: {
        "analysis": "Likely an unhandled promise rejection or timeout in the IDE companion communication layer (packages/vscode-ide-companion).",
        "effort_level": "medium",
        "reasoning": "Intermittent connection drops between the extension host and the `ide-server` need better error boundaries."
    },
    23346: {
        "analysis": "The sidebar input component lacks bracketed paste mode support. Carriage returns in pasted blocks are interpreted as immediate submission signals.",
        "effort_level": "medium",
        "reasoning": "Requires updating the sidebar input logic to buffer multi-character sequences wrapped in paste escape codes."
    },
    23336: {
        "analysis": "The model's internal thought blocks (prefixed with `s94>thought`) are not correctly stripped by the regex in the CLI's UI rendering layer.",
        "effort_level": "small",
        "reasoning": "A simple regex update in the message display component is required.",
        "recommended_implementation": "Update the display filter to catch and remove `s94>thought` and standard `<thought>` tags before the string reaches Ink's `Text` component."
    },
    23297: {
        "analysis": "The UI is often hung because a fetch call in `IDEConnectionUtils` (used for companion features) has timed out at the Node level (5 mins) without a client-side timeout, blocking the React/Ink event loop.",
        "effort_level": "medium",
        "reasoning": "Requires adding explicit `AbortSignal` timeouts to all IDE fetch calls."
    }
}

# Count real matches so the summary line reflects what was written rather
# than the size of the table above.
updated_count = 0
for bug in bugs:
    num = bug.get('number')
    if num in updates:
        upd = updates[num]
        bug['analysis'] = upd['analysis']
        bug['effort_level'] = upd['effort_level']
        bug['reasoning'] = upd['reasoning']
        if 'recommended_implementation' in upd:
            bug['recommended_implementation'] = upd['recommended_implementation']
        updated_count += 1

with open(BUGS_FILE, 'w', encoding='utf-8') as f:
    json.dump(bugs, f, indent=2)

print(f"Updated {updated_count} bugs.")
|
||||
@@ -0,0 +1,51 @@
|
||||
import json
|
||||
|
||||
BUGS_FILE = '../data/bugs.json'

with open(BUGS_FILE, 'r', encoding='utf-8') as f:
    bugs = json.load(f)

# Hand-checked analysis entries keyed by issue number.
verified_updates = {
    25757: {
        "analysis": "Slow boot times (9.77s) are likely due to sequential initialization of heavy services like `agentRegistry`, `toolRegistry` (which may check `canUseRipgrep`), and MCP clients in `Config._initialize`.",
        "effort_level": "medium",
        "reasoning": "Requires profiling `packages/core/src/config/config.ts` to identify the specific bottleneck. Parallelizing initialization is a non-trivial refactor to avoid race conditions during service hydration."
    },
    25744: {
        "analysis": "Account suspension (403 error) is an external issue, but the CLI should catch this status code and provide a user-friendly explanation rather than a raw API error.",
        "effort_level": "small",
        "reasoning": "Localized fix in the `OAuth` provider or `GeminiChat` error handler. Requires checking for `status === 403` and returning a clear message directing the user to Google Support.",
        "recommended_implementation": "In `packages/core/src/utils/errors.ts` or the API transport layer, add a specific case for 403 errors that maps to a 'Account Suspended or Restricted' message."
    },
    25656: {
        "analysis": "Markdown rendering fails for LaTeX syntax because the `inlineRegex` in `markdownParsingUtils.ts` does not account for `$` delimiters, and `stripUnsafeCharacters` may be over-eager.",
        "effort_level": "medium",
        "reasoning": "Requires updating the markdown parser logic in `packages/cli/src/ui/utils/markdownParsingUtils.ts` to recognize math blocks and ensuring that LaTeX-specific characters like `\\` are preserved during sanitization.",
    },
    25615: {
        "analysis": "Infinite UI loop on Windows during `run_shell_command` suggests a synchronization or buffer handling issue between the shell process and the Ink TUI when handling Windows-specific control characters.",
        "effort_level": "large",
        "reasoning": "Extremely hard to reproduce and debug without a Windows environment. Impacts core process execution in `packages/core/src/tools/shell.ts` and terminal rendering in `packages/cli`.",
    },
    25610: {
        "analysis": "Theme validation error for `text.response` key is caused by a mismatch between the `CustomTheme` TypeScript interface and the JSON schema used for validation.",
        "effort_level": "small",
        "reasoning": "The `CustomTheme` interface in `packages/core/src/config/config.ts` includes `response`, but the `SETTINGS_SCHEMA` in `packages/cli/src/config/settingsSchema.ts` does not. This is a one-line schema update.",
        "recommended_implementation": "In `packages/cli/src/config/settingsSchema.ts`, add `response: { type: 'string' }` to the `CustomTheme.properties.text.properties` object."
    }
}

# Count real matches so the summary line reflects what was written rather
# than the size of the table above.
saved_count = 0
for bug in bugs:
    num = bug.get('number')
    if num in verified_updates:
        upd = verified_updates[num]
        bug['analysis'] = upd['analysis']
        bug['effort_level'] = upd['effort_level']
        bug['reasoning'] = upd['reasoning']
        if 'recommended_implementation' in upd:
            bug['recommended_implementation'] = upd['recommended_implementation']
        saved_count += 1

with open(BUGS_FILE, 'w', encoding='utf-8') as f:
    json.dump(bugs, f, indent=2)

print(f"Saved verified updates for {saved_count} of {len(verified_updates)} bugs.")
|
||||
@@ -0,0 +1,134 @@
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
|
||||
# Issue data file (read here, rewritten at the end of the script) and the
# checkout that referenced file names are resolved against.
ISSUES_FILE = 'backlog-analysis/issues.json'
REPO_ROOT = '/Users/cocosheng/gemini-cli'

with open(ISSUES_FILE) as issues_handle:
    issues = json.loads(issues_handle.read())
|
||||
|
||||
# Stricter criteria keywords
# Each list is matched as whole words against the lower-cased issue text.
LARGE_KEYWORDS = [
    # platform / process handling
    'windows', 'win32', 'wsl', 'wsl2', 'pty', 'pseudo-terminal',
    'child_process', 'spawn', 'sigint', 'sigterm',
    # resource and stability symptoms
    'memory leak', 'performance', 'boot time', 'infinite loop',
    'hangs', 'freezes', 'crashes', 'race condition',
    'intermittent', 'sometimes', 'flickering',
    # protocols and runtime internals
    'a2a', 'mcp protocol', 'scheduler', 'event loop', 'websocket',
    'stream', 'throughput', 'concurrency', 'deadlock', 'file descriptor',
    # structural work
    'architecture', 'refactor',
]

MEDIUM_KEYWORDS = [
    # UI framework
    'react', 'hook', 'useeffect', 'usestate', 'usememo', 'ink', 'tui', 'ui state',
    # parsing and validation
    'parser', 'markdown', 'regex', 'regular expression', 'ansi', 'escape sequence',
    'toml', 'schema', 'validation', 'zod',
    # async behaviour
    'promise', 'async', 'await', 'unhandled', 'rejection',
    # configuration and environment
    'config', 'settings', 'env', 'environment', 'path resolution', 'symlink', 'git',
    # output
    'telemetry', 'logging', 'format', 'display', 'rendering',
    # input and interaction
    'clipboard', 'copy', 'paste', 'bracketed', 'interactive', 'dialog', 'modal', 'focus',
]

SMALL_KEYWORDS = [
    # text and constants
    'typo', 'spelling', 'rename', 'string', 'constant',
    # styling
    'css', 'color', 'theme.status', 'padding', 'margin',
    # messages and error codes
    'error message', 'econnreset', 'enotdir', 'etimedout',
    # docs and CLI surface
    'documentation', 'jsdoc', 'readme', 'help text',
    'flag', 'version string', 'static value',
]
|
||||
|
||||
def find_files_in_text(text):
    """Return the set of file-like references found in ``text``.

    Matches patterns like packages/cli/src/ui/components/Footer.tsx or
    Footer.tsx: a run of word characters, dots, slashes and dashes ending in
    ``.ts``, ``.tsx``, ``.js``, ``.json`` or ``.md``.

    Args:
        text: Free-form text to scan.

    Returns:
        A set of matched strings, excluding the remnants of URLs.
    """
    # Longer extensions are listed first and a word boundary is required so
    # "Footer.tsx" is not cut to "Footer.ts" and "package.json" is not cut to
    # "package.js".
    matches = re.findall(r'[\w./\-]+\.(?:tsx|ts|json|js|md)\b', text)
    # Filter out URLs: ':' is not part of the character class, so a URL such
    # as "https://host/a.ts" is matched from its "//" onwards. Testing for
    # that prefix drops URLs without discarding names like "httpClient.ts".
    return {m for m in matches if not m.startswith('//')}
|
||||
|
||||
def resolve_file(filename, repo_root=None):
    """Locate ``filename`` inside the repository checkout.

    The name is first tried as a path relative to the root. If that is not a
    file, the tree is walked (skipping ``.git`` and ``node_modules``
    directories) and the first file with the same base name is returned.

    Args:
        filename: Relative path or bare file name taken from issue text.
        repo_root: Directory to search; defaults to ``REPO_ROOT``.

    Returns:
        The path of the file found, or ``None``.
    """
    root = REPO_ROOT if repo_root is None else repo_root

    # Leading slashes are stripped so an absolute-looking reference cannot
    # make os.path.join discard the root and point outside the checkout.
    direct = os.path.join(root, filename.lstrip('/'))
    if os.path.isfile(direct):
        return direct

    # Try searching the repo for the basename
    basename = os.path.basename(filename)
    for current_dir, subdirs, files in os.walk(root):
        # Prune by exact directory name, in place: a substring test would
        # also skip e.g. ".github", and a plain ``continue`` would still
        # descend into node_modules. Sorting keeps the result deterministic.
        subdirs[:] = sorted(d for d in subdirs if d not in ('.git', 'node_modules'))
        if basename in files:
            return os.path.join(current_dir, basename)
    return None
|
||||
|
||||
def analyze_issue(issue):
    """Estimate an issue's effort level from keywords and referenced files.

    File names mentioned in the issue's ``title``, ``body``, ``analysis`` and
    ``reasoning`` are resolved in the checkout and their line counts summed.
    The result combines that with a keyword tier (large, then medium, else
    small):

    * no resolvable files: the keyword tier, but never below medium;
    * more than 3 files, more than 1500 lines, or a large keyword: large;
    * at least 2 files, more than 500 lines, or a medium keyword: medium;
    * otherwise small.

    Args:
        issue: Issue record (dict). Missing or null fields count as empty.

    Returns:
        A ``(effort_level, validation_message)`` tuple of strings.
    """
    # ``or ''`` covers keys that exist with a null value.
    raw_text = " ".join(
        issue.get(key) or '' for key in ('title', 'body', 'analysis', 'reasoning')
    )
    combined_text = raw_text.lower()

    # File names are extracted from the original-case text: the lower-cased
    # form would not match real file names in a case-sensitive lookup.
    potential_files = find_files_in_text(raw_text)
    actual_files = []
    seen_paths = set()
    total_lines = 0

    # Sorted so the file list in the message is stable between runs.
    for f in sorted(potential_files):
        resolved = resolve_file(f)
        if not resolved or resolved in seen_paths:
            continue
        try:
            with open(resolved, 'r', encoding='utf-8') as file_obj:
                lines = sum(1 for _ in file_obj)
        except (OSError, UnicodeDecodeError):
            # Unreadable files are simply left out of the heuristic.
            continue
        seen_paths.add(resolved)
        actual_files.append((resolved, lines))
        total_lines += lines

    num_files = len(actual_files)

    def has_keyword(keywords):
        return any(
            re.search(r'\b' + re.escape(kw) + r'\b', combined_text)
            for kw in keywords
        )

    # Keyword analysis
    if has_keyword(LARGE_KEYWORDS):
        keyword_effort = "large"
    elif has_keyword(MEDIUM_KEYWORDS):
        keyword_effort = "medium"
    else:
        keyword_effort = "small"

    # Codebase heuristic
    if num_files == 0:
        # If no files found, rely strictly on keywords, but default to medium to be safe
        effort = keyword_effort if keyword_effort in ['medium', 'large'] else 'medium'
        validation_msg = f"No specific files identified in codebase. Keyword heuristic: {keyword_effort}."
        return effort, validation_msg

    file_details = ", ".join([f"{os.path.basename(f[0])} ({f[1]} lines)" for f in actual_files])
    if num_files > 3 or total_lines > 1500 or keyword_effort == "large":
        effort = "large"
        validation_msg = f"Codebase validation: {num_files} files ({file_details}), {total_lines} total lines. Keyword hint: {keyword_effort}."
    elif num_files >= 2 or total_lines > 500 or keyword_effort == "medium":
        effort = "medium"
        validation_msg = f"Codebase validation: {num_files} files ({file_details}), {total_lines} total lines. Keyword hint: {keyword_effort}."
    else:
        effort = "small"
        validation_msg = f"Codebase validation: {num_files} files ({file_details}), {total_lines} total lines. Appears highly localized."

    return effort, validation_msg
|
||||
|
||||
# Matches a validation note written by an earlier run, whether it follows
# other reasoning (" | " prefix) or is the whole field (start of string).
_PREVIOUS_VALIDATION = re.compile(
    r'(?:^| \| )(?:Codebase validation:|No specific files identified)'
)

updated_count = 0
for issue in issues:
    old_effort = issue.get('effort_level')
    new_effort, validation_reason = analyze_issue(issue)

    issue['effort_level'] = new_effort

    # Store the validation reason in the reasoning field
    # (a null reasoning is treated as empty).
    existing_reasoning = issue.get('reasoning') or ''
    # Strip any previous validation messages so reruns do not stack them.
    existing_reasoning = _PREVIOUS_VALIDATION.split(existing_reasoning, maxsplit=1)[0]

    issue['reasoning'] = f"{existing_reasoning} | {validation_reason}".strip(' |')

    if old_effort != new_effort:
        updated_count += 1

with open(ISSUES_FILE, 'w', encoding='utf-8') as f:
    json.dump(issues, f, indent=2)

print(f"Successfully re-evaluated and updated {updated_count} issues. Codebase validated.")
|
||||
Reference in New Issue
Block a user