chore: organize auxiliary and validation scripts into a utils directory

2026-07-12 11:00:51 -07:00 · 2026-05-06 16:04:00 -04:00
parent a365af630d
commit 268fe7cbe1
8 changed files with 13 additions and 9 deletions
@@ -0,0 +1,89 @@
+import json
+import urllib.request
+import urllib.error
+import os
+import subprocess
+
+# Prefer a key supplied through the GEMINI_API_KEY environment variable so the
+# secret does not have to live in source control; the literal is the fallback.
+API_KEY = os.environ.get("GEMINI_API_KEY", "REDACTED_API_KEY")
+MODEL = "gemini-3-flash-preview"
+# generateContent endpoint for MODEL; the key travels as a query parameter.
+URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
+
+def call_gemini(messages, timeout=60):
+    """Send the conversation to the Gemini generateContent endpoint.
+
+    The request advertises two function declarations (`search_codebase` and
+    `read_file`) and uses a temperature of 0.1.
+
+    Args:
+        messages: List of content dicts sent as the request's "contents".
+        timeout: Seconds to wait on the HTTP request before giving up.
+
+    Returns:
+        The decoded JSON response body.
+
+    Raises:
+        urllib.error.HTTPError: On an HTTP error status; the response body is
+            printed first so the API's own error message is visible.
+        urllib.error.URLError: On connection failures.
+    """
+    tools = [{
+        "functionDeclarations": [
+            {
+                "name": "search_codebase",
+                "description": "Search the gemini-cli packages directory for a string using grep.",
+                "parameters": {
+                    "type": "OBJECT",
+                    "properties": {"pattern": {"type": "STRING"}},
+                    "required": ["pattern"]
+                }
+            },
+            {
+                "name": "read_file",
+                "description": "Read a specific file.",
+                "parameters": {
+                    "type": "OBJECT",
+                    "properties": {"filepath": {"type": "STRING"}},
+                    "required": ["filepath"]
+                }
+            }
+        ]
+    }]
+    data = {
+        "contents": messages,
+        "tools": tools,
+        "generationConfig": {"temperature": 0.1}
+    }
+    req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
+    try:
+        # Without a timeout a stalled connection would block the script forever.
+        with urllib.request.urlopen(req, timeout=timeout) as response:
+            return json.loads(response.read().decode('utf-8'))
+    except urllib.error.HTTPError as err:
+        # The error payload explains what was rejected; show it, then re-raise
+        # the same exception so callers see the original failure.
+        detail = err.read().decode('utf-8', errors='replace')
+        print(f"Gemini API error {err.code}: {detail}")
+        raise
+
+def execute_tool(call):
+    """Run one model-requested tool call and return its output as text.
+
+    Args:
+        call: Function-call dict with a 'name' and optional 'args' mapping.
+
+    Returns:
+        For 'search_codebase': up to 10 grep hits that do not mention
+        node_modules/dist/build, "No results." when there are none, or
+        "Error or no results." if grep could not be run.
+        For 'read_file': the first 200 lines of the file, "File not found.",
+        or "Error reading file.".
+        "Unknown tool" for any other name.
+    """
+    name = call['name']
+    args = call.get('args', {})
+    print(f"  > Executing: {name}({args})")
+    if name == 'search_codebase':
+        pattern = str(args.get('pattern', ''))
+        # The pattern is chosen by the model, so it is passed as a plain argv
+        # entry (never through a shell); -e keeps a leading '-' from being
+        # parsed as a grep option.
+        # NOTE(review): this searches ../../packages while read_file resolves
+        # against ../../../packages -- confirm which root is intended.
+        cmd = ['grep', '-rn', '-e', pattern, '../../packages']
+        skipped = ('node_modules', 'dist', 'build')
+        hits = []
+        try:
+            with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                  text=True, errors='replace') as proc:
+                for line in proc.stdout:
+                    if any(token in line for token in skipped):
+                        continue
+                    hits.append(line)
+                    if len(hits) == 10:
+                        # Enough results: stop grep rather than scan the rest.
+                        proc.kill()
+                        break
+        except (OSError, subprocess.SubprocessError):
+            return "Error or no results."
+        return "".join(hits) or "No results."
+    elif name == 'read_file':
+        path = str(args.get('filepath', ''))
+        if not path.startswith('/'):
+            path = os.path.join('../../../packages', path)
+        if not os.path.exists(path):
+            return "File not found."
+        try:
+            # Read directly instead of shelling out to `head`, so the file name
+            # can never be interpreted as shell syntax.
+            with open(path, 'r', encoding='utf-8', errors='replace', newline='') as fh:
+                first_lines = [line for _, line in zip(range(200), fh)]
+            return "".join(first_lines)
+        except OSError:
+            return "Error reading file."
+    return "Unknown tool"
+
+def debug_one(issue_num):
+    """Run an interactive tool-calling analysis of one issue and print the result.
+
+    Loads ../data/bugs.json, builds a prompt from the issue's title and the
+    first 1000 characters of its body, then alternates between the model and
+    local tool execution for at most 10 turns.
+
+    Args:
+        issue_num: The 'number' of the issue to analyze.
+
+    Raises:
+        ValueError: If no issue with that number exists in the data file.
+    """
+    with open('../data/bugs.json', 'r') as f:
+        bugs = json.load(f)
+    issue = next((b for b in bugs if b['number'] == issue_num), None)
+    if issue is None:
+        raise ValueError(f"Issue #{issue_num} not found in ../data/bugs.json")
+
+    # An issue without a description has a null body; treat it as empty text.
+    body = (issue.get('body') or '')[:1000]
+    prompt = f"Analyze this bug for gemini-cli codebase. pinpoint files/logic. rate effort (small/medium/large) with reasoning.\n\nTitle: {issue['title']}\nBody: {body}\n\nOutput ONLY a JSON object with: analysis, effort_level, reasoning, recommended_implementation."
+    messages = [{"role": "user", "parts": [{"text": prompt}]}]
+
+    for i in range(10):
+        print(f"--- Turn {i} ---")
+        res = call_gemini(messages)
+        candidates = res.get('candidates') or []
+        if not candidates or 'content' not in candidates[0]:
+            # The response carried no content to continue from.
+            print("No usable candidate in response:", json.dumps(res)[:500])
+            return
+        candidate = candidates[0]['content']
+        parts = candidate.get('parts', [])
+        messages.append(candidate)
+
+        fcalls = [p['functionCall'] for p in parts if 'functionCall' in p]
+        if not fcalls:
+            txt = parts[0].get('text', '') if parts else ''
+            print("Final Response:", txt)
+            return
+
+        # Answer every requested call in a single user turn.
+        responses = []
+        for fc in fcalls:
+            out = execute_tool(fc)
+            responses.append({"functionResponse": {"name": fc['name'], "response": {"result": out}}})
+        messages.append({"role": "user", "parts": responses})
+
+    print("Stopped after 10 turns without a final response.")
+
+# Only run the analysis when executed as a script, not when imported.
+if __name__ == '__main__':
+    debug_one(23541)
@@ -0,0 +1,100 @@
+import json
+import urllib.request
+import os
+import subprocess
+import re
+import concurrent.futures
+
+# Prefer a key supplied through the GEMINI_API_KEY environment variable so the
+# secret does not have to live in source control; the literal is the fallback.
+API_KEY = os.environ.get("GEMINI_API_KEY", "REDACTED_API_KEY")
+MODEL = "gemini-3-flash-preview"
+URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"
+BUGS_FILE = 'data/issues.json'
+
+# Loaded once at import time. The encoding is explicit so the read does not
+# depend on the platform's locale default.
+with open(BUGS_FILE, 'r', encoding='utf-8') as f:
+    bugs = json.load(f)
+
+def extract_files(text):
+    """Return the set of file-path-like tokens found in `text`.
+
+    A token is a run of word characters, '.', '/' and '-' that ends in one of
+    the extensions .ts, .tsx, .js, .json or .md. Tokens that are the tail of a
+    URL are excluded.
+    """
+    # Longest extensions first, plus a trailing \b: with "ts|tsx" and
+    # "js|json" the match stopped early, turning "Footer.tsx" into "Footer.ts"
+    # and "package.json" into "package.js".
+    matches = re.findall(r'[\w./-]+\.(?:tsx|ts|json|js|md)\b', text)
+    # ':' is not part of the token class, so a URL shows up as "//host/path";
+    # the leading "//" is what identifies it (a "http" prefix test never
+    # fires for URLs and only discards real names such as "httpClient.ts").
+    return {m for m in matches if not m.startswith('//')}
+
+def get_file_content(filepath, repo_root='/Users/cocosheng/gemini-cli'):
+    """Return a labelled excerpt (first 200 lines) of a file found by base name.
+
+    Args:
+        filepath: Path-like token; only its base name is used for the lookup,
+            while the full token is used in the excerpt's header.
+        repo_root: Directory tree to search. node_modules directories are
+            skipped.
+
+    Returns:
+        "\\n--- <filepath> ---\\n<first 200 lines>\\n" for the first file whose
+        name matches, or "" when nothing matches or the file cannot be read.
+    """
+    filename = os.path.basename(filepath)
+    if not filename:
+        return ""
+    try:
+        # Walk the tree in-process instead of interpolating the name into a
+        # shell `find` command, so odd characters in the name are harmless.
+        for root, dirs, files in os.walk(repo_root):
+            # Pruning in place stops os.walk from descending into node_modules.
+            dirs[:] = [d for d in dirs if d != 'node_modules']
+            if filename in files:
+                with open(os.path.join(root, filename), 'r', encoding='utf-8', errors='replace') as f:
+                    first_lines = f.read().splitlines()[:200]
+                return f"\n--- {filepath} ---\n" + "\n".join(first_lines) + "\n"
+    except OSError:
+        # Best effort: an unreadable file simply contributes no context.
+        return ""
+    return ""
+
+def process_bug(bug):
+    """Ask the model for a `recommended_implementation` for one small bug.
+
+    Bugs whose effort level is not 'small', or that already carry a non-empty
+    recommendation, are returned untouched. Otherwise up to three files
+    mentioned in the bug text are attached as context and the model's answer
+    is stored on the bug. Failures are printed and leave the bug unchanged.
+
+    Args:
+        bug: Issue dict; mutated in place when a recommendation is generated.
+
+    Returns:
+        The same `bug` dict.
+    """
+    if bug.get('effort_level') != 'small':
+        return bug
+
+    existing = bug.get('recommended_implementation')
+    if isinstance(existing, str) and existing.strip():
+        return bug
+
+    # `or ''` also covers keys that are present but null (e.g. an issue with
+    # no description), which would otherwise fail on slicing.
+    title = bug.get('title') or ''
+    body = (bug.get('body') or '')[:1000]
+    analysis = bug.get('analysis') or ''
+    reasoning = bug.get('reasoning') or ''
+
+    combined_text = f"{title} {body} {analysis} {reasoning}"
+    # Sorted so the same bug always gets the same three context files.
+    files = sorted(extract_files(combined_text))[:3]
+    code_context = "".join(get_file_content(f) for f in files)
+
+    prompt = f"""You are a senior software engineer working on the gemini-cli codebase. 
+This bug has been classified as a "small" effort bug. Please provide a concise, actionable `recommended_implementation` (or "recommended fix") for it.
+It should be 1-3 sentences describing exactly what needs to be changed in the code (e.g., "In `file.ts`, change X to Y.").
+
+Bug Title: {title}
+Bug Body: {body}
+Analysis: {analysis}
+Reasoning: {reasoning}
+
+Codebase Context:
+{code_context[:8000]}
+
+Output ONLY a JSON object (no markdown formatting, no codeblocks):
+{{
+  "recommended_implementation": "your suggested fix"
+}}
+"""
+    data = {
+        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
+        "generationConfig": {"temperature": 0.0, "response_mime_type": "application/json"}
+    }
+
+    number = bug.get('number')
+    try:
+        req = urllib.request.Request(URL, data=json.dumps(data).encode('utf-8'), headers={'Content-Type': 'application/json'})
+        with urllib.request.urlopen(req, timeout=30) as response:
+            res = json.loads(response.read().decode('utf-8'))
+        txt = res['candidates'][0]['content']['parts'][0]['text']
+        parsed = json.loads(txt)
+
+        bug['recommended_implementation'] = parsed.get('recommended_implementation', '')
+        print(f"Generated fix for #{number}", flush=True)
+    except Exception as e:
+        # Worker boundary: one failed request must not abort the whole batch.
+        print(f"Failed #{number}: {e}", flush=True)
+
+    return bug
+
+def main():
+    """Generate fixes for every bug concurrently and rewrite BUGS_FILE."""
+    # Counted only for the progress message; process_bug itself decides which
+    # bugs are skipped.
+    pending = sum(
+        1
+        for b in bugs
+        if b.get('effort_level') == 'small' and not b.get('recommended_implementation')
+    )
+    print(f"Starting LLM generation for {pending} small bugs...", flush=True)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
+        results = [processed for processed in pool.map(process_bug, bugs)]
+
+    with open(BUGS_FILE, 'w') as out:
+        json.dump(results, out, indent=2)
+
+    print("Done generating fixes.", flush=True)
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,35 @@
+import json
+
+BUGS_FILE = '../data/bugs.json'
+
+with open(BUGS_FILE, 'r', encoding='utf-8') as f:
+    bugs = json.load(f)
+
+# Manually verified high-quality analysis for problematic bugs, keyed by
+# issue number. Each entry overwrites the matching fields of that issue.
+manual_updates = {
+    19468: {
+        "analysis": "The scroll jumping and flickering are caused by frequent re-renders of the `Static` history component in `MainContent.tsx`. This happens when background state updates (like telemetry or periodic model status checks) cause a context update that either increments `historyRemountKey` or forces a full component tree refresh, causing Ink to re-output the entire static history to the terminal buffer.",
+        "effort_level": "medium",
+        "reasoning": "TUI-specific bug involving complex state synchronization between background services and the React rendering loop in `packages/cli/src/ui`. Requires tracing high-frequency state changes in `UIStateContext.tsx` and ensuring `Static` is only remounted when absolutely necessary."
+    },
+    23541: {
+        "analysis": "Autocomplete for subcommands (e.g. `/directory `) incorrectly prepends the main command name again, resulting in strings like `/directory /directory list`. This is caused by the completion logic in `useCommandCompletion.tsx` not correctly identifying that the command prefix is already present in the input buffer.",
+        "effort_level": "medium",
+        "reasoning": "Requires fixing the string slicing and matching logic in `packages/cli/src/ui/hooks/useCommandCompletion.tsx` (or `atCommandProcessor.ts`). Must correctly handle cursor position and existing buffer content when calculating the completion 'delta' to insert."
+    }
+}
+
+# Every entry holds analysis/effort_level/reasoning and optionally
+# recommended_implementation, so dict.update copies exactly those fields.
+applied = []
+for bug in bugs:
+    num = bug.get('number')
+    if num in manual_updates:
+        bug.update(manual_updates[num])
+        applied.append(num)
+
+with open(BUGS_FILE, 'w', encoding='utf-8') as f:
+    json.dump(bugs, f, indent=2)
+
+# Report what actually happened rather than assuming both issues were present.
+if applied:
+    print("Injected high-quality analysis for " + ", ".join(f"#{n}" for n in applied) + ".")
+missing = sorted(set(manual_updates) - set(applied))
+if missing:
+    print("Not found in " + BUGS_FILE + ": " + ", ".join(f"#{n}" for n in missing) + ".")
@@ -0,0 +1,77 @@
+import json
+import re
+
+BUGS_FILE = '../data/bugs.json'
+
+# Explicit encoding so the read does not depend on the platform's locale.
+with open(BUGS_FILE, 'r', encoding='utf-8') as f:
+    bugs = json.load(f)
+
+# Stricter criteria keywords. All entries are lowercase because they are
+# matched against lowercased issue text; list order decides which keyword is
+# reported when several match.
+LARGE_KEYWORDS = [
+    'windows', 'win32', 'wsl', 'wsl2', 'pty', 'pseudo-terminal', 'child_process', 'spawn', 'sigint', 'sigterm',
+    'memory leak', 'performance', 'boot time', 'infinite loop', 'hangs', 'freezes', 'crashes', 'race condition',
+    'intermittent', 'sometimes', 'flickering', 'a2a', 'mcp protocol', 'scheduler', 'event loop', 'websocket',
+    'stream', 'throughput', 'concurrency', 'deadlock', 'file descriptor'
+]
+
+MEDIUM_KEYWORDS = [
+    'react', 'hook', 'useeffect', 'usestate', 'usememo', 'ink', 'tui', 'ui state', 'parser', 'markdown',
+    'regex', 'regular expression', 'ansi', 'escape sequence', 'toml', 'schema', 'validation', 'zod',
+    'promise', 'async', 'await', 'unhandled', 'rejection', 'config', 'settings', 'env', 'environment',
+    'path resolution', 'symlink', 'git', 'telemetry', 'logging', 'format', 'display', 'rendering',
+    'clipboard', 'copy', 'paste', 'bracketed', 'interactive', 'dialog', 'modal', 'focus'
+]
+
+SMALL_KEYWORDS = [
+    'typo', 'spelling', 'rename', 'string', 'constant', 'css', 'color', 'theme.status', 'padding', 'margin',
+    'error message', 'econnreset', 'enotdir', 'etimedout', 'documentation', 'jsdoc', 'readme', 'help text',
+    'flag', 'version string'
+]
+
+def reevaluate_effort(bug):
+    """Classify a bug's effort from keywords in its text.
+
+    The title, body, analysis and reasoning are lowercased and searched for
+    whole-word keyword matches, checking the large list first, then medium,
+    then small. The first match decides the result.
+
+    Args:
+        bug: Issue dict; missing or null text fields count as empty.
+
+    Returns:
+        A `(effort_level, reason)` tuple, where effort_level is "large",
+        "medium" or "small". "medium" is the default when nothing matches.
+    """
+    # `or ''` also covers fields that are present but null (an issue with no
+    # description has a null body), which would otherwise fail on .lower().
+    fields = ('title', 'body', 'analysis', 'reasoning')
+    combined_text = " ".join((bug.get(name) or '').lower() for name in fields)
+
+    # Ordered from most to least severe: the first tier with a hit wins.
+    tiers = (
+        (LARGE_KEYWORDS, "large",
+         "Re-classified to LARGE due to presence of complex architectural/platform keyword: '{kw}'"),
+        (MEDIUM_KEYWORDS, "medium",
+         "Re-classified to MEDIUM due to presence of logic/state keyword: '{kw}'"),
+        (SMALL_KEYWORDS, "small",
+         "Verified as SMALL due to presence of trivial/localized keyword: '{kw}'"),
+    )
+    for keywords, level, template in tiers:
+        for kw in keywords:
+            if re.search(r'\b' + re.escape(kw) + r'\b', combined_text):
+                return level, template.format(kw=kw)
+
+    # Default to medium if it doesn't match small criteria explicitly
+    return "medium", "Defaulted to MEDIUM as it requires logic tracing and testing, not just a trivial string/constant update."
+
+# Notes this script appends to `reasoning`. They must not be fed back into the
+# classifier: the default note mentions "string" and "constant", which are
+# SMALL keywords, so a second run would flip defaulted bugs to small.
+AUTO_NOTE_PREFIXES = ('Re-classified to ', 'Verified as SMALL ', 'Defaulted to MEDIUM ')
+
+updated_count = 0
+for bug in bugs:
+    old_effort = bug.get('effort_level')
+
+    # A null reasoning is treated as empty instead of becoming "None | ...".
+    segments = (bug.get('reasoning') or '').split(' | ')
+    existing_reasoning = ' | '.join(s for s in segments if not s.startswith(AUTO_NOTE_PREFIXES))
+
+    # Classify a copy whose reasoning has the earlier notes removed.
+    new_effort, classification_reason = reevaluate_effort({**bug, 'reasoning': existing_reasoning})
+    if old_effort == new_effort:
+        continue
+
+    bug['effort_level'] = new_effort
+    # Append the re-classification reason to the existing reasoning
+    bug['reasoning'] = f"{existing_reasoning} | {classification_reason}".strip(' |')
+    updated_count += 1
+
+    # If it's no longer small, we should probably remove the recommended implementation
+    # as it was likely overly simplistic or incorrect.
+    if new_effort != 'small':
+        bug.pop('recommended_implementation', None)
+
+with open(BUGS_FILE, 'w', encoding='utf-8') as f:
+    json.dump(bugs, f, indent=2)
+
+print(f"Successfully re-evaluated and updated {updated_count} bugs based on stricter criteria.")
@@ -0,0 +1,78 @@
+import json
+
+BUGS_FILE = '../data/bugs.json'
+
+with open(BUGS_FILE, 'r', encoding='utf-8') as f:
+    bugs = json.load(f)
+
+# Data from generalist analysis, keyed by issue number. Each entry overwrites
+# the matching fields of that issue.
+updates = {
+    23643: {
+        "analysis": "YOLO mode performs rapid sequential tool calls. On Windows, `fs.writeFile` in `StandardFileSystemService` (packages/core/src/services/fileSystemService.ts) frequently fails due to file locks from IDE watchers or indexing services that trigger on the first write.",
+        "effort_level": "medium",
+        "reasoning": "Requires implementing a robust retry-with-backoff mechanism specifically for Windows in the core file system service to handle transient 'Resource busy' errors."
+    },
+    23528: {
+        "analysis": "A variant of the file lock issue where the Node.js `fs` layer fails while native tools might succeed. It can also be caused by path normalization issues in `packages/core/src/tools/write-file.ts` where `getTargetDir()` doesn't align with the environment's path expectations.",
+        "effort_level": "medium",
+        "reasoning": "Requires cross-layer debugging of the path resolution and write verification logic on Windows."
+    },
+    23507: {
+        "analysis": "In ACP mode (non-interactive), the shell tool (packages/core/src/tools/shell.ts) attempts to solicit user confirmation. Since no TTY is available, the request hangs or fails to return a result to the ACP stream.",
+        "effort_level": "medium",
+        "reasoning": "Logic needs to be added to `executeToolWithHooks` or the scheduler to auto-approve or return a specific error when confirmation is impossible."
+    },
+    23480: {
+        "analysis": "When extensions from private repositories are checked for updates, the spawned `git fetch` process prompts for credentials, stealing stdin from the main CLI.",
+        "effort_level": "small",
+        "reasoning": "This is a standard process isolation issue.",
+        "recommended_implementation": "Set `GIT_TERMINAL_PROMPT=0` in the environment of any `git` process spawned for background tasks in `McpClientManager` or the extension service."
+    },
+    23427: {
+        "analysis": "The `executeToolWithHooks` function in `packages/core/src/core/coreToolHookTriggers.ts` processes blocking and stopping decisions but omits the `systemMessage` field from the `HookOutput` for successful turns.",
+        "effort_level": "medium",
+        "reasoning": "Requires updating the core client's event loop to yield a new `GeminiEventType.SystemMessage` and modifying the UI to render it."
+    },
+    23417: {
+        "analysis": "`packages/cli/src/utils/readStdin.ts` sets UTF-8 encoding and then uses `chunk.length`, which measures UTF-16 code units, not actual bytes.",
+        "effort_level": "small",
+        "reasoning": "Multi-byte characters (like emojis) are undercounted, leading to inaccurate 8MB limit enforcement.",
+        "recommended_implementation": "Replace `chunk.length` with `Buffer.byteLength(chunk, 'utf8')`."
+    },
+    23356: {
+        "analysis": "Likely an unhandled promise rejection or timeout in the IDE companion communication layer (packages/vscode-ide-companion).",
+        "effort_level": "medium",
+        "reasoning": "Intermittent connection drops between the extension host and the `ide-server` need better error boundaries."
+    },
+    23346: {
+        "analysis": "The sidebar input component lacks bracketed paste mode support. Carriage returns in pasted blocks are interpreted as immediate submission signals.",
+        "effort_level": "medium",
+        "reasoning": "Requires updating the sidebar input logic to buffer multi-character sequences wrapped in paste escape codes."
+    },
+    23336: {
+        "analysis": "The model's internal thought blocks (prefixed with `s94>thought`) are not correctly stripped by the regex in the CLI's UI rendering layer.",
+        "effort_level": "small",
+        "reasoning": "A simple regex update in the message display component is required.",
+        "recommended_implementation": "Update the display filter to catch and remove `s94>thought` and standard `<thought>` tags before the string reaches Ink's `Text` component."
+    },
+    23297: {
+        "analysis": "The UI is often hung because a fetch call in `IDEConnectionUtils` (used for companion features) has timed out at the Node level (5 mins) without a client-side timeout, blocking the React/Ink event loop.",
+        "effort_level": "medium",
+        "reasoning": "Requires adding explicit `AbortSignal` timeouts to all IDE fetch calls."
+    }
+}
+
+# Every entry holds analysis/effort_level/reasoning and optionally
+# recommended_implementation, so dict.update copies exactly those fields.
+applied = []
+for bug in bugs:
+    num = bug.get('number')
+    if num in updates:
+        bug.update(updates[num])
+        applied.append(num)
+
+with open(BUGS_FILE, 'w', encoding='utf-8') as f:
+    json.dump(bugs, f, indent=2)
+
+# Report the real count instead of assuming every listed issue was present.
+print(f"Updated {len(applied)} bugs.")
+missing = sorted(set(updates) - set(applied))
+if missing:
+    print("Not found in " + BUGS_FILE + ": " + ", ".join(f"#{n}" for n in missing) + ".")
@@ -0,0 +1,51 @@
+import json
+
+BUGS_FILE = '../data/bugs.json'
+
+with open(BUGS_FILE, 'r', encoding='utf-8') as f:
+    bugs = json.load(f)
+
+# Verified analyses keyed by issue number. Each entry overwrites the matching
+# fields of that issue.
+verified_updates = {
+    25757: {
+        "analysis": "Slow boot times (9.77s) are likely due to sequential initialization of heavy services like `agentRegistry`, `toolRegistry` (which may check `canUseRipgrep`), and MCP clients in `Config._initialize`.",
+        "effort_level": "medium",
+        "reasoning": "Requires profiling `packages/core/src/config/config.ts` to identify the specific bottleneck. Parallelizing initialization is a non-trivial refactor to avoid race conditions during service hydration."
+    },
+    25744: {
+        "analysis": "Account suspension (403 error) is an external issue, but the CLI should catch this status code and provide a user-friendly explanation rather than a raw API error.",
+        "effort_level": "small",
+        "reasoning": "Localized fix in the `OAuth` provider or `GeminiChat` error handler. Requires checking for `status === 403` and returning a clear message directing the user to Google Support.",
+        "recommended_implementation": "In `packages/core/src/utils/errors.ts` or the API transport layer, add a specific case for 403 errors that maps to a 'Account Suspended or Restricted' message."
+    },
+    25656: {
+        "analysis": "Markdown rendering fails for LaTeX syntax because the `inlineRegex` in `markdownParsingUtils.ts` does not account for `$` delimiters, and `stripUnsafeCharacters` may be over-eager.",
+        "effort_level": "medium",
+        "reasoning": "Requires updating the markdown parser logic in `packages/cli/src/ui/utils/markdownParsingUtils.ts` to recognize math blocks and ensuring that LaTeX-specific characters like `\\` are preserved during sanitization.",
+    },
+    25615: {
+        "analysis": "Infinite UI loop on Windows during `run_shell_command` suggests a synchronization or buffer handling issue between the shell process and the Ink TUI when handling Windows-specific control characters.",
+        "effort_level": "large",
+        "reasoning": "Extremely hard to reproduce and debug without a Windows environment. Impacts core process execution in `packages/core/src/tools/shell.ts` and terminal rendering in `packages/cli`.",
+    },
+    25610: {
+        "analysis": "Theme validation error for `text.response` key is caused by a mismatch between the `CustomTheme` TypeScript interface and the JSON schema used for validation.",
+        "effort_level": "small",
+        "reasoning": "The `CustomTheme` interface in `packages/core/src/config/config.ts` includes `response`, but the `SETTINGS_SCHEMA` in `packages/cli/src/config/settingsSchema.ts` does not. This is a one-line schema update.",
+        "recommended_implementation": "In `packages/cli/src/config/settingsSchema.ts`, add `response: { type: 'string' }` to the `CustomTheme.properties.text.properties` object."
+    }
+}
+
+# Every entry holds analysis/effort_level/reasoning and optionally
+# recommended_implementation, so dict.update copies exactly those fields.
+applied = []
+for bug in bugs:
+    num = bug.get('number')
+    if num in verified_updates:
+        bug.update(verified_updates[num])
+        applied.append(num)
+
+with open(BUGS_FILE, 'w', encoding='utf-8') as f:
+    json.dump(bugs, f, indent=2)
+
+# Report the real count instead of assuming all five issues were present.
+print(f"Saved verified updates for {len(applied)} of {len(verified_updates)} bugs.")
+missing = sorted(set(verified_updates) - set(applied))
+if missing:
+    print("Not found in " + BUGS_FILE + ": " + ", ".join(f"#{n}" for n in missing) + ".")
@@ -0,0 +1,134 @@
+import json
+import re
+import os
+
+ISSUES_FILE = 'backlog-analysis/issues.json'
+# The checkout location differs per machine; GEMINI_CLI_ROOT overrides the
+# original hard-coded path, which remains the default.
+REPO_ROOT = os.environ.get('GEMINI_CLI_ROOT', '/Users/cocosheng/gemini-cli')
+
+# Explicit encoding so the read does not depend on the platform's locale.
+with open(ISSUES_FILE, 'r', encoding='utf-8') as f:
+    issues = json.load(f)
+
+# Stricter criteria keywords. All entries are lowercase because they are
+# matched against lowercased issue text.
+LARGE_KEYWORDS = [
+    'windows', 'win32', 'wsl', 'wsl2', 'pty', 'pseudo-terminal', 'child_process', 'spawn', 'sigint', 'sigterm',
+    'memory leak', 'performance', 'boot time', 'infinite loop', 'hangs', 'freezes', 'crashes', 'race condition',
+    'intermittent', 'sometimes', 'flickering', 'a2a', 'mcp protocol', 'scheduler', 'event loop', 'websocket',
+    'stream', 'throughput', 'concurrency', 'deadlock', 'file descriptor', 'architecture', 'refactor'
+]
+
+MEDIUM_KEYWORDS = [
+    'react', 'hook', 'useeffect', 'usestate', 'usememo', 'ink', 'tui', 'ui state', 'parser', 'markdown',
+    'regex', 'regular expression', 'ansi', 'escape sequence', 'toml', 'schema', 'validation', 'zod',
+    'promise', 'async', 'await', 'unhandled', 'rejection', 'config', 'settings', 'env', 'environment',
+    'path resolution', 'symlink', 'git', 'telemetry', 'logging', 'format', 'display', 'rendering',
+    'clipboard', 'copy', 'paste', 'bracketed', 'interactive', 'dialog', 'modal', 'focus'
+]
+
+SMALL_KEYWORDS = [
+    'typo', 'spelling', 'rename', 'string', 'constant', 'css', 'color', 'theme.status', 'padding', 'margin',
+    'error message', 'econnreset', 'enotdir', 'etimedout', 'documentation', 'jsdoc', 'readme', 'help text',
+    'flag', 'version string', 'static value'
+]
+
+def find_files_in_text(text):
+    """Return the set of file-path-like tokens found in `text`.
+
+    Matches patterns like packages/cli/src/ui/components/Footer.tsx or
+    Footer.tsx: a run of word characters, '.', '/' and '-' ending in .ts,
+    .tsx, .js, .json or .md. Tokens that are the tail of a URL are excluded.
+    """
+    # Longest extensions first, plus a trailing \b: with "ts|tsx" and
+    # "js|json" the match stopped early, turning "Footer.tsx" into "Footer.ts"
+    # and "package.json" into "package.js".
+    matches = re.findall(r'[\w./-]+\.(?:tsx|ts|json|js|md)\b', text)
+    # ':' is not part of the token class, so a URL shows up as "//host/path";
+    # the leading "//" is what identifies it (a "http" prefix test never
+    # fires for URLs and only discards real names such as "httpClient.ts").
+    return {m for m in matches if not m.startswith('//')}
+
+def resolve_file(filename, repo_root=None):
+    """Locate `filename` inside the repository checkout.
+
+    Args:
+        filename: Relative path or bare file name mentioned in an issue.
+        repo_root: Directory to search; defaults to REPO_ROOT.
+
+    Returns:
+        The path of the file when `filename` exists relative to the root,
+        otherwise the first file in the tree with the same base name, or None
+        when nothing matches. `.git` and `node_modules` directories are not
+        searched.
+    """
+    base = REPO_ROOT if repo_root is None else repo_root
+
+    # os.path.join discards `base` for an absolute name, which would let the
+    # lookup leave the repository; absolute names only get the basename search.
+    if not os.path.isabs(filename):
+        direct = os.path.join(base, filename)
+        if os.path.isfile(direct):
+            return direct
+
+    # Try searching the repo for the basename
+    basename = os.path.basename(filename)
+    if not basename:
+        return None
+    for root, dirs, files in os.walk(base):
+        # Prune by exact directory name. A substring test on the path also
+        # skipped directories such as `.github`, and still walked every
+        # excluded subtree.
+        dirs[:] = [d for d in dirs if d not in ('.git', 'node_modules')]
+        if basename in files:
+            return os.path.join(root, basename)
+    return None
+
+def analyze_issue(issue):
+    """Estimate an issue's effort from keywords and the files it mentions.
+
+    File names found in the issue text are resolved against the repository
+    and their line counts summed. The effort is then derived from the number
+    of files, the total line count and a keyword hint (large/medium/small).
+
+    Args:
+        issue: Issue dict; missing or null text fields count as empty.
+
+    Returns:
+        A `(effort, validation_msg)` tuple, where effort is "small", "medium"
+        or "large" and validation_msg describes the evidence used.
+    """
+    # `or ''` also covers fields that are present but null.
+    fields = ('title', 'body', 'analysis', 'reasoning')
+    raw_text = " ".join(issue.get(name) or '' for name in fields)
+    combined_text = raw_text.lower()
+
+    # Paths are extracted from the original-case text: a lowercased
+    # "Footer.tsx" never matches the directory listing's exact file name.
+    # Sorted so the reported file order is the same on every run.
+    seen = set()
+    actual_files = []
+    total_lines = 0
+    for candidate in sorted(find_files_in_text(raw_text)):
+        resolved = resolve_file(candidate)
+        if not resolved or resolved in seen:
+            continue
+        try:
+            with open(resolved, 'r', encoding='utf-8') as file_obj:
+                lines = sum(1 for _ in file_obj)
+        except (OSError, UnicodeDecodeError):
+            # Unreadable or non-text file: leave it out of the evidence.
+            continue
+        seen.add(resolved)
+        actual_files.append((resolved, lines))
+        total_lines += lines
+
+    num_files = len(actual_files)
+
+    def _mentions_any(keywords):
+        """Return True if any keyword occurs as a whole word in the text."""
+        return any(re.search(r'\b' + re.escape(kw) + r'\b', combined_text) for kw in keywords)
+
+    # Keyword analysis
+    if _mentions_any(LARGE_KEYWORDS):
+        keyword_effort = "large"
+    elif _mentions_any(MEDIUM_KEYWORDS):
+        keyword_effort = "medium"
+    else:
+        keyword_effort = "small"
+
+    # Codebase heuristic
+    if num_files == 0:
+        # If no files found, rely strictly on keywords, but default to medium to be safe
+        effort = keyword_effort if keyword_effort in ('medium', 'large') else 'medium'
+        return effort, f"No specific files identified in codebase. Keyword heuristic: {keyword_effort}."
+
+    file_details = ", ".join(f"{os.path.basename(path)} ({count} lines)" for path, count in actual_files)
+    summary = f"Codebase validation: {num_files} files ({file_details}), {total_lines} total lines."
+    if num_files > 3 or total_lines > 1500 or keyword_effort == "large":
+        return "large", f"{summary} Keyword hint: {keyword_effort}."
+    if num_files >= 2 or total_lines > 500 or keyword_effort == "medium":
+        return "medium", f"{summary} Keyword hint: {keyword_effort}."
+    return "small", f"{summary} Appears highly localized."
+
+# Prefixes of the messages this script appends to `reasoning`.
+VALIDATION_PREFIXES = ('Codebase validation:', 'No specific files identified')
+
+updated_count = 0
+for issue in issues:
+    old_effort = issue.get('effort_level')
+
+    # Strip any previous validation messages BEFORE analysing. Otherwise the
+    # word "validation" in an earlier note is itself picked up as a MEDIUM
+    # keyword on the next run. Everything from the first validation segment
+    # onward is dropped, including when it is the very first segment.
+    kept = []
+    for segment in (issue.get('reasoning') or '').split(' | '):
+        if segment.startswith(VALIDATION_PREFIXES):
+            break
+        kept.append(segment)
+    existing_reasoning = ' | '.join(kept)
+    issue['reasoning'] = existing_reasoning
+
+    new_effort, validation_reason = analyze_issue(issue)
+
+    issue['effort_level'] = new_effort
+
+    # Store the validation reason in the reasoning field
+    issue['reasoning'] = f"{existing_reasoning} | {validation_reason}".strip(' |')
+
+    if old_effort != new_effort:
+        updated_count += 1
+
+with open(ISSUES_FILE, 'w', encoding='utf-8') as f:
+    json.dump(issues, f, indent=2)
+
+print(f"Successfully re-evaluated and updated {updated_count} issues. Codebase validated.")