diff --git a/.github/workflows/gemini-cli-bot-brain.yml b/.github/workflows/gemini-cli-bot-brain.yml
index ed63e73887..cf1741f946 100644
--- a/.github/workflows/gemini-cli-bot-brain.yml
+++ b/.github/workflows/gemini-cli-bot-brain.yml
@@ -9,16 +9,17 @@ concurrency:
   group: '${{ github.workflow }}-${{ github.ref }}'
   cancel-in-progress: true
 
-permissions:
-  contents: 'write'
-  issues: 'write'
-  pull-requests: 'write'
-
 jobs:
-  brain:
+  reasoning:
     name: 'Brain (Reasoning Layer)'
     runs-on: 'ubuntu-latest'
     if: "github.repository == 'google-gemini/gemini-cli'"
+    # The reasoning phase is strictly read-only.
+    permissions:
+      contents: 'read'
+      issues: 'read'
+      pull-requests: 'read'
+      actions: 'read'
     steps:
       - name: 'Checkout'
         uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
@@ -37,9 +38,84 @@ jobs:
       - name: 'Build Gemini CLI'
         run: 'npm run bundle'
 
-      - name: 'Download Previous Metrics'
+      - name: 'Download Previous State'
+        env:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: |
+          # Find the last successful run of this workflow
+          LAST_RUN_ID=$(gh run list --workflow "${GITHUB_WORKFLOW}" --status success --limit 1 --json databaseId --jq '.[0].databaseId')
+
+          if [ -n "$LAST_RUN_ID" ]; then
+            echo "Found previous successful run: $LAST_RUN_ID"
+
+            # Download metrics artifacts
+            gh run download "$LAST_RUN_ID" -n metrics-before -D tools/gemini-cli-bot/history/ || echo "metrics-before not found"
+            gh run download "$LAST_RUN_ID" -n metrics-timeseries -D tools/gemini-cli-bot/history/ || echo "metrics-timeseries not found"
+
+            # Download brain memory (lessons learned and scripts)
+            gh run download "$LAST_RUN_ID" -n lessons-learned -D tools/gemini-cli-bot/ || echo "lessons-learned not found"
+            gh run download "$LAST_RUN_ID" -n brain-scripts -D tools/gemini-cli-bot/processes/scripts/ || echo "brain-scripts not found"
+          else
+            echo "No previous successful run found."
+          fi
+
+      - name: 'Collect Current Metrics'
+        env:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: 'npm run metrics'
+
+      # NOTE(review): this rename runs after 'Collect Current Metrics'. If
+      # `npm run metrics` writes metrics-before.csv, the snapshot downloaded
+      # above is overwritten before it is renamed; confirm the intended order.
+      - name: 'Prepare Metrics'
+        run: |
+          if [ -f "tools/gemini-cli-bot/history/metrics-before.csv" ]; then
+            mv tools/gemini-cli-bot/history/metrics-before.csv tools/gemini-cli-bot/history/metrics-before-prev.csv
+          fi
+
+      - name: 'Run Brain Phases'
+        env:
+          GOOGLE_GENERATIVE_AI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: 'npm run brain'
+
+      - name: 'Stash Brain Outputs'
+        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
+        with:
+          name: 'brain-outputs'
+          # upload-artifact roots multiple paths at their least common
+          # ancestor (tools/gemini-cli-bot/), so the artifact holds
+          # lessons-learned.md and processes/scripts/ at its top level.
+          path: |
+            tools/gemini-cli-bot/lessons-learned.md
+            tools/gemini-cli-bot/processes/scripts/
+          retention-days: 1
+
+  publish:
+    name: 'Publish Artifacts (Archive Layer)'
+    needs: 'reasoning'
+    runs-on: 'ubuntu-latest'
+    if: "github.repository == 'google-gemini/gemini-cli'"
+    # The publish phase only archives artifacts to preserve state across runs.
+    permissions:
+      actions: 'write'
+    steps:
+      - name: 'Download Brain Outputs'
         uses: 'actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093' # ratchet:actions/download-artifact@v4
         with:
-          name: 'metrics-before'
-          path: 'tools/gemini-cli-bot/history/'
-        continue-on-error: true
+          name: 'brain-outputs'
+          path: 'temp_outputs/'
+
+      - name: 'Archive Lessons Learned'
+        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
+        with:
+          name: 'lessons-learned'
+          path: 'temp_outputs/lessons-learned.md'
+          retention-days: 90
+
+      - name: 'Archive Brain Scripts'
+        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
+        with:
+          name: 'brain-scripts'
+          path: 'temp_outputs/processes/scripts/'
+          retention-days: 90
diff --git a/package.json b/package.json
index 06e4765317..b818ff00da 100644
--- a/package.json
+++ b/package.json
@@ -64,6 +64,7 @@
     "format": "prettier --experimental-cli --write .",
     "typecheck": "npm run typecheck --workspaces --if-present && tsc -b evals/tsconfig.json integration-tests/tsconfig.json memory-tests/tsconfig.json",
     "metrics": "tsx tools/gemini-cli-bot/metrics/index.ts",
+    "brain": "node bundle/gemini.js tools/gemini-cli-bot/investigations/metrics_analysis.md && node bundle/gemini.js tools/gemini-cli-bot/critique/policy_evaluation.md",
     "preflight": "npm run clean && npm ci && npm run format && npm run build && npm run lint:ci && npm run typecheck && npm run test:ci",
     "prepare": "husky && npm run bundle",
     "prepare:package": "node scripts/prepare-package.js",
diff --git a/tools/gemini-cli-bot/critique/policy_evaluation.md b/tools/gemini-cli-bot/critique/policy_evaluation.md
new file mode 100644
index 0000000000..7a5f9a97d6
--- /dev/null
+++ b/tools/gemini-cli-bot/critique/policy_evaluation.md
@@ -0,0 +1,32 @@
+# Phase: Policy Critique & Evaluation
+
+## Goal
+
+Evaluate the effectiveness of current repository policies and automation, and
+suggest improvements based on empirical data.
+
+## Context
+
+- Current "Pulse" scripts are in `tools/gemini-cli-bot/processes/scripts/`.
+  (Note: these may be empty if not yet implemented.)
+- Metrics and investigation results are available.
+- Previous lessons learned are in `tools/gemini-cli-bot/lessons-learned.md`.
+
+## Instructions
+
+1. **Review Policies**: Examine the existing automation in `.github/workflows/`
+   and any scripts in `tools/gemini-cli-bot/processes/scripts/`.
+2. **Analyze Effectiveness**: Based on the metrics analysis from the
+   Investigation phase, determine if the current policies are achieving their
+   goals.
+   - Is the "Pulse" triage effectively reducing issue/PR latency?
+   - Are stale issues being closed as expected?
+   - Is the "Brain" identifying the right problems?
+3. **Identify Gaps**: Where is the automation failing? Are there manual tasks
+   that should be automated?
+4. **Propose Changes**: Recommend specific changes to:
+   - GitHub Workflows.
+   - Triage scripts.
+   - Repository `CONTRIBUTING.md` or `GEMINI.md` guidelines.
+5. **Record Critique**: Append your evaluation and proposed changes to
+   `tools/gemini-cli-bot/lessons-learned.md`.
diff --git a/tools/gemini-cli-bot/investigations/metrics_analysis.md b/tools/gemini-cli-bot/investigations/metrics_analysis.md
new file mode 100644
index 0000000000..ab36b8426a
--- /dev/null
+++ b/tools/gemini-cli-bot/investigations/metrics_analysis.md
@@ -0,0 +1,83 @@
+# Phase: Metrics Investigation & Root-Cause Analysis
+
+## Goal
+
+Analyze time-series repository metrics to identify trends and anomalies,
+formulate hypotheses, and rigorously investigate root causes to safely improve
+repository health.
+
+## Context
+
+- Time-series repository metrics are stored in
+  `tools/gemini-cli-bot/history/metrics-timeseries.csv`.
+- Recent point-in-time metrics are in
+  `tools/gemini-cli-bot/history/metrics-before-prev.csv` and the current run's
+  metrics.
+- Findings and state are recorded in `tools/gemini-cli-bot/lessons-learned.md`.
+
+## Repo Policy Priorities
+
+When analyzing data and proposing solutions, prioritize the following in order:
+
+1. **Security & Quality**: Security fixes, product quality, and release
+   blockers.
+2. **Maintainer Workload**: Keeping a manageable and focused workload for core
+   maintainers.
+3. **Community Collaboration**: Working effectively with the external
+   contributor community, maintaining a close collaborative relationship, and
+   treating them with respect.
+
+## Instructions
+
+### 1. Read & Identify Trends (Time-Series Analysis)
+
+- Load and analyze `tools/gemini-cli-bot/history/metrics-timeseries.csv`.
+- Identify significant anomalies or deteriorating trends over time (e.g.,
+  `latency_pr_overall_hours` steadily increasing, `open_issues` growing faster
+  than closure rates, spikes in `review_distribution_variance`).
+
+### 2. Hypothesis Testing & Deep Dive
+
+For each metric not meeting goals or showing a negative trend:
+
+- **Develop Competing Hypotheses**: Brainstorm multiple potential root causes
+  (e.g., "PR Latency is high because CI is flaky" vs. "PR Latency is high
+  because reviewers are unresponsive").
+- **Gather Evidence**: Use your tools (e.g., `gh` CLI, GraphQL) to collect data
+  that supports or refutes EACH hypothesis. You may write temporary local
+  scripts to slice the data (e.g., checking issue labels, ages, or assignees).
+- **Select Root Cause**: Identify the hypothesis most strongly supported by the
+  data.
+- **Prioritize Impact**: Always prioritize solving for verified hypotheses that
+  have the largest impact (e.g., if 30 out of 500 PRs have merge conflicts,
+  fixing merge conflicts is lower priority than addressing a bottleneck
+  affecting 300 PRs).
+
+### 3. Maintainer Workload Assessment
+
+Before attributing a bottleneck to maintainers or proposing processes that rely
+on maintainer action (e.g., more triage, more reviews):
+
+- **Quantify Capacity**: Assess the volume of open, unactioned work (untriaged
+  issues, review requests) against the number of active maintainers.
+- If the ratio indicates overload, **do not propose solutions that simply
+  generate more pings**. Instead, prioritize systemic triage, automated routing,
+  or auto-closure processes.
+
+### 4. Actor-Aware Bottleneck Identification
+
+Before proposing an intervention, accurately identify the blocker:
+
+- **Waiting on Author**: Needs a polite nudge or closure grace period.
+- **Waiting on Maintainer**: Needs routing, aggregated reports, or escalation
+  (do not nudge the author).
+- **Waiting on System (CI/Infra)**: Needs tooling fixes or reporting.
+
+### 5. Record Findings & Propose Actions
+
+- Document your formulated hypotheses, the evidence gathered, and your final
+  conclusions in `tools/gemini-cli-bot/lessons-learned.md`.
+- Propose specific, data-backed actions or script updates to address the root
+  cause. Ensure proposed actions align with the Repo Policy Priorities and
+  include concepts like graceful closures and terminal escalations to prevent
+  spam.