Implement analysis phase.

2026-05-15 22:33:05 -07:00 · 2026-04-24 12:16:44 -07:00
parent 6f4dff177a
commit 3de330ebac
4 changed files with 196 additions and 10 deletions
@@ -9,16 +9,17 @@ concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}'
  cancel-in-progress: true

-permissions:
-  contents: 'write'
-  issues: 'write'
-  pull-requests: 'write'
-
 jobs:
-  brain:
+  reasoning:
    name: 'Brain (Reasoning Layer)'
    runs-on: 'ubuntu-latest'
    if: "github.repository == 'google-gemini/gemini-cli'"
+    # The reasoning phase is strictly read-only.
+    permissions:
+      contents: 'read'
+      issues: 'read'
+      pull-requests: 'read'
+      actions: 'read'
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
@@ -37,9 +38,78 @@ jobs:
      - name: 'Build Gemini CLI'
        run: 'npm run bundle'

-      - name: 'Download Previous Metrics'
+      - name: 'Download Previous State'
+        env:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: |
+          # Find the last successful run of this workflow. GITHUB_WORKFLOW is read at runtime, so the name is never spliced into the script text.
+          LAST_RUN_ID=$(gh run list --workflow "$GITHUB_WORKFLOW" --status success --limit 1 --json databaseId --jq '.[0].databaseId // empty')
+
+          if [ -n "$LAST_RUN_ID" ]; then
+            echo "Found previous successful run: $LAST_RUN_ID"
+
+            # Download metrics artifacts (best effort). NOTE(review): no step shown in this change uploads them; confirm a producer exists.
+            gh run download "$LAST_RUN_ID" -n metrics-before -D tools/gemini-cli-bot/history/ || echo "metrics-before not found"
+            gh run download "$LAST_RUN_ID" -n metrics-timeseries -D tools/gemini-cli-bot/history/ || echo "metrics-timeseries not found"
+
+            # Download brain memory (lessons learned and scripts)
+            gh run download "$LAST_RUN_ID" -n lessons-learned -D tools/gemini-cli-bot/ || echo "lessons-learned not found"
+            gh run download "$LAST_RUN_ID" -n brain-scripts -D tools/gemini-cli-bot/processes/scripts/ || echo "brain-scripts not found"
+          else
+            echo "No previous successful run found."
+          fi
+
+      - name: 'Collect Current Metrics'
+        env:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: 'npm run metrics'  # package.json maps this to: tsx tools/gemini-cli-bot/metrics/index.ts
+
+      - name: 'Prepare Metrics'  # renames an existing metrics-before.csv to metrics-before-prev.csv
+        run: |  # NOTE(review): runs after 'Collect Current Metrics'; confirm that order is intended
+          if [ -f "tools/gemini-cli-bot/history/metrics-before.csv" ]; then
+            mv tools/gemini-cli-bot/history/metrics-before.csv tools/gemini-cli-bot/history/metrics-before-prev.csv
+          fi
+
+      - name: 'Run Brain Phases'
+        env:
+          GOOGLE_GENERATIVE_AI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: 'npm run brain'  # package.json chains metrics_analysis.md, then policy_evaluation.md, with &&
+
+      - name: 'Stash Brain Outputs'  # hands the brain's files to the 'publish' job
+        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
+        with:
+          name: 'brain-outputs'
+          path: |
+            tools/gemini-cli-bot/lessons-learned.md
+            tools/gemini-cli-bot/processes/scripts/
+          retention-days: 1  # short-lived: 'publish' re-uploads these with 90-day retention
+
+  publish:
+    name: 'Publish Artifacts (Archive Layer)'
+    needs: 'reasoning'
+    runs-on: 'ubuntu-latest'
+    if: "github.repository == 'google-gemini/gemini-cli'"
+    # The publish phase only archives the brain outputs so that state persists across runs.
+    permissions:
+      actions: 'write'
+    steps:
+      - name: 'Download Brain Outputs'  # fetches the artifact stashed by the 'reasoning' job
        uses: 'actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093' # ratchet:actions/download-artifact@v4
        with:
-          name: 'metrics-before'
-          path: 'tools/gemini-cli-bot/history/'
-        continue-on-error: true
+          name: 'brain-outputs'
+          path: 'temp_outputs/'
+
+      - name: 'Archive Lessons Learned'
+        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
+        with:
+          name: 'lessons-learned'  # same artifact name that 'Download Previous State' requests
+          path: 'temp_outputs/lessons-learned.md'
+          retention-days: 90
+
+      - name: 'Archive Brain Scripts'
+        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
+        with:
+          name: 'brain-scripts'  # same artifact name that 'Download Previous State' requests
+          path: 'temp_outputs/processes/scripts/'
+          retention-days: 90
@@ -64,6 +64,7 @@
    "format": "prettier --experimental-cli --write .",
    "typecheck": "npm run typecheck --workspaces --if-present && tsc -b evals/tsconfig.json integration-tests/tsconfig.json memory-tests/tsconfig.json",
    "metrics": "tsx tools/gemini-cli-bot/metrics/index.ts",
+    "brain": "node bundle/gemini.js tools/gemini-cli-bot/investigations/metrics_analysis.md && node bundle/gemini.js tools/gemini-cli-bot/critique/policy_evaluation.md",
    "preflight": "npm run clean && npm ci && npm run format && npm run build && npm run lint:ci && npm run typecheck && npm run test:ci",
    "prepare": "husky && npm run bundle",
    "prepare:package": "node scripts/prepare-package.js",
@@ -0,0 +1,32 @@
+# Phase: Policy Critique & Evaluation
+
+## Goal
+
+Evaluate the effectiveness of current repository policies and automation, and
+suggest improvements based on empirical data.
+
+## Context
+
+- Current "Pulse" scripts are in `tools/gemini-cli-bot/processes/scripts/`
+  (this directory may be empty if none are implemented yet).
+- Metrics are in `tools/gemini-cli-bot/history/`; investigation results and
+  previous lessons learned are in `tools/gemini-cli-bot/lessons-learned.md`.
+
+## Instructions
+
+1.  **Review Policies**: Examine the existing automation in `.github/workflows/`
+    and any scripts in `tools/gemini-cli-bot/processes/scripts/`.
+2.  **Analyze Effectiveness**: Based on the metrics analysis from the
+    Investigation phase, determine if the current policies are achieving their
+    goals.
+    - Is the "Pulse" triage effectively reducing issue/PR latency?
+    - Are stale issues being closed as expected?
+    - Is the "Brain" identifying the right problems?
+3.  **Identify Gaps**: Where is the automation failing? Are there manual tasks
+    that should be automated?
+4.  **Propose Changes**: Recommend specific changes to:
+    - GitHub Workflows.
+    - Triage scripts.
+    - Repository `CONTRIBUTING.md` or `GEMINI.md` guidelines.
+5.  **Record Critique**: Append your evaluation and proposed changes to
+    `tools/gemini-cli-bot/lessons-learned.md`.
@@ -0,0 +1,83 @@
+# Phase: Metrics Investigation & Root-Cause Analysis
+
+## Goal
+
+Analyze time-series repository metrics to identify trends and anomalies,
+formulate hypotheses, and rigorously investigate root causes to safely improve
+repository health.
+
+## Context
+
+- Time-series repository metrics are stored in
+  `tools/gemini-cli-bot/history/metrics-timeseries.csv`.
+- Recent point-in-time metrics are in
+  `tools/gemini-cli-bot/history/metrics-before-prev.csv` and the current run's
+  metrics.
+- Findings and state are recorded in `tools/gemini-cli-bot/lessons-learned.md`.
+
+## Repo Policy Priorities
+
+When analyzing data and proposing solutions, prioritize the following in order:
+
+1.  **Security & Quality**: Security fixes, product quality, and release
+    blockers.
+2.  **Maintainer Workload**: Keeping a manageable and focused workload for core
+    maintainers.
+3.  **Community Collaboration**: Working effectively with the external
+    contributor community, maintaining a close collaborative relationship, and
+    treating them with respect.
+
+## Instructions
+
+### 1. Read & Identify Trends (Time-Series Analysis)
+
+- Load and analyze `tools/gemini-cli-bot/history/metrics-timeseries.csv`.
+- Identify significant anomalies or deteriorating trends over time (e.g.,
+  `latency_pr_overall_hours` steadily increasing, `open_issues` growing faster
+  than closure rates, spikes in `review_distribution_variance`).
+
+### 2. Hypothesis Testing & Deep Dive
+
+For each metric not meeting goals or showing a negative trend:
+
+- **Develop Competing Hypotheses**: Brainstorm multiple potential root causes
+  (e.g., "PR Latency is high because CI is flaky" vs. "PR Latency is high
+  because reviewers are unresponsive").
+- **Gather Evidence**: Use your tools (e.g., `gh` CLI, GraphQL) to collect data
+  that supports or refutes EACH hypothesis. You may write temporary local
+  scripts to slice the data (e.g., checking issue labels, ages, or assignees).
+- **Select Root Cause**: Identify the hypothesis most strongly supported by the
+  data.
+- **Prioritize Impact**: Always prioritize solving for verified hypotheses that
+  have the largest impact (e.g., if 30 out of 500 PRs have merge conflicts,
+  fixing merge conflicts is lower priority than addressing a bottleneck
+  affecting 300 PRs).
+
+### 3. Maintainer Workload Assessment
+
+Before attributing a problem to maintainers or proposing processes that rely on
+maintainer action (e.g., more triage, more reviews):
+
+- **Quantify Capacity**: Assess the volume of open, unactioned work (untriaged
+  issues, review requests) against the number of active maintainers.
+- If the ratio indicates overload, **do not propose solutions that simply
+  generate more pings**. Instead, prioritize systemic triage, automated routing,
+  or auto-closure processes.
+
+### 4. Actor-Aware Bottleneck Identification
+
+Before proposing an intervention, accurately identify the blocker:
+
+- **Waiting on Author**: Needs a polite nudge or closure grace period.
+- **Waiting on Maintainer**: Needs routing, aggregated reports, or escalation
+  (do not nudge the author).
+- **Waiting on System (CI/Infra)**: Needs tooling fixes or reporting.
+
+### 5. Record Findings & Propose Actions
+
+- Document your formulated hypotheses, the evidence gathered, and your final
+  conclusions in `tools/gemini-cli-bot/lessons-learned.md`.
+- Propose specific, data-backed actions or script updates to address the root
+  cause. Ensure proposed actions align with the Repo Policy Priorities and
+  include concepts like graceful closures and terminal escalations to prevent
+  spam.