From 99e5164c8280ae1bf4bcd9e84d0c43da5dd32daf Mon Sep 17 00:00:00 2001
From: Alisa <62909685+alisa-alisa@users.noreply.github.com>
Date: Mon, 23 Mar 2026 08:07:40 -0700
Subject: [PATCH] Evals: PR Guidance adding workflow (#23164)

---
Review notes (ignored by git am; not part of the commit message):
- Workflow: event context is passed to the script through env vars, and a
  failed permission lookup no longer fails the job.
- changed_prompt.js: the signature diff is read with an explicit maxBuffer.
- The index lines below still carry the blob ids of the original commit.

 .github/workflows/eval-guidance.yml | 75 ++++++++++++++++++++++++++
 scripts/changed_prompt.js           | 81 ++++++++++++++++++++++++-----
 2 files changed, 145 insertions(+), 11 deletions(-)
 create mode 100644 .github/workflows/eval-guidance.yml

diff --git a/.github/workflows/eval-guidance.yml b/.github/workflows/eval-guidance.yml
new file mode 100644
index 0000000000..e1f1ab3168
--- /dev/null
+++ b/.github/workflows/eval-guidance.yml
@@ -0,0 +1,75 @@
+name: 'Evals: PR Guidance'
+
+on:
+  pull_request:
+    paths:
+      - 'packages/core/src/**/*.ts'
+      - '!**/*.test.ts'
+      - '!**/*.test.tsx'
+
+permissions:
+  pull-requests: 'write'
+  contents: 'read'
+
+jobs:
+  provide-guidance:
+    name: 'Model Steering Guidance'
+    runs-on: 'ubuntu-latest'
+    if: "github.repository == 'google-gemini/gemini-cli'"
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: 'Set up Node.js'
+        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'npm'
+
+      - name: 'Detect Steering Changes'
+        id: 'detect'
+        run: |
+          STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only)
+          echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT"
+
+      - name: 'Analyze PR Content'
+        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
+        id: 'analysis'
+        env:
+          GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+          # Pass event context through the environment rather than expanding it
+          # inside the script, so the values are never parsed as shell code.
+          BASE_REF: '${{ github.base_ref }}'
+          REPOSITORY: '${{ github.repository }}'
+          ACTOR: '${{ github.actor }}'
+        run: |
+          # Check for behavioral eval changes
+          EVAL_CHANGES=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep "^evals/" || true)
+          if [ -z "$EVAL_CHANGES" ]; then
+            echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT"
+          fi
+
+          # Check if user is a maintainer (has write/admin access).
+          # A failed lookup is treated as "not a maintainer" instead of failing the job.
+          USER_PERMISSION=$(gh api "repos/${REPOSITORY}/collaborators/${ACTOR}/permission" --jq '.permission' || true)
+          if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then
+            echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: 'Post Guidance Comment'
+        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
+        uses: 'thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74' # ratchet:thollander/actions-comment-pull-request@v3
+        with:
+          comment-tag: 'eval-guidance-bot'
+          message: |
+            ### 🧠 Model Steering Guidance
+
+            This PR modifies files that affect the model's behavior (prompts, tools, or instructions).
+
+            ${{ steps.analysis.outputs.MISSING_EVALS == 'true' && '- ⚠️ **Consider adding Evals:** No behavioral evaluations (`evals/*.eval.ts`) were added or updated in this PR. Consider adding a test case to verify the new behavior and prevent regressions.' || '' }}
+            ${{ steps.analysis.outputs.IS_MAINTAINER == 'true' && '- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging.' || '' }}
+
+            ---
+            *This is an automated guidance message triggered by steering logic signatures.*
diff --git a/scripts/changed_prompt.js b/scripts/changed_prompt.js
index 0ad0e365f7..22563810e4 100644
--- a/scripts/changed_prompt.js
+++ b/scripts/changed_prompt.js
@@ -5,14 +5,29 @@
  */
 import { execSync } from 'node:child_process';
 
-const EVALS_FILE_PREFIXES = [
+// Any changed file under one of these directories counts as a steering change.
+const CORE_STEERING_PATHS = [
   'packages/core/src/prompts/',
   'packages/core/src/tools/',
-  'evals/',
+];
+
+// Changes under these paths also count, unless --steering-only is passed.
+const TEST_PATHS = ['evals/'];
+
+// Substrings looked for in the diff of packages/core/src/.
+const STEERING_SIGNATURES = [
+  'LocalAgentDefinition',
+  'LocalInvocation',
+  'ToolDefinition',
+  'inputSchema',
+  "kind: 'local'",
 ];
 
 function main() {
   const targetBranch = process.env.GITHUB_BASE_REF || 'main';
+  const verbose = process.argv.includes('--verbose');
+  const steeringOnly = process.argv.includes('--steering-only');
+
   try {
     const remoteUrl = process.env.GITHUB_REPOSITORY
       ? `https://github.com/${process.env.GITHUB_REPOSITORY}.git`
@@ -30,18 +45,62 @@ function main() {
       .split('\n')
       .filter(Boolean);
 
-    const shouldRun = changedFiles.some((file) =>
-      EVALS_FILE_PREFIXES.some((prefix) => file.startsWith(prefix)),
-    );
+    let detected = false;
+    const reasons = [];
 
-    console.log(shouldRun ? 'true' : 'false');
+    // 1. Path-based detection
+    for (const file of changedFiles) {
+      if (CORE_STEERING_PATHS.some((prefix) => file.startsWith(prefix))) {
+        detected = true;
+        reasons.push(`Matched core steering path: ${file}`);
+        if (!verbose) break;
+      }
+      if (
+        !steeringOnly &&
+        TEST_PATHS.some((prefix) => file.startsWith(prefix))
+      ) {
+        detected = true;
+        reasons.push(`Matched test path: ${file}`);
+        if (!verbose) break;
+      }
+    }
+
+    // 2. Signature-based detection (only in packages/core/src/ and only if not already detected or if verbose)
+    if (!detected || verbose) {
+      const coreChanges = changedFiles.filter((f) =>
+        f.startsWith('packages/core/src/'),
+      );
+      if (coreChanges.length > 0) {
+        // Get the actual diff content for core files. The default execSync
+        // maxBuffer (1 MiB) is too small for large PRs and would throw, which
+        // the catch below turns into a false-positive 'true'.
+        const diff = execSync(
+          `git diff -U0 FETCH_HEAD...HEAD -- packages/core/src/`,
+          { encoding: 'utf-8', maxBuffer: 64 * 1024 * 1024 },
+        );
+        for (const sig of STEERING_SIGNATURES) {
+          if (diff.includes(sig)) {
+            detected = true;
+            reasons.push(`Matched steering signature in core: ${sig}`);
+            if (!verbose) break;
+          }
+        }
+      }
+    }
+
+    if (verbose && reasons.length > 0) {
+      process.stderr.write('Detection reasons:\n');
+      reasons.forEach((r) => process.stderr.write(` - ${r}\n`));
+    }
+
+    process.stdout.write(detected ? 'true' : 'false');
   } catch (error) {
-    // If anything fails (e.g., no git history), run evals to be safe
-    console.warn(
-      'Warning: Failed to determine if evals should run. Defaulting to true.',
+    // If anything fails (e.g., no git history), run evals/guidance to be safe
+    process.stderr.write(
+      'Warning: Failed to determine if changes occurred. Defaulting to true.\n',
     );
-    console.error(error);
-    console.log('true');
+    process.stderr.write(String(error) + '\n');
+    process.stdout.write('true');
   }
 }
 