Evals: PR Guidance adding workflow (#23164)

This commit is contained in:
Alisa
2026-03-23 08:07:40 -07:00
committed by GitHub
parent d0ebc81c28
commit 99e5164c82
2 changed files with 134 additions and 11 deletions

69
.github/workflows/eval-guidance.yml vendored Normal file
View File

@@ -0,0 +1,69 @@
# Advisory workflow: when a pull request changes non-test TypeScript under
# packages/core/src/, run scripts/changed_prompt.js to decide whether the
# change touches model-steering code and, if so, post (or update) a single
# tagged guidance comment on the PR.
name: 'Evals: PR Guidance'

on:
  pull_request:
    paths:
      - 'packages/core/src/**/*.ts'
      - '!**/*.test.ts'
      - '!**/*.test.tsx'

permissions:
  pull-requests: 'write'
  contents: 'read'

jobs:
  provide-guidance:
    name: 'Model Steering Guidance'
    runs-on: 'ubuntu-latest'
    if: "github.repository == 'google-gemini/gemini-cli'"
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v4
        with:
          # Full history is needed so the base branch can be diffed against HEAD.
          fetch-depth: 0

      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0
        with:
          node-version-file: '.nvmrc'
          cache: 'npm'

      - name: 'Detect Steering Changes'
        id: 'detect'
        run: |
          # The script prints exactly 'true' or 'false' on stdout.
          STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only)
          echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT"

      - name: 'Analyze PR Content'
        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
        id: 'analysis'
        env:
          GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
          # Context values are passed through the environment instead of being
          # expanded into the script text, so they can never be parsed as shell.
          BASE_REF: '${{ github.base_ref }}'
          REPOSITORY: '${{ github.repository }}'
          ACTOR: '${{ github.actor }}'
        run: |
          # Check for behavioral eval changes
          EVAL_CHANGES=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep "^evals/" || true)
          if [ -z "$EVAL_CHANGES" ]; then
            echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT"
          fi

          # Check if user is a maintainer (has write/admin access).
          # A failed lookup must not fail this advisory job (the default shell
          # is `bash -e`), so fall back to 'none' and skip the reminder.
          USER_PERMISSION=$(gh api "repos/${REPOSITORY}/collaborators/${ACTOR}/permission" --jq '.permission' || echo 'none')
          if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then
            echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT"
          fi

      - name: 'Post Guidance Comment'
        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
        uses: 'thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74' # ratchet:thollander/actions-comment-pull-request@v3
        with:
          # A stable tag lets the action update its previous comment instead of
          # adding a new one on every push.
          comment-tag: 'eval-guidance-bot'
          message: |
            ### 🧠 Model Steering Guidance

            This PR modifies files that affect the model's behavior (prompts, tools, or instructions).

            ${{ steps.analysis.outputs.MISSING_EVALS == 'true' && '- ⚠️ **Consider adding Evals:** No behavioral evaluations (`evals/*.eval.ts`) were added or updated in this PR. Consider adding a test case to verify the new behavior and prevent regressions.' || '' }}
            ${{ steps.analysis.outputs.IS_MAINTAINER == 'true' && '- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging.' || '' }}

            ---
            *This is an automated guidance message triggered by steering logic signatures.*

View File

@@ -5,14 +5,26 @@
*/
import { execSync } from 'node:child_process';
const EVALS_FILE_PREFIXES = [
const CORE_STEERING_PATHS = [
'packages/core/src/prompts/',
'packages/core/src/tools/',
'evals/',
];
// Path prefixes compared with `startsWith` against each changed file. A match
// only counts as a detection when `--steering-only` is NOT passed.
const TEST_PATHS = ['evals/'];
// Literal substrings looked up with `String.prototype.includes` in the
// zero-context `git diff` output for packages/core/src/. Any hit marks the
// change as steering-related.
// NOTE(review): the match runs over the raw diff text, so a signature that
// appears only on a removed line would presumably also count — confirm this
// is intended.
const STEERING_SIGNATURES = [
'LocalAgentDefinition',
'LocalInvocation',
'ToolDefinition',
'inputSchema',
"kind: 'local'",
];
function main() {
const targetBranch = process.env.GITHUB_BASE_REF || 'main';
const verbose = process.argv.includes('--verbose');
const steeringOnly = process.argv.includes('--steering-only');
try {
const remoteUrl = process.env.GITHUB_REPOSITORY
? `https://github.com/${process.env.GITHUB_REPOSITORY}.git`
@@ -30,18 +42,60 @@ function main() {
.split('\n')
.filter(Boolean);
const shouldRun = changedFiles.some((file) =>
EVALS_FILE_PREFIXES.some((prefix) => file.startsWith(prefix)),
);
let detected = false;
const reasons = [];
console.log(shouldRun ? 'true' : 'false');
// 1. Path-based detection
for (const file of changedFiles) {
if (CORE_STEERING_PATHS.some((prefix) => file.startsWith(prefix))) {
detected = true;
reasons.push(`Matched core steering path: ${file}`);
if (!verbose) break;
}
if (
!steeringOnly &&
TEST_PATHS.some((prefix) => file.startsWith(prefix))
) {
detected = true;
reasons.push(`Matched test path: ${file}`);
if (!verbose) break;
}
}
// 2. Signature-based detection (only in packages/core/src/ and only if not already detected or if verbose)
if (!detected || verbose) {
const coreChanges = changedFiles.filter((f) =>
f.startsWith('packages/core/src/'),
);
if (coreChanges.length > 0) {
// Get the actual diff content for core files
const diff = execSync(
`git diff -U0 FETCH_HEAD...HEAD -- packages/core/src/`,
{ encoding: 'utf-8' },
);
for (const sig of STEERING_SIGNATURES) {
if (diff.includes(sig)) {
detected = true;
reasons.push(`Matched steering signature in core: ${sig}`);
if (!verbose) break;
}
}
}
}
if (verbose && reasons.length > 0) {
process.stderr.write('Detection reasons:\n');
reasons.forEach((r) => process.stderr.write(` - ${r}\n`));
}
process.stdout.write(detected ? 'true' : 'false');
} catch (error) {
// If anything fails (e.g., no git history), run evals to be safe
console.warn(
'Warning: Failed to determine if evals should run. Defaulting to true.',
// If anything fails (e.g., no git history), run evals/guidance to be safe
process.stderr.write(
'Warning: Failed to determine if changes occurred. Defaulting to true.\n',
);
console.error(error);
console.log('true');
process.stderr.write(String(error) + '\n');
process.stdout.write('true');
}
}