mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-24 04:52:43 -07:00
Evals: PR Guidance adding workflow (#23164)
This commit is contained in:
69
.github/workflows/eval-guidance.yml
vendored
Normal file
69
.github/workflows/eval-guidance.yml
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
# Posts (or updates) an advisory PR comment when a pull request touches
# model-steering code under packages/core/src (prompts, tools, agent/tool
# definitions), reminding authors to add behavioral evals.
name: 'Evals: PR Guidance'

on:
  pull_request:
    paths:
      - 'packages/core/src/**/*.ts'
      - '!**/*.test.ts'
      - '!**/*.test.tsx'

permissions:
  pull-requests: 'write'
  contents: 'read'

jobs:
  provide-guidance:
    name: 'Model Steering Guidance'
    runs-on: 'ubuntu-latest'
    # Only run in the canonical repository, not in forks/mirrors.
    if: "github.repository == 'google-gemini/gemini-cli'"
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v4
        with:
          # Full history is required so the base branch can be diffed against HEAD.
          fetch-depth: 0

      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0
        with:
          node-version-file: '.nvmrc'
          cache: 'npm'

      - name: 'Detect Steering Changes'
        id: 'detect'
        run: |
          # The script prints 'true' or 'false' on stdout; diagnostics go to stderr.
          STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only)
          echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT"

      - name: 'Analyze PR Content'
        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
        id: 'analysis'
        env:
          GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
          # Pass GitHub contexts through the environment instead of expanding
          # them inside the script, so their values can never be parsed as shell.
          BASE_REF: '${{ github.base_ref }}'
          REPOSITORY: '${{ github.repository }}'
          ACTOR: '${{ github.actor }}'
        run: |
          # Check for behavioral eval changes
          EVAL_CHANGES=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep "^evals/" || true)
          if [ -z "$EVAL_CHANGES" ]; then
            echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT"
          fi

          # Check if user is a maintainer (has write/admin access).
          # Best-effort: a failed lookup (the step runs under `bash -e`) must not
          # block the guidance comment; the actor is then treated as a non-maintainer.
          USER_PERMISSION=$(gh api "repos/${REPOSITORY}/collaborators/${ACTOR}/permission" --jq '.permission' || true)
          if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then
            echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT"
          fi

      # NOTE(review): on `pull_request` events from forks the GITHUB_TOKEN is
      # typically read-only, so this step may be unable to post - confirm the
      # intended behavior for external contributors.
      - name: 'Post Guidance Comment'
        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
        uses: 'thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74' # ratchet:thollander/actions-comment-pull-request@v3
        with:
          # A stable tag lets the action update its previous comment instead of adding a new one.
          comment-tag: 'eval-guidance-bot'
          message: |
            ### 🧠 Model Steering Guidance

            This PR modifies files that affect the model's behavior (prompts, tools, or instructions).

            ${{ steps.analysis.outputs.MISSING_EVALS == 'true' && '- ⚠️ **Consider adding Evals:** No behavioral evaluations (`evals/*.eval.ts`) were added or updated in this PR. Consider adding a test case to verify the new behavior and prevent regressions.' || '' }}
            ${{ steps.analysis.outputs.IS_MAINTAINER == 'true' && '- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging.' || '' }}

            ---
            *This is an automated guidance message triggered by steering logic signatures.*
|
||||
@@ -5,14 +5,26 @@
|
||||
*/
|
||||
import { execSync } from 'node:child_process';
|
||||
|
||||
const EVALS_FILE_PREFIXES = [
|
||||
const CORE_STEERING_PATHS = [
|
||||
'packages/core/src/prompts/',
|
||||
'packages/core/src/tools/',
|
||||
'evals/',
|
||||
];
|
||||
|
||||
const TEST_PATHS = ['evals/'];
|
||||
|
||||
const STEERING_SIGNATURES = [
|
||||
'LocalAgentDefinition',
|
||||
'LocalInvocation',
|
||||
'ToolDefinition',
|
||||
'inputSchema',
|
||||
"kind: 'local'",
|
||||
];
|
||||
|
||||
function main() {
|
||||
const targetBranch = process.env.GITHUB_BASE_REF || 'main';
|
||||
const verbose = process.argv.includes('--verbose');
|
||||
const steeringOnly = process.argv.includes('--steering-only');
|
||||
|
||||
try {
|
||||
const remoteUrl = process.env.GITHUB_REPOSITORY
|
||||
? `https://github.com/${process.env.GITHUB_REPOSITORY}.git`
|
||||
@@ -30,18 +42,60 @@ function main() {
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
|
||||
const shouldRun = changedFiles.some((file) =>
|
||||
EVALS_FILE_PREFIXES.some((prefix) => file.startsWith(prefix)),
|
||||
);
|
||||
let detected = false;
|
||||
const reasons = [];
|
||||
|
||||
console.log(shouldRun ? 'true' : 'false');
|
||||
// 1. Path-based detection
|
||||
for (const file of changedFiles) {
|
||||
if (CORE_STEERING_PATHS.some((prefix) => file.startsWith(prefix))) {
|
||||
detected = true;
|
||||
reasons.push(`Matched core steering path: ${file}`);
|
||||
if (!verbose) break;
|
||||
}
|
||||
if (
|
||||
!steeringOnly &&
|
||||
TEST_PATHS.some((prefix) => file.startsWith(prefix))
|
||||
) {
|
||||
detected = true;
|
||||
reasons.push(`Matched test path: ${file}`);
|
||||
if (!verbose) break;
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Signature-based detection (only in packages/core/src/ and only if not already detected or if verbose)
|
||||
if (!detected || verbose) {
|
||||
const coreChanges = changedFiles.filter((f) =>
|
||||
f.startsWith('packages/core/src/'),
|
||||
);
|
||||
if (coreChanges.length > 0) {
|
||||
// Get the actual diff content for core files
|
||||
const diff = execSync(
|
||||
`git diff -U0 FETCH_HEAD...HEAD -- packages/core/src/`,
|
||||
{ encoding: 'utf-8' },
|
||||
);
|
||||
for (const sig of STEERING_SIGNATURES) {
|
||||
if (diff.includes(sig)) {
|
||||
detected = true;
|
||||
reasons.push(`Matched steering signature in core: ${sig}`);
|
||||
if (!verbose) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (verbose && reasons.length > 0) {
|
||||
process.stderr.write('Detection reasons:\n');
|
||||
reasons.forEach((r) => process.stderr.write(` - ${r}\n`));
|
||||
}
|
||||
|
||||
process.stdout.write(detected ? 'true' : 'false');
|
||||
} catch (error) {
|
||||
// If anything fails (e.g., no git history), run evals to be safe
|
||||
console.warn(
|
||||
'Warning: Failed to determine if evals should run. Defaulting to true.',
|
||||
// If anything fails (e.g., no git history), run evals/guidance to be safe
|
||||
process.stderr.write(
|
||||
'Warning: Failed to determine if changes occurred. Defaulting to true.\n',
|
||||
);
|
||||
console.error(error);
|
||||
console.log('true');
|
||||
process.stderr.write(String(error) + '\n');
|
||||
process.stdout.write('true');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user