mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-13 05:12:55 -07:00
Evals: PR Guidance adding workflow (#23164)
This commit is contained in:
@@ -0,0 +1,69 @@
|
|||||||
|
# Posts an advisory comment on pull requests whose changes look like they
# affect model steering (as reported by scripts/changed_prompt.js).
# The job only comments; it does not run any evals itself.
name: 'Evals: PR Guidance'

on:
  pull_request:
    # Only TypeScript sources under packages/core/src, excluding test files.
    paths:
      - 'packages/core/src/**/*.ts'
      - '!**/*.test.ts'
      - '!**/*.test.tsx'

permissions:
  pull-requests: 'write'
  contents: 'read'

jobs:
  provide-guidance:
    name: 'Model Steering Guidance'
    runs-on: 'ubuntu-latest'
    # Skip on forks/mirrors of the repository.
    if: "github.repository == 'google-gemini/gemini-cli'"
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v4
        with:
          # Full history so the base branch can be diffed against HEAD below.
          fetch-depth: 0

      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0
        with:
          node-version-file: '.nvmrc'
          cache: 'npm'

      - name: 'Detect Steering Changes'
        id: 'detect'
        # The script writes the literal string 'true' or 'false' to stdout.
        run: |
          STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only)
          echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT"

      - name: 'Analyze PR Content'
        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
        id: 'analysis'
        env:
          GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
          # Context values are handed to the shell through environment
          # variables instead of being expanded into the script text, so their
          # contents can never be interpreted as shell syntax.
          BASE_REF: '${{ github.base_ref }}'
          REPO: '${{ github.repository }}'
          ACTOR: '${{ github.actor }}'
        run: |
          # Check for behavioral eval changes
          EVAL_CHANGES=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep "^evals/" || true)
          if [ -z "$EVAL_CHANGES" ]; then
            echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT"
          fi

          # Check if user is a maintainer (has write/admin access).
          # A failed lookup is treated as "not a maintainer" so that this
          # advisory job does not fail the PR checks.
          USER_PERMISSION=$(gh api "repos/${REPO}/collaborators/${ACTOR}/permission" --jq '.permission' || echo 'none')
          if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then
            echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT"
          fi

      - name: 'Post Guidance Comment'
        if: "steps.detect.outputs.STEERING_DETECTED == 'true'"
        uses: 'thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74' # ratchet:thollander/actions-comment-pull-request@v3
        with:
          comment-tag: 'eval-guidance-bot'
          message: |
            ### 🧠 Model Steering Guidance

            This PR modifies files that affect the model's behavior (prompts, tools, or instructions).

            ${{ steps.analysis.outputs.MISSING_EVALS == 'true' && '- ⚠️ **Consider adding Evals:** No behavioral evaluations (`evals/*.eval.ts`) were added or updated in this PR. Consider adding a test case to verify the new behavior and prevent regressions.' || '' }}
            ${{ steps.analysis.outputs.IS_MAINTAINER == 'true' && '- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging.' || '' }}

            ---
            *This is an automated guidance message triggered by steering logic signatures.*
|
||||||
+65
-11
@@ -5,14 +5,26 @@
|
|||||||
*/
|
*/
|
||||||
import { execSync } from 'node:child_process';
|
import { execSync } from 'node:child_process';
|
||||||
|
|
||||||
const EVALS_FILE_PREFIXES = [
|
const CORE_STEERING_PATHS = [
|
||||||
'packages/core/src/prompts/',
|
'packages/core/src/prompts/',
|
||||||
'packages/core/src/tools/',
|
'packages/core/src/tools/',
|
||||||
'evals/',
|
];
|
||||||
|
|
||||||
|
const TEST_PATHS = ['evals/'];
|
||||||
|
|
||||||
|
const STEERING_SIGNATURES = [
|
||||||
|
'LocalAgentDefinition',
|
||||||
|
'LocalInvocation',
|
||||||
|
'ToolDefinition',
|
||||||
|
'inputSchema',
|
||||||
|
"kind: 'local'",
|
||||||
];
|
];
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
const targetBranch = process.env.GITHUB_BASE_REF || 'main';
|
const targetBranch = process.env.GITHUB_BASE_REF || 'main';
|
||||||
|
const verbose = process.argv.includes('--verbose');
|
||||||
|
const steeringOnly = process.argv.includes('--steering-only');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const remoteUrl = process.env.GITHUB_REPOSITORY
|
const remoteUrl = process.env.GITHUB_REPOSITORY
|
||||||
? `https://github.com/${process.env.GITHUB_REPOSITORY}.git`
|
? `https://github.com/${process.env.GITHUB_REPOSITORY}.git`
|
||||||
@@ -30,18 +42,60 @@ function main() {
|
|||||||
.split('\n')
|
.split('\n')
|
||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
|
|
||||||
const shouldRun = changedFiles.some((file) =>
|
let detected = false;
|
||||||
EVALS_FILE_PREFIXES.some((prefix) => file.startsWith(prefix)),
|
const reasons = [];
|
||||||
);
|
|
||||||
|
|
||||||
console.log(shouldRun ? 'true' : 'false');
|
// 1. Path-based detection
|
||||||
|
for (const file of changedFiles) {
|
||||||
|
if (CORE_STEERING_PATHS.some((prefix) => file.startsWith(prefix))) {
|
||||||
|
detected = true;
|
||||||
|
reasons.push(`Matched core steering path: ${file}`);
|
||||||
|
if (!verbose) break;
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
!steeringOnly &&
|
||||||
|
TEST_PATHS.some((prefix) => file.startsWith(prefix))
|
||||||
|
) {
|
||||||
|
detected = true;
|
||||||
|
reasons.push(`Matched test path: ${file}`);
|
||||||
|
if (!verbose) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Signature-based detection (only in packages/core/src/ and only if not already detected or if verbose)
|
||||||
|
if (!detected || verbose) {
|
||||||
|
const coreChanges = changedFiles.filter((f) =>
|
||||||
|
f.startsWith('packages/core/src/'),
|
||||||
|
);
|
||||||
|
if (coreChanges.length > 0) {
|
||||||
|
// Get the actual diff content for core files
|
||||||
|
const diff = execSync(
|
||||||
|
`git diff -U0 FETCH_HEAD...HEAD -- packages/core/src/`,
|
||||||
|
{ encoding: 'utf-8' },
|
||||||
|
);
|
||||||
|
for (const sig of STEERING_SIGNATURES) {
|
||||||
|
if (diff.includes(sig)) {
|
||||||
|
detected = true;
|
||||||
|
reasons.push(`Matched steering signature in core: ${sig}`);
|
||||||
|
if (!verbose) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbose && reasons.length > 0) {
|
||||||
|
process.stderr.write('Detection reasons:\n');
|
||||||
|
reasons.forEach((r) => process.stderr.write(` - ${r}\n`));
|
||||||
|
}
|
||||||
|
|
||||||
|
process.stdout.write(detected ? 'true' : 'false');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// If anything fails (e.g., no git history), run evals to be safe
|
// If anything fails (e.g., no git history), run evals/guidance to be safe
|
||||||
console.warn(
|
process.stderr.write(
|
||||||
'Warning: Failed to determine if evals should run. Defaulting to true.',
|
'Warning: Failed to determine if changes occurred. Defaulting to true.\n',
|
||||||
);
|
);
|
||||||
console.error(error);
|
process.stderr.write(String(error) + '\n');
|
||||||
console.log('true');
|
process.stdout.write('true');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user