feat(evals): implement related evaluation system for targeted testing

This commit is contained in:
Alisa Novikova
2026-04-07 19:35:12 -07:00
parent 06fcdc231c
commit db8910c39b
9 changed files with 610 additions and 43 deletions
+4 -3
View File
@@ -1,7 +1,7 @@
name: 'Evals: PR Evaluation & Regression'
on:
pull_request_target:
pull_request:
types: ['opened', 'synchronize', 'reopened', 'ready_for_review']
paths:
- 'packages/core/src/prompts/**'
@@ -153,9 +153,10 @@ jobs:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
MODEL_LIST: '${{ env.MODEL_LIST }}'
GITHUB_BASE_REF: '${{ github.base_ref }}'
run: |
# Run the regression check loop. The script saves the report to a file.
node scripts/run_eval_regression.js
# Run the regression check loop in --related mode. The script saves the report to a file.
node scripts/run_eval_regression.js --related
# Use the generated report file if it exists
if [[ -f eval_regression_report.md ]]; then