Generalize evals infra to support more types of evals, organization and queuing of named suites (#24941)

This commit is contained in:
Christian Gunderman
2026-04-08 23:57:26 +00:00
committed by GitHub
parent bc3ed61adb
commit f1bb2af6de
32 changed files with 475 additions and 133 deletions

View File

@@ -335,6 +335,8 @@ jobs:
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
GEMINI_MODEL: 'gemini-3-pro-preview'
# Only run the "always passes" behavioral tests.
EVAL_SUITE_TYPE: 'behavioral'
# Disable Vitest internal retries to avoid double-retrying;
# custom retry logic is handled in evals/test-helper.ts
VITEST_RETRY: 0