mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-13 06:40:33 -07:00
Generalize evals infra to support more types of evals, organization and queuing of named suites (#24941)
This commit is contained in:
committed by
GitHub
parent
bc3ed61adb
commit
f1bb2af6de
2
.github/workflows/chained_e2e.yml
vendored
2
.github/workflows/chained_e2e.yml
vendored
@@ -335,6 +335,8 @@ jobs:
|
||||
env:
|
||||
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
|
||||
GEMINI_MODEL: 'gemini-3-pro-preview'
|
||||
# Only run always passes behavioral tests.
|
||||
EVAL_SUITE_TYPE: 'behavioral'
|
||||
# Disable Vitest internal retries to avoid double-retrying;
|
||||
# custom retry logic is handled in evals/test-helper.ts
|
||||
VITEST_RETRY: 0
|
||||
|
||||
20
.github/workflows/evals-nightly.yml
vendored
20
.github/workflows/evals-nightly.yml
vendored
@@ -5,10 +5,18 @@ on:
|
||||
- cron: '0 1 * * *' # Runs at 1 AM every day
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run_all:
|
||||
description: 'Run all evaluations (including usually passing)'
|
||||
type: 'boolean'
|
||||
default: true
|
||||
suite_type:
|
||||
description: 'Suite type to run'
|
||||
type: 'choice'
|
||||
options:
|
||||
- 'behavioral'
|
||||
- 'component-level'
|
||||
- 'hero-scenario'
|
||||
default: 'behavioral'
|
||||
suite_name:
|
||||
description: 'Specific suite name to run'
|
||||
required: false
|
||||
type: 'string'
|
||||
test_name_pattern:
|
||||
description: 'Test name pattern or file name'
|
||||
required: false
|
||||
@@ -59,7 +67,9 @@ jobs:
|
||||
env:
|
||||
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
|
||||
GEMINI_MODEL: '${{ matrix.model }}'
|
||||
RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}"
|
||||
RUN_EVALS: 'true'
|
||||
EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}"
|
||||
EVAL_SUITE_NAME: '${{ github.event.inputs.suite_name }}'
|
||||
TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
|
||||
# Disable Vitest internal retries to avoid double-retrying;
|
||||
# custom retry logic is handled in evals/test-helper.ts
|
||||
|
||||
Reference in New Issue
Block a user