diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml
index 76e4da22a3..c64d59ec77 100644
--- a/.github/workflows/evals-nightly.yml
+++ b/.github/workflows/evals-nightly.yml
@@ -9,6 +9,10 @@ on:
         description: 'Run all evaluations (including usually passing)'
         type: 'boolean'
         default: true
+      test_filter:
+        description: 'Regex to filter tests (passed to vitest -t)'
+        type: 'string'
+        required: false
 
 permissions:
   contents: 'read'
@@ -53,7 +57,15 @@ jobs:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
           GEMINI_MODEL: '${{ matrix.model }}'
           RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}"
-        run: 'npm run test:all_evals'
+          # User-supplied input is handed to the shell through the environment,
+          # never interpolated into the script text, to prevent script injection.
+          TEST_FILTER: '${{ inputs.test_filter }}'
+        run: |
+          if [ -n "${TEST_FILTER}" ]; then
+            npm run test:all_evals -- -t "${TEST_FILTER}"
+          else
+            npm run test:all_evals
+          fi
 
       - name: 'Upload Logs'
         if: 'always()'
diff --git a/package.json b/package.json
index 08c7a7ccd6..fe191fe813 100644
--- a/package.json
+++ b/package.json
@@ -42,6 +42,7 @@
     "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts",
     "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts",
     "test:always_passing_evals": "vitest run --config evals/vitest.config.ts",
+    "test:eval": "cross-env RUN_EVALS=1 vitest run --config evals/vitest.config.ts",
     "test:all_evals": "cross-env RUN_EVALS=1 vitest run --config evals/vitest.config.ts",
     "test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none",
     "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",