feat(ci): isolate workflow evals into independent nightly job

- Splits 'Evals: Nightly' into 'evals' (general capabilities) and 'workflow-evals' (specific workflow simulations).
- 'workflow-evals' runs only on 'gemini-2.5-pro' (the target model).
- 'evals' excludes workflow tests to prevent noise/skewed metrics on other models.
- Removes code-level 'targetModels' restrictions in favor of CI configuration.
- Updates the aggregation script to handle skipped tests correctly (a safeguard only: because workflow evals are excluded from the general run, skipped tests should not normally occur).
This commit is contained in:
cocosheng-g
2026-02-03 22:37:15 -05:00
parent d23499db90
commit 9da1542071
6 changed files with 42 additions and 47 deletions

View File

@@ -70,7 +70,8 @@ jobs:
$CMD -- -t "$PATTERN"
fi
else
$CMD
# Exclude workflow evals from the general matrix run
$CMD -- --exclude evals/workflows
fi
- name: 'Upload Logs'
@@ -81,9 +82,48 @@ jobs:
path: 'evals/logs'
retention-days: 7
workflow-evals:
  # Stand-alone nightly job for the workflow evals. It is pinned to a single
  # model through GEMINI_MODEL below and uploads its logs as its own artifact.
  name: 'Evals (Workflow) nightly run'
  runs-on: 'gemini-cli-ubuntu-16-core'
  steps:
    # Actions are pinned to full commit SHAs; the trailing "ratchet:" comments
    # record the tag each SHA was resolved from.
    - name: 'Checkout'
      uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

    - name: 'Set up Node.js'
      uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
      with:
        # Node version is read from the repository's .nvmrc file.
        node-version-file: '.nvmrc'
        cache: 'npm'

    - name: 'Install dependencies'
      run: 'npm ci'

    - name: 'Build project'
      run: 'npm run build'

    # Ensure the directory uploaded by the final step exists before the run.
    - name: 'Create logs directory'
      run: 'mkdir -p evals/logs'

    - name: 'Run Workflow Evals'
      env:
        GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
        GEMINI_MODEL: 'gemini-2.5-pro'
        RUN_EVALS: 'true'
      # A failing eval run must not fail the job: the remaining steps still
      # execute and the job itself is reported as successful.
      continue-on-error: true
      # Pass the workflow eval directory to the test:all_evals npm script so
      # that only that path is targeted.
      run: 'npm run test:all_evals -- evals/workflows'

    # Always upload whatever is in evals/logs, regardless of earlier outcomes.
    - name: 'Upload Logs'
      if: 'always()'
      uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
      with:
        name: 'eval-logs-workflows-gemini-2.5-pro'
        path: 'evals/logs'
        retention-days: 7
aggregate-results:
name: 'Aggregate Results'
needs: ['evals']
needs: ['evals', 'workflow-evals']
if: 'always()'
runs-on: 'gemini-cli-ubuntu-16-core'
steps: