# Nightly evaluation workflow.
#
# Runs the eval suite once per (model, attempt) pair in the matrix, uploads
# the logs of every run as an artifact, and then aggregates all logs into a
# single job summary.
name: 'Evals: Nightly'

on:
  schedule:
    # Every day at 01:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 1 * * *'
  # Manual trigger with optional overrides.
  workflow_dispatch:
    inputs:
      run_all:
        description: 'Run all evaluations (including usually passing)'
        type: 'boolean'
        default: true
      test_name_pattern:
        description: 'Test name pattern or file name'
        required: false
        type: 'string'

permissions:
  contents: 'read'
  checks: 'write'
  actions: 'read'

jobs:
  evals:
    name: 'Evals (USUALLY_PASSING) nightly run'
    runs-on: 'gemini-cli-ubuntu-16-core'
    # Only run in the upstream repository, never in forks.
    if: "github.repository == 'google-gemini/gemini-cli'"
    strategy:
      # One failing model/attempt must not cancel the remaining combinations.
      fail-fast: false
      # 6 models x 3 attempts = 18 jobs per run.
      matrix:
        model:
          - 'gemini-3.1-pro-preview-customtools'
          - 'gemini-3-pro-preview'
          - 'gemini-3-flash-preview'
          - 'gemini-2.5-pro'
          - 'gemini-2.5-flash'
          - 'gemini-2.5-flash-lite'
        run_attempt:
          - 1
          - 2
          - 3
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
        with:
          node-version-file: '.nvmrc'
          cache: 'npm'

      - name: 'Install dependencies'
        run: 'npm ci'

      - name: 'Build project'
        run: 'npm run build'

      # Guarantees the upload path exists before the evals run.
      - name: 'Create logs directory'
        run: 'mkdir -p evals/logs'

      - name: 'Run Evals'
        # A failing eval run does not fail the job.
        continue-on-error: true
        env:
          GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
          GEMINI_MODEL: '${{ matrix.model }}'
          # Evaluates to 'true' unless the run_all input is explicitly false;
          # scheduled runs carry no inputs, so they also resolve to 'true'.
          # NOTE(review): presumably read by the eval harness to include the
          # "usually passing" evals - confirm against the test setup.
          RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}"
          # Passed through the environment rather than interpolated into the
          # script, so the user-supplied value is never parsed as shell code.
          TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
        run: |
          eval_cmd=(npm run test:all_evals)
          filter="${TEST_NAME_PATTERN}"

          case "$filter" in
            '')
              # No filter supplied: run the whole suite.
              "${eval_cmd[@]}"
              ;;
            *.ts | *.js | */*)
              # Looks like a file name or path: pass it as a positional arg.
              "${eval_cmd[@]}" -- "$filter"
              ;;
            *)
              # Anything else is treated as a test-name pattern.
              "${eval_cmd[@]}" -- -t "$filter"
              ;;
          esac

      - name: 'Upload Logs'
        # Upload even when an earlier step failed or the run was cancelled.
        if: 'always()'
        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
        with:
          # Unique per matrix cell so the artifacts do not collide.
          name: 'eval-logs-${{ matrix.model }}-${{ matrix.run_attempt }}'
          path: 'evals/logs'
          retention-days: 7

  aggregate-results:
    name: 'Aggregate Results'
    needs:
      - 'evals'
    # always() lets the aggregation run even if some eval jobs failed.
    if: "github.repository == 'google-gemini/gemini-cli' && always()"
    runs-on: 'gemini-cli-ubuntu-16-core'
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

      # No artifact name is given, so every artifact of this run is fetched
      # into the 'artifacts' directory.
      - name: 'Download Logs'
        uses: 'actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806' # ratchet:actions/download-artifact@v4
        with:
          path: 'artifacts'

      - name: 'Generate Summary'
        env:
          # NOTE(review): presumably used by the script for GitHub API
          # access - confirm in scripts/aggregate_evals.js.
          GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
        # The script's stdout is appended to the job summary page.
        run: 'node scripts/aggregate_evals.js artifacts >> "$GITHUB_STEP_SUMMARY"'