mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-13 05:12:55 -07:00
feat(actions): add swebench eval harness to github actions (#8028)
Co-authored-by: matt korwel <matt.korwel@gmail.com>
This commit is contained in:
+36
-17
@@ -3,27 +3,46 @@ name: 'Eval'
|
|||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: 'bash'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: 'read'
|
||||||
|
id-token: 'write'
|
||||||
|
packages: 'read'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
eval:
|
eval:
|
||||||
name: 'Eval'
|
name: 'Eval'
|
||||||
|
if: >-
|
||||||
|
github.repository == 'google-gemini/gemini-cli'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
strategy:
|
container:
|
||||||
matrix:
|
image: 'ghcr.io/google-gemini/gemini-cli-swe-agent-eval@sha256:cd5edc4afd2245c1f575e791c0859b3c084a86bb3bd9a6762296da5162b35a8f'
|
||||||
node-version:
|
credentials:
|
||||||
- '20.x'
|
username: '${{ github.actor }}'
|
||||||
- '22.x'
|
password: '${{ secrets.GITHUB_TOKEN }}'
|
||||||
- '24.x'
|
env:
|
||||||
|
GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
|
||||||
|
DEFAULT_VERTEXAI_PROJECT: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
|
||||||
|
GOOGLE_CLOUD_PROJECT: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
|
||||||
|
GEMINI_API_KEY: '${{ secrets.EVAL_GEMINI_API_KEY }}'
|
||||||
|
GCLI_LOCAL_FILE_TELEMETRY: 'True'
|
||||||
|
EVAL_GCS_BUCKET: '${{ vars.EVAL_GCS_ARTIFACTS_BUCKET }}'
|
||||||
steps:
|
steps:
|
||||||
- name: 'Set up Node.js ${{ matrix.node-version }}'
|
- name: 'Authenticate to Google Cloud'
|
||||||
uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
|
id: 'auth'
|
||||||
|
uses: 'google-github-actions/auth@v2' # ratchet:exclude
|
||||||
with:
|
with:
|
||||||
node-version: '${{ matrix.node-version }}'
|
project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
|
||||||
cache: 'npm'
|
workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
|
||||||
|
service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
|
||||||
|
token_format: 'access_token'
|
||||||
|
access_token_scopes: 'https://www.googleapis.com/auth/cloud-platform'
|
||||||
|
|
||||||
- name: 'Set up Python'
|
- name: 'Run evaluation'
|
||||||
uses: 'actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065' # ratchet:actions/setup-python@v5
|
working-directory: '/app'
|
||||||
with:
|
run: |
|
||||||
python-version: '3.11'
|
poetry run exp_run --experiment-mode=on-demand --branch-or-commit=${{ github.ref_name }} --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15
|
||||||
|
poetry run python agent_prototypes/scripts/parse_gcli_logs_experiment.py --experiment_dir=experiments/adhoc/gcli_temp_exp --gcs-bucket="${EVAL_GCS_BUCKET}" --gcs-path=gh_action_artifacts
|
||||||
- name: 'Install and configure Poetry'
|
|
||||||
uses: 'snok/install-poetry@76e04a911780d5b312d89783f7b1cd627778900a' # ratchet:snok/install-poetry@v1
|
|
||||||
|
|||||||
Reference in New Issue
Block a user