diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index 23dc1cfdfb..7a8a038f76 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -7,42 +7,37 @@ defaults:
   run:
     shell: 'bash'
 
-permissions:
-  contents: 'read'
-  id-token: 'write'
-  packages: 'read'
+# The job below authenticates with a GitHub App token and never uses
+# GITHUB_TOKEN, so the default token is granted no permissions at all.
+permissions: {}
 
 jobs:
-  eval:
-    name: 'Eval'
+  # Sends a repository_dispatch event to googlecloud-appeco-incubator/bench-hub
+  # for the ref that triggered this workflow (upstream repository only).
+  trigger-bench-hub:
+    name: 'Trigger BenchHub E2E'
     if: >-
       github.repository == 'google-gemini/gemini-cli'
     runs-on: 'ubuntu-latest'
-    container:
-      image: 'ghcr.io/google-gemini/gemini-cli-swe-agent-eval@sha256:cd5edc4afd2245c1f575e791c0859b3c084a86bb3bd9a6762296da5162b35a8f'
-      credentials:
-        username: '${{ github.actor }}'
-        password: '${{ secrets.GITHUB_TOKEN }}'
-    env:
-      GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
-      DEFAULT_VERTEXAI_PROJECT: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
-      GOOGLE_CLOUD_PROJECT: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
-      GEMINI_API_KEY: '${{ secrets.EVAL_GEMINI_API_KEY }}'
-      GCLI_LOCAL_FILE_TELEMETRY: 'True'
-      EVAL_GCS_BUCKET: '${{ vars.EVAL_GCS_ARTIFACTS_BUCKET }}'
     steps:
-      - name: 'Authenticate to Google Cloud'
-        id: 'auth'
-        uses: 'google-github-actions/auth@c200f3691d83b41bf9bbd8638997a462592937ed' # ratchet:exclude pin@v2.1.7
+      # Create a GitHub App token restricted to the single target repository
+      # (owner + repositories inputs) instead of using a long-lived PAT.
+      - name: 'Generate token'
+        id: 'generate_token'
+        uses: 'actions/create-github-app-token@fee1f7d63c2ff003460e3d139729b119787bc349' # ratchet:actions/create-github-app-token@v2
         with:
-          project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
-          workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
-          service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
-          token_format: 'access_token'
-          access_token_scopes: 'https://www.googleapis.com/auth/cloud-platform'
+          app-id: '${{ secrets.APP_ID }}'
+          private-key: '${{ secrets.PRIVATE_KEY }}'
+          owner: 'googlecloud-appeco-incubator'
+          repositories: 'bench-hub'
 
-      - name: 'Run evaluation'
-        working-directory: '/app'
-        run: |
-          poetry run exp_run --experiment-mode=on-demand --branch-or-commit="${GITHUB_REF_NAME}" --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15
-          poetry run python agent_prototypes/scripts/parse_gcli_logs_experiment.py --experiment_dir=experiments/adhoc/gcli_temp_exp --gcs-bucket="${EVAL_GCS_BUCKET}" --gcs-path=gh_action_artifacts
+      # Fire the `gemini_cli_updated` event in bench-hub using the app token.
+      - name: 'Repository Dispatch'
+        uses: 'peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0' # ratchet:peter-evans/repository-dispatch@v3
+        with:
+          token: '${{ steps.generate_token.outputs.token }}'
+          repository: 'googlecloud-appeco-incubator/bench-hub'
+          event-type: 'gemini_cli_updated'
+          # toJSON() emits the ref name as a quoted, escaped JSON string, so a
+          # branch name containing '"' or ',' cannot break or extend the payload.
+          client-payload: '{"branch": ${{ toJSON(github.ref_name) }} }'