mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
Update evals workflow to use new infra.
This commit is contained in:
+16
-32
@@ -7,42 +7,26 @@ defaults:
|
||||
run:
|
||||
shell: 'bash'
|
||||
|
||||
permissions:
|
||||
contents: 'read'
|
||||
id-token: 'write'
|
||||
packages: 'read'
|
||||
|
||||
jobs:
|
||||
eval:
|
||||
name: 'Eval'
|
||||
trigger-bench-hub:
|
||||
name: 'Trigger BenchHub E2E'
|
||||
if: >-
|
||||
github.repository == 'google-gemini/gemini-cli'
|
||||
runs-on: 'ubuntu-latest'
|
||||
container:
|
||||
image: 'ghcr.io/google-gemini/gemini-cli-swe-agent-eval@sha256:cd5edc4afd2245c1f575e791c0859b3c084a86bb3bd9a6762296da5162b35a8f'
|
||||
credentials:
|
||||
username: '${{ github.actor }}'
|
||||
password: '${{ secrets.GITHUB_TOKEN }}'
|
||||
env:
|
||||
GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
|
||||
DEFAULT_VERTEXAI_PROJECT: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
|
||||
GOOGLE_CLOUD_PROJECT: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
|
||||
GEMINI_API_KEY: '${{ secrets.EVAL_GEMINI_API_KEY }}'
|
||||
GCLI_LOCAL_FILE_TELEMETRY: 'True'
|
||||
EVAL_GCS_BUCKET: '${{ vars.EVAL_GCS_ARTIFACTS_BUCKET }}'
|
||||
steps:
|
||||
- name: 'Authenticate to Google Cloud'
|
||||
id: 'auth'
|
||||
uses: 'google-github-actions/auth@c200f3691d83b41bf9bbd8638997a462592937ed' # ratchet:exclude pin@v2.1.7
|
||||
- name: 'Generate token'
|
||||
id: 'generate_token'
|
||||
uses: 'actions/create-github-app-token@fee1f7d63c2ff003460e3d139729b119787bc349' # ratchet:actions/create-github-app-token@v2
|
||||
with:
|
||||
project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
|
||||
workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
|
||||
service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
|
||||
token_format: 'access_token'
|
||||
access_token_scopes: 'https://www.googleapis.com/auth/cloud-platform'
|
||||
app-id: '${{ secrets.APP_ID }}'
|
||||
private-key: '${{ secrets.PRIVATE_KEY }}'
|
||||
owner: 'googlecloud-appeco-incubator'
|
||||
repositories: 'bench-hub'
|
||||
|
||||
- name: 'Run evaluation'
|
||||
working-directory: '/app'
|
||||
run: |
|
||||
poetry run exp_run --experiment-mode=on-demand --branch-or-commit="${GITHUB_REF_NAME}" --model-name=gemini-2.5-pro --dataset=swebench_verified --concurrency=15
|
||||
poetry run python agent_prototypes/scripts/parse_gcli_logs_experiment.py --experiment_dir=experiments/adhoc/gcli_temp_exp --gcs-bucket="${EVAL_GCS_BUCKET}" --gcs-path=gh_action_artifacts
|
||||
- name: 'Repository Dispatch'
|
||||
uses: 'peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0' # ratchet:peter-evans/repository-dispatch@v3
|
||||
with:
|
||||
token: '${{ steps.generate_token.outputs.token }}'
|
||||
repository: 'googlecloud-appeco-incubator/bench-hub'
|
||||
event-type: 'gemini_cli_updated'
|
||||
client-payload: '{"branch": "${{ github.ref_name }}"}'
|
||||
|
||||
Reference in New Issue
Block a user