Merge branch 'main' into alisa/eval_suites

This commit is contained in:
Alisa Novikova
2026-04-08 21:03:01 -07:00
228 changed files with 8385 additions and 3279 deletions
+2
View File
@@ -335,6 +335,8 @@ jobs:
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
GEMINI_MODEL: 'gemini-3-pro-preview'
# Only run always-passing behavioral tests.
EVAL_SUITE_TYPE: 'behavioral'
# Disable Vitest internal retries to avoid double-retrying;
# custom retry logic is handled in evals/test-helper.ts
VITEST_RETRY: 0
+15 -5
View File
@@ -5,10 +5,18 @@ on:
- cron: '0 1 * * *' # Runs at 1 AM every day
workflow_dispatch:
inputs:
run_all:
description: 'Run all evaluations (including usually passing)'
type: 'boolean'
default: true
suite_type:
description: 'Suite type to run'
type: 'choice'
options:
- 'behavioral'
- 'component-level'
- 'hero-scenario'
default: 'behavioral'
suite_name:
description: 'Specific suite name to run'
required: false
type: 'string'
test_name_pattern:
description: 'Test name pattern or file name'
required: false
@@ -59,7 +67,9 @@ jobs:
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
GEMINI_MODEL: '${{ matrix.model }}'
RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}"
RUN_EVALS: 'true'
EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}"
EVAL_SUITE_NAME: '${{ github.event.inputs.suite_name }}'
TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
# Disable Vitest internal retries to avoid double-retrying;
# custom retry logic is handled in evals/test-helper.ts
+33
View File
@@ -0,0 +1,33 @@
# Nightly memory test workflow.
# Checks out the repository, installs dependencies, builds the project and
# then runs `npm run test:memory`.
name: 'Memory Tests: Nightly'

on:
  # Allows the workflow to be started manually; no inputs are declared.
  workflow_dispatch:
  schedule:
    # GitHub evaluates cron schedules in UTC, so this fires at 02:00 UTC daily.
    - cron: '0 2 * * *'

# The workflow token is restricted to reading repository contents.
permissions:
  contents: 'read'

jobs:
  memory-test:
    name: 'Run Memory Usage Tests'
    # Skipped in any repository other than google-gemini/gemini-cli.
    if: "github.repository == 'google-gemini/gemini-cli'"
    runs-on: 'gemini-cli-ubuntu-16-core'
    steps:
      # Actions are pinned to a commit SHA; the trailing `ratchet:` comment
      # records the tag each pin corresponds to and must stay on the same line.
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

      # Node.js version is read from .nvmrc; npm caching is enabled.
      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
        with:
          cache: 'npm'
          node-version-file: '.nvmrc'

      - name: 'Install dependencies'
        run: 'npm ci'

      # The build runs before the memory tests.
      - name: 'Build project'
        run: 'npm run build'

      - name: 'Run Memory Tests'
        run: 'npm run test:memory'
+33
View File
@@ -0,0 +1,33 @@
# Nightly performance test workflow.
# Checks out the repository, installs dependencies, builds the project and
# then runs `npm run test:perf`.
name: 'Performance Tests: Nightly'

on:
  # Allows the workflow to be started manually; no inputs are declared.
  workflow_dispatch:
  schedule:
    # GitHub evaluates cron schedules in UTC, so this fires at 03:00 UTC daily.
    - cron: '0 3 * * *'

# The workflow token is restricted to reading repository contents.
permissions:
  contents: 'read'

jobs:
  perf-test:
    # NOTE(review): "Performance Usage" reads like a copy of the memory
    # workflow's 'Run Memory Usage Tests' job name - confirm the intended
    # wording before renaming, since this string is what the Actions UI shows.
    name: 'Run Performance Usage Tests'
    # Skipped in any repository other than google-gemini/gemini-cli.
    if: "github.repository == 'google-gemini/gemini-cli'"
    runs-on: 'gemini-cli-ubuntu-16-core'
    steps:
      # Actions are pinned to a commit SHA; the trailing `ratchet:` comment
      # records the tag each pin corresponds to and must stay on the same line.
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

      # Node.js version is read from .nvmrc; npm caching is enabled.
      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
        with:
          cache: 'npm'
          node-version-file: '.nvmrc'

      - name: 'Install dependencies'
        run: 'npm ci'

      # The build runs before the performance tests.
      - name: 'Build project'
        run: 'npm run build'

      - name: 'Run Performance Tests'
        run: 'npm run test:perf'