From 46231a1755e8e03bc8c026b98c7e3a962b318a2c Mon Sep 17 00:00:00 2001
From: Christian Gunderman
Date: Tue, 3 Mar 2026 00:29:31 +0000
Subject: [PATCH] ci(evals): only run evals in CI if prompts or tools changed (#20898)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 .github/workflows/chained_e2e.yml | 15 ++++++-
 scripts/changed_prompt.js         | 71 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 scripts/changed_prompt.js

diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml
index 2e1586bcd4..3633c5027b 100644
--- a/.github/workflows/chained_e2e.yml
+++ b/.github/workflows/chained_e2e.yml
@@ -290,6 +290,7 @@ jobs:
         with:
           ref: '${{ needs.parse_run_context.outputs.sha }}'
           repository: '${{ needs.parse_run_context.outputs.repository }}'
+          fetch-depth: 0

       - name: 'Set up Node.js 20.x'
         uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4
@@ -302,7 +303,14 @@ jobs:
       - name: 'Build project'
         run: 'npm run build'

-      - name: 'Run Evals (ALWAYS_PASSING)'
+      - name: 'Check if evals should run'
+        id: 'check_evals'
+        run: |
+          SHOULD_RUN=$(node scripts/changed_prompt.js)
+          echo "should_run=$SHOULD_RUN" >> "$GITHUB_OUTPUT"
+
+      - name: 'Run Evals (Required to pass)'
+        if: "${{ steps.check_evals.outputs.should_run == 'true' }}"
         env:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
         run: 'npm run test:always_passing_evals'
@@ -315,6 +323,7 @@ jobs:
       - 'e2e_linux'
       - 'e2e_mac'
       - 'e2e_windows'
+      - 'evals'
       - 'merge_queue_skipper'
     runs-on: 'gemini-cli-ubuntu-16-core'
     steps:
@@ -322,7 +331,8 @@ jobs:
         run: |
           if [[ ${NEEDS_E2E_LINUX_RESULT} != 'success' || \
                 ${NEEDS_E2E_MAC_RESULT} != 'success' || \
-                ${NEEDS_E2E_WINDOWS_RESULT} != 'success' ]]; then
+                ${NEEDS_E2E_WINDOWS_RESULT} != 'success' || \
+                ${NEEDS_EVALS_RESULT} != 'success' ]]; then
             echo "One or more E2E jobs failed."
             exit 1
           fi
@@ -331,6 +341,7 @@ jobs:
           NEEDS_E2E_LINUX_RESULT: '${{ needs.e2e_linux.result }}'
           NEEDS_E2E_MAC_RESULT: '${{ needs.e2e_mac.result }}'
           NEEDS_E2E_WINDOWS_RESULT: '${{ needs.e2e_windows.result }}'
+          NEEDS_EVALS_RESULT: '${{ needs.evals.result }}'

   set_workflow_status:
     runs-on: 'gemini-cli-ubuntu-16-core'
diff --git a/scripts/changed_prompt.js b/scripts/changed_prompt.js
new file mode 100644
index 0000000000..9cf7c1a261
--- /dev/null
+++ b/scripts/changed_prompt.js
@@ -0,0 +1,71 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import { execFileSync } from 'node:child_process';
+
+const EVALS_FILE_PREFIXES = [
+  'packages/core/src/prompts/',
+  'packages/core/src/tools/',
+  'evals/',
+];
+
+/**
+ * Reports whether any changed file lives under a path that affects evals.
+ *
+ * @param {string[]} changedFiles Repository-relative paths.
+ * @returns {boolean} True if at least one path starts with an evals prefix.
+ */
+function touchesEvals(changedFiles) {
+  return changedFiles.some((file) =>
+    EVALS_FILE_PREFIXES.some((prefix) => file.startsWith(prefix)),
+  );
+}
+
+/**
+ * Prints `true` or `false` on stdout to tell CI whether evals should run.
+ *
+ * Diffs HEAD against its merge base with the target branch
+ * (`GITHUB_BASE_REF`, or `main` when that is unset or empty). Any failure
+ * prints `true` so evals are not skipped by mistake. Diagnostics are written
+ * to stderr, keeping stdout limited to the single answer line.
+ */
+function main() {
+  const targetBranch = process.env.GITHUB_BASE_REF || 'main';
+  try {
+    // Fetch target branch from origin. Arguments are passed as an array (no
+    // shell), so the branch name from the environment is never interpreted.
+    execFileSync('git', ['fetch', 'origin', targetBranch], {
+      stdio: 'ignore',
+    });
+
+    // Find the merge base with the target branch.
+    const mergeBase = execFileSync(
+      'git',
+      ['merge-base', 'HEAD', 'FETCH_HEAD'],
+      { encoding: 'utf-8' },
+    ).trim();
+
+    // Get changed files. `-z` makes git emit raw, NUL-terminated paths
+    // instead of quoting unusual characters, so prefix matching stays exact.
+    const changedFiles = execFileSync(
+      'git',
+      ['diff', '--name-only', '-z', mergeBase, 'HEAD'],
+      { encoding: 'utf-8' },
+    )
+      .split('\0')
+      .filter(Boolean);
+
+    console.log(touchesEvals(changedFiles) ? 'true' : 'false');
+  } catch (error) {
+    // If anything fails (e.g., no git history), run evals to be safe
+    console.warn(
+      'Warning: Failed to determine if evals should run. Defaulting to true.',
+    );
+    console.error(error);
+    console.log('true');
+  }
+}
+
+main();