From fbb17ebf58dfb753dfb8bc6ef0b4ae44c831d0dd Mon Sep 17 00:00:00 2001 From: Alisa <62909685+alisa-alisa@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:52:35 -0700 Subject: [PATCH] Disabling failing test while investigating (#23311) --- evals/model_steering.eval.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evals/model_steering.eval.ts b/evals/model_steering.eval.ts index 87109c1225..4a5ae46e3f 100644 --- a/evals/model_steering.eval.ts +++ b/evals/model_steering.eval.ts @@ -12,7 +12,7 @@ import { appEvalTest } from './app-test-helper.js'; import { PolicyDecision } from '@google/gemini-cli-core'; describe('Model Steering Behavioral Evals', () => { - appEvalTest('ALWAYS_PASSES', { + appEvalTest('USUALLY_PASSES', { name: 'Corrective Hint: Model switches task based on hint during tool turn', configOverrides: { excludeTools: ['run_shell_command', 'ls', 'google_web_search'], @@ -52,7 +52,7 @@ describe('Model Steering Behavioral Evals', () => { }, }); - appEvalTest('ALWAYS_PASSES', { + appEvalTest('USUALLY_PASSES', { name: 'Suggestive Hint: Model incorporates user guidance mid-stream', configOverrides: { excludeTools: ['run_shell_command', 'ls', 'google_web_search'],