diff --git a/evals/model_steering.eval.ts b/evals/model_steering.eval.ts index 87109c1225..4a5ae46e3f 100644 --- a/evals/model_steering.eval.ts +++ b/evals/model_steering.eval.ts @@ -12,7 +12,7 @@ import { appEvalTest } from './app-test-helper.js'; import { PolicyDecision } from '@google/gemini-cli-core'; describe('Model Steering Behavioral Evals', () => { - appEvalTest('ALWAYS_PASSES', { + appEvalTest('USUALLY_PASSES', { name: 'Corrective Hint: Model switches task based on hint during tool turn', configOverrides: { excludeTools: ['run_shell_command', 'ls', 'google_web_search'], @@ -52,7 +52,7 @@ describe('Model Steering Behavioral Evals', () => { }, }); - appEvalTest('ALWAYS_PASSES', { + appEvalTest('USUALLY_PASSES', { name: 'Suggestive Hint: Model incorporates user guidance mid-stream', configOverrides: { excludeTools: ['run_shell_command', 'ls', 'google_web_search'],