diff --git a/evals/ask_user.eval.ts b/evals/ask_user.eval.ts index 580081108a..60d89f7b5b 100644 --- a/evals/ask_user.eval.ts +++ b/evals/ask_user.eval.ts @@ -31,7 +31,7 @@ function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) { } describe('ask_user', () => { - askUserEvalTest('ALWAYS_PASSES', { + askUserEvalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', name: 'Agent uses AskUser tool to present multiple choice options', @@ -48,7 +48,7 @@ describe('ask_user', () => { }, }); - askUserEvalTest('ALWAYS_PASSES', { + askUserEvalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', name: 'Agent uses AskUser tool to clarify ambiguous requirements', @@ -68,7 +68,7 @@ describe('ask_user', () => { }, }); - askUserEvalTest('ALWAYS_PASSES', { + askUserEvalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', name: 'Agent uses AskUser tool before performing significant ambiguous rework', @@ -110,7 +110,7 @@ describe('ask_user', () => { // confirm shell commands. Fixed via prompt refinements and tool definition // updates to clarify that shell command confirmation is handled by the UI. // See fix: https://github.com/google-gemini/gemini-cli/pull/20504 - askUserEvalTest('ALWAYS_PASSES', { + askUserEvalTest('USUALLY_PASSES', { suiteName: 'default', suiteType: 'behavioral', name: 'Agent does NOT use AskUser to confirm shell commands',