2026-03-03 12:51:15 -05:00
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { describe , expect } from 'vitest' ;
2026-03-13 09:30:19 -04:00
import { appEvalTest , AppEvalCase } from './app-test-helper.js' ;
import { EvalPolicy } from './test-helper.js' ;
/**
 * Registers an ask_user eval case via `appEvalTest`, pinning a few `general`
 * settings on top of whatever the case itself provides.
 *
 * The case's own `configOverrides` (and its `general` section) are spread
 * first, so `approvalMode`, `enableAutoUpdate` and
 * `enableAutoUpdateNotification` set here always take precedence.
 *
 * @param policy - Eval policy, forwarded unchanged to `appEvalTest`.
 * @param evalCase - Case definition; its `files` map is shallow-copied.
 * @returns Whatever `appEvalTest` returns.
 */
function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
  const { configOverrides, files } = evalCase;

  return appEvalTest(policy, {
    ...evalCase,
    configOverrides: {
      ...configOverrides,
      general: {
        ...configOverrides?.general,
        // Fixed settings for every ask_user eval; these override the case.
        approvalMode: 'default',
        enableAutoUpdate: false,
        enableAutoUpdateNotification: false,
      },
    },
    // Always hand over a fresh object, even when the case declares no files.
    files: { ...files },
  });
}
2026-03-03 12:51:15 -05:00
describe ( 'ask_user' , ( ) = > {
2026-03-13 09:30:19 -04:00
// Eval: an explicit request for a multiple-choice question should surface an
// ask_user tool call.
askUserEvalTest('USUALLY_PASSES', {
  name: 'Agent uses AskUser tool to present multiple choice options',
  prompt: ` Use the ask_user tool to ask me what my favorite color is. Provide 3 options: red, green, or blue. `,
  setup: async (appRig) => {
    // Register a breakpoint on ask_user (presumably pauses the run at that
    // tool call so it can be inspected; confirm against the rig helper).
    appRig.setBreakpoint(['ask_user']);
  },
  assert: async (appRig) => {
    const pending = await appRig.waitForPendingConfirmation('ask_user');

    // The case passes as soon as an ask_user confirmation is reported.
    expect(
      pending,
      'Expected a pending confirmation for ask_user tool',
    ).toBeDefined();
  },
});
2026-03-13 09:30:19 -04:00
// Eval: when asked to clarify requirements for a new feature, the agent should
// reach for ask_user.
askUserEvalTest('USUALLY_PASSES', {
  name: 'Agent uses AskUser tool to clarify ambiguous requirements',
  // Minimal project fixture: a single package.json.
  files: {
    'package.json': JSON.stringify({ name: 'my-app', version: '1.0.0' }),
  },
  prompt: ` I want to build a new feature in this app. Ask me questions to clarify the requirements before proceeding. `,
  setup: async (appRig) => {
    // Register a breakpoint on ask_user (presumably pauses the run at that
    // tool call so it can be inspected; confirm against the rig helper).
    appRig.setBreakpoint(['ask_user']);
  },
  assert: async (appRig) => {
    const pending = await appRig.waitForPendingConfirmation('ask_user');

    // The case passes as soon as an ask_user confirmation is reported.
    expect(
      pending,
      'Expected a pending confirmation for ask_user tool',
    ).toBeDefined();
  },
});
2026-03-13 09:30:19 -04:00
// Eval: before a large, under-specified rewrite the agent should ask the user
// clarifying questions, optionally after first entering plan mode.
askUserEvalTest('USUALLY_PASSES', {
  name: 'Agent uses AskUser tool before performing significant ambiguous rework',
  // Small monorepo-like fixture so the "core package" in the prompt exists.
  files: {
    'packages/core/src/index.ts': '// index\nexport const version = "1.0.0";',
    'packages/core/src/util.ts': '// util\nexport function help() {}',
    'packages/core/package.json': JSON.stringify({
      name: '@google/gemini-cli-core',
    }),
    'README.md': '# Gemini CLI',
  },
  prompt: ` I want to completely rewrite the core package to support the upcoming V2 architecture, but I haven't decided what that looks like yet. We need to figure out the requirements first. Can you ask me some questions to help nail down the design? `,
  setup: async (rig) => {
    // Register breakpoints for both tools the agent may legitimately call.
    rig.setBreakpoint(['enter_plan_mode', 'ask_user']);
  },
  assert: async (rig) => {
    // It might call enter_plan_mode first.
    let confirmation = await rig.waitForPendingConfirmation([
      'enter_plan_mode',
      'ask_user',
    ]);
    expect(confirmation, 'Expected a tool call confirmation').toBeDefined();

    // Identify the pending tool through `toolName`, the same field the final
    // assertion below reads. The previous check used `name`, which is
    // inconsistent with every other lookup in this suite and would leave the
    // plan-mode branch unreachable if that field is not populated.
    if (confirmation?.toolName === 'enter_plan_mode') {
      // Approve plan mode, then wait for the follow-up ask_user call.
      rig.acceptConfirmation('enter_plan_mode');
      confirmation = await rig.waitForPendingConfirmation('ask_user');
    }

    expect(
      confirmation?.toolName,
      'Expected ask_user to be called to clarify the significant rework',
    ).toBe('ask_user');
  },
});
// --- Regression Tests for Recent Fixes ---
2026-03-13 09:30:19 -04:00
// Regression test for issue #20177: Ensure the agent does not use `ask_user` to
2026-03-03 12:51:15 -05:00
// confirm shell commands. Fixed via prompt refinements and tool definition
// updates to clarify that shell command confirmation is handled by the UI.
// See fix: https://github.com/google-gemini/gemini-cli/pull/20504
2026-03-13 09:30:19 -04:00
// Eval: a plain "run this command" request must go straight to
// run_shell_command rather than being routed through ask_user.
askUserEvalTest('USUALLY_PASSES', {
  name: 'Agent does NOT use AskUser to confirm shell commands',
  // Fixture providing the `build` script the prompt refers to.
  files: {
    'package.json': JSON.stringify({
      scripts: { build: 'echo building' },
    }),
  },
  prompt: ` Run 'npm run build' in the current directory. `,
  setup: async (appRig) => {
    // Register breakpoints for both tools so whichever is called first is
    // the one reported to the assertion.
    appRig.setBreakpoint(['run_shell_command', 'ask_user']);
  },
  assert: async (appRig) => {
    const pending = await appRig.waitForPendingConfirmation([
      'run_shell_command',
      'ask_user',
    ]);

    expect(pending, 'Expected a pending confirmation for a tool').toBeDefined();

    // The first confirmation must be the shell command itself.
    expect(
      pending?.toolName,
      'ask_user should not be called to confirm shell commands',
    ).toBe('run_shell_command');
  },
});
} ) ;