gemini-cli/evals/ask_user.eval.ts

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { appEvalTest, AppEvalCase } from './app-test-helper.js';
import { EvalPolicy } from './test-helper.js';

/**
 * Registers an app-level eval case with the settings shared by every
 * `ask_user` eval in this file.
 *
 * The caller's `configOverrides` are kept, but three `general` settings are
 * always pinned and win over caller-supplied values because they are spread
 * last: `approvalMode: 'default'`, `enableAutoUpdate: false` and
 * `enableAutoUpdateNotification: false`.
 *
 * @param policy - Eval policy forwarded unchanged to `appEvalTest`.
 * @param evalCase - Case definition; it is not mutated (copies are passed on).
 * @returns Whatever `appEvalTest` returns for the adjusted case.
 */
function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
  const callerOverrides = evalCase.configOverrides;

  // Caller-provided `general` values first, pinned values last so they win.
  const general = {
    ...callerOverrides?.general,
    // `as const` keeps the literal type that the inline object form had.
    approvalMode: 'default' as const,
    enableAutoUpdate: false,
    enableAutoUpdateNotification: false,
  };

  const configOverrides = { ...callerOverrides, general };

  // Shallow copy so the case's own `files` map is never shared downstream.
  const files = { ...evalCase.files };

  return appEvalTest(policy, { ...evalCase, configOverrides, files });
}

/**
 * Behavioral evals for the `ask_user` tool.
 *
 * Each case registers breakpoints on the tools of interest in `setup`, then in
 * `assert` waits for the first pending confirmation and checks which tool the
 * agent chose. All cases run with the 'USUALLY_PASSES' policy.
 */
describe('ask_user', () => {
  // Explicit request: the prompt names the tool and the options to offer.
  askUserEvalTest('USUALLY_PASSES', {
    name: 'Agent uses AskUser tool to present multiple choice options',
    prompt: `Use the ask_user tool to ask me what my favorite color is. Provide 3 options: red, green, or blue.`,
    setup: async (rig) => {
      rig.setBreakpoint(['ask_user']);
    },
    assert: async (rig) => {
      const confirmation = await rig.waitForPendingConfirmation('ask_user');
      expect(
        confirmation,
        'Expected a pending confirmation for ask_user tool',
      ).toBeDefined();
    },
  });

  // Implicit request: the prompt asks for clarifying questions without naming
  // the tool, inside a minimal project (just a package.json).
  askUserEvalTest('USUALLY_PASSES', {
    name: 'Agent uses AskUser tool to clarify ambiguous requirements',
    files: {
      'package.json': JSON.stringify({ name: 'my-app', version: '1.0.0' }),
    },
    prompt: `I want to build a new feature in this app. Ask me questions to clarify the requirements before proceeding.`,
    setup: async (rig) => {
      rig.setBreakpoint(['ask_user']);
    },
    assert: async (rig) => {
      const confirmation = await rig.waitForPendingConfirmation('ask_user');
      expect(
        confirmation,
        'Expected a pending confirmation for ask_user tool',
      ).toBeDefined();
    },
  });

  // Large, underspecified rework: the agent may enter plan mode first, but it
  // must end up calling ask_user.
  askUserEvalTest('USUALLY_PASSES', {
    name: 'Agent uses AskUser tool before performing significant ambiguous rework',
    files: {
      'packages/core/src/index.ts': '// index\nexport const version = "1.0.0";',
      'packages/core/src/util.ts': '// util\nexport function help() {}',
      'packages/core/package.json': JSON.stringify({
        name: '@google/gemini-cli-core',
      }),
      'README.md': '# Gemini CLI',
    },
    prompt: `I want to completely rewrite the core package to support the upcoming V2 architecture, but I haven't decided what that looks like yet. We need to figure out the requirements first. Can you ask me some questions to help nail down the design?`,
    setup: async (rig) => {
      rig.setBreakpoint(['enter_plan_mode', 'ask_user']);
    },
    assert: async (rig) => {
      // It might call enter_plan_mode first.
      let confirmation = await rig.waitForPendingConfirmation([
        'enter_plan_mode',
        'ask_user',
      ]);
      expect(confirmation, 'Expected a tool call confirmation').toBeDefined();

      // Compare `toolName`, the same field the final assertion reads. The
      // previous check used `name`, which no other assertion in this file
      // relies on, so the plan-mode detour was never accepted and the case
      // failed whenever the agent entered plan mode first.
      if (confirmation?.toolName === 'enter_plan_mode') {
        rig.acceptConfirmation('enter_plan_mode');
        confirmation = await rig.waitForPendingConfirmation('ask_user');
      }

      expect(
        confirmation?.toolName,
        'Expected ask_user to be called to clarify the significant rework',
      ).toBe('ask_user');
    },
  });

  // --- Regression Tests for Recent Fixes ---

  // Regression test for issue #20177: Ensure the agent does not use `ask_user`
  // to confirm shell commands. Fixed via prompt refinements and tool definition
  // updates to clarify that shell command confirmation is handled by the UI.
  // See fix: https://github.com/google-gemini/gemini-cli/pull/20504
  askUserEvalTest('USUALLY_PASSES', {
    name: 'Agent does NOT use AskUser to confirm shell commands',
    files: {
      'package.json': JSON.stringify({
        scripts: { build: 'echo building' },
      }),
    },
    prompt: `Run 'npm run build' in the current directory.`,
    setup: async (rig) => {
      // Watch both tools so that whichever one the agent reaches first is the
      // confirmation inspected below.
      rig.setBreakpoint(['run_shell_command', 'ask_user']);
    },
    assert: async (rig) => {
      const confirmation = await rig.waitForPendingConfirmation([
        'run_shell_command',
        'ask_user',
      ]);

      expect(
        confirmation,
        'Expected a pending confirmation for a tool',
      ).toBeDefined();

      // The first tool reached must be the shell command itself, not ask_user.
      expect(
        confirmation?.toolName,
        'ask_user should not be called to confirm shell commands',
      ).toBe('run_shell_command');
    },
  });
});