feat(ui): implement refreshed UX for Composer layout

- Promotes refreshed multi-row status area and footer as the default experience.
- Stabilizes Composer row heights to prevent layout 'jitter' during typing and model turns.
- Unifies active hook status and model loading indicators into a single, stable Row 1.
- Refactors settings to use backward-compatible 'Hide' booleans (ui.hideStatusTips, ui.hideStatusWit).
- Removes vestigial context usage bleed-through logic in minimal mode to align with global UX direction.
- Relocates toast notifications to the top status row for improved visibility.
- Updates all CLI UI snapshots and architectural tests to reflect the stabilized layout.
2026-04-03 01:40:59 -07:00 · 2026-03-17 15:00:53 -07:00
parent ff196fbe6f
commit 576eaff9cd
371 changed files with 4713 additions and 14249 deletions
--- a/evals/answer-vs-act.eval.ts
+++ b/evals/answer-vs-act.eval.ts
@@ -111,7 +111,7 @@ describe('Answer vs. ask eval', () => {
   * Ensures that when the user asks a question about style, the agent does NOT
   * automatically modify the file.
   */
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: 'should not edit files when asked about style',
    prompt: 'Is app.ts following good style?',
    files: FILES,
--- a/evals/ask_user.eval.ts
+++ b/evals/ask_user.eval.ts
@@ -5,62 +5,31 @@
 */

 import { describe, expect } from 'vitest';
-import { appEvalTest, AppEvalCase } from './app-test-helper.js';
-import { EvalPolicy } from './test-helper.js';
-
-function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
-  return appEvalTest(policy, {
-    ...evalCase,
-    configOverrides: {
-      ...evalCase.configOverrides,
-      general: {
-        ...evalCase.configOverrides?.general,
-        approvalMode: 'default',
-        enableAutoUpdate: false,
-        enableAutoUpdateNotification: false,
-      },
-    },
-    files: {
-      ...evalCase.files,
-    },
-  });
-}
+import { evalTest } from './test-helper.js';

 describe('ask_user', () => {
-  askUserEvalTest('USUALLY_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: 'Agent uses AskUser tool to present multiple choice options',
    prompt: `Use the ask_user tool to ask me what my favorite color is. Provide 3 options: red, green, or blue.`,
-    setup: async (rig) => {
-      rig.setBreakpoint(['ask_user']);
-    },
    assert: async (rig) => {
-      const confirmation = await rig.waitForPendingConfirmation('ask_user');
-      expect(
-        confirmation,
-        'Expected a pending confirmation for ask_user tool',
-      ).toBeDefined();
+      const wasToolCalled = await rig.waitForToolCall('ask_user');
+      expect(wasToolCalled, 'Expected ask_user tool to be called').toBe(true);
    },
  });

-  askUserEvalTest('USUALLY_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: 'Agent uses AskUser tool to clarify ambiguous requirements',
    files: {
      'package.json': JSON.stringify({ name: 'my-app', version: '1.0.0' }),
    },
    prompt: `I want to build a new feature in this app. Ask me questions to clarify the requirements before proceeding.`,
-    setup: async (rig) => {
-      rig.setBreakpoint(['ask_user']);
-    },
    assert: async (rig) => {
-      const confirmation = await rig.waitForPendingConfirmation('ask_user');
-      expect(
-        confirmation,
-        'Expected a pending confirmation for ask_user tool',
-      ).toBeDefined();
+      const wasToolCalled = await rig.waitForToolCall('ask_user');
+      expect(wasToolCalled, 'Expected ask_user tool to be called').toBe(true);
    },
  });

-  askUserEvalTest('USUALLY_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: 'Agent uses AskUser tool before performing significant ambiguous rework',
    files: {
      'packages/core/src/index.ts': '// index\nexport const version = "1.0.0";',
@@ -70,37 +39,28 @@ describe('ask_user', () => {
      }),
      'README.md': '# Gemini CLI',
    },
-    prompt: `I want to completely rewrite the core package to support the upcoming V2 architecture, but I haven't decided what that looks like yet. We need to figure out the requirements first. Can you ask me some questions to help nail down the design?`,
-    setup: async (rig) => {
-      rig.setBreakpoint(['enter_plan_mode', 'ask_user']);
-    },
+    prompt: `Refactor the entire core package to be better.`,
    assert: async (rig) => {
-      // It might call enter_plan_mode first.
-      let confirmation = await rig.waitForPendingConfirmation([
-        'enter_plan_mode',
-        'ask_user',
-      ]);
-      expect(confirmation, 'Expected a tool call confirmation').toBeDefined();
-
-      if (confirmation?.name === 'enter_plan_mode') {
-        rig.acceptConfirmation('enter_plan_mode');
-        confirmation = await rig.waitForPendingConfirmation('ask_user');
-      }
+      const wasPlanModeCalled = await rig.waitForToolCall('enter_plan_mode');
+      expect(wasPlanModeCalled, 'Expected enter_plan_mode to be called').toBe(
+        true,
+      );

+      const wasAskUserCalled = await rig.waitForToolCall('ask_user');
      expect(
-        confirmation?.toolName,
-        'Expected ask_user to be called to clarify the significant rework',
-      ).toBe('ask_user');
+        wasAskUserCalled,
+        'Expected ask_user tool to be called to clarify the significant rework',
+      ).toBe(true);
    },
  });

  // --- Regression Tests for Recent Fixes ---

-  // Regression test for issue #20177: Ensure the agent does not use \`ask_user\` to
+  // Regression test for issue #20177: Ensure the agent does not use `ask_user` to
  // confirm shell commands. Fixed via prompt refinements and tool definition
  // updates to clarify that shell command confirmation is handled by the UI.
  // See fix: https://github.com/google-gemini/gemini-cli/pull/20504
-  askUserEvalTest('USUALLY_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: 'Agent does NOT use AskUser to confirm shell commands',
    files: {
      'package.json': JSON.stringify({
@@ -108,24 +68,25 @@ describe('ask_user', () => {
      }),
    },
    prompt: `Run 'npm run build' in the current directory.`,
-    setup: async (rig) => {
-      rig.setBreakpoint(['run_shell_command', 'ask_user']);
-    },
    assert: async (rig) => {
-      const confirmation = await rig.waitForPendingConfirmation([
-        'run_shell_command',
-        'ask_user',
-      ]);
+      await rig.waitForTelemetryReady();
+
+      const toolLogs = rig.readToolLogs();
+      const wasShellCalled = toolLogs.some(
+        (log) => log.toolRequest.name === 'run_shell_command',
+      );
+      const wasAskUserCalled = toolLogs.some(
+        (log) => log.toolRequest.name === 'ask_user',
+      );

      expect(
-        confirmation,
-        'Expected a pending confirmation for a tool',
-      ).toBeDefined();
-
+        wasShellCalled,
+        'Expected run_shell_command tool to be called',
+      ).toBe(true);
      expect(
-        confirmation?.toolName,
+        wasAskUserCalled,
        'ask_user should not be called to confirm shell commands',
-      ).toBe('run_shell_command');
+      ).toBe(false);
    },
  });
 });
--- a/evals/hierarchical_memory.eval.ts
+++ b/evals/hierarchical_memory.eval.ts
@@ -11,7 +11,7 @@ import { assertModelHasOutput } from '../integration-tests/test-helper.js';
 describe('Hierarchical Memory', () => {
  const conflictResolutionTest =
    'Agent follows hierarchy for contradictory instructions';
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: conflictResolutionTest,
    params: {
      settings: {
--- a/evals/save_memory.eval.ts
+++ b/evals/save_memory.eval.ts
@@ -14,7 +14,7 @@ import {
 describe('save_memory', () => {
  const TEST_PREFIX = 'Save memory test: ';
  const rememberingFavoriteColor = "Agent remembers user's favorite color";
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: rememberingFavoriteColor,
    params: {
      settings: { tools: { core: ['save_memory'] } },
@@ -79,7 +79,7 @@ describe('save_memory', () => {

  const ignoringTemporaryInformation =
    'Agent ignores temporary conversation details';
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: ignoringTemporaryInformation,
    params: {
      settings: { tools: { core: ['save_memory'] } },
@@ -104,7 +104,7 @@ describe('save_memory', () => {
  });

  const rememberingPetName = "Agent remembers user's pet's name";
-  evalTest('ALWAYS_PASSES', {
+  evalTest('USUALLY_PASSES', {
    name: rememberingPetName,
    params: {
      settings: { tools: { core: ['save_memory'] } },