fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)

2026-05-21 01:02:54 -07:00 · 2026-04-01 07:00:16 +08:00
parent 94f9480a3a
commit 782bb4e4bd
5 changed files with 144 additions and 128 deletions
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { execSync } from 'node:child_process';
 import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
+import { env } from 'node:process';
 import stripAnsi from 'strip-ansi';

+// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
+// See: https://github.com/google-gemini/gemini-cli/issues/24382
+const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
+
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);

@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
    await rig.cleanup();
  });

-  it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
-    rig.setup('browser-policy-skip-confirmation', {
-      fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
-      settings: {
-        agents: {
-          overrides: {
-            browser_agent: {
-              enabled: true,
+  it.skipIf(isDockerSandbox)(
+    'should skip confirmation when "Allow all server tools for this session" is chosen',
+    async () => {
+      rig.setup('browser-policy-skip-confirmation', {
+        fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
+        settings: {
+          agents: {
+            overrides: {
+              browser_agent: {
+                enabled: true,
+              },
+            },
+            browser: {
+              headless: true,
+              sessionMode: 'isolated',
+              allowedDomains: ['example.com'],
            },
          },
-          browser: {
-            headless: true,
-            sessionMode: 'isolated',
-            allowedDomains: ['example.com'],
-          },
        },
-      },
-    });
+      });

-    // Manually trust the folder to avoid the dialog and enable option 3
-    const geminiDir = join(rig.homeDir!, '.gemini');
-    mkdirSync(geminiDir, { recursive: true });
+      // Manually trust the folder to avoid the dialog and enable option 3
+      const geminiDir = join(rig.homeDir!, '.gemini');
+      mkdirSync(geminiDir, { recursive: true });

-    // Write to trustedFolders.json
-    const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
-    const trustedFolders = {
-      [rig.testDir!]: 'TRUST_FOLDER',
-    };
-    writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
+      // Write to trustedFolders.json
+      const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
+      const trustedFolders = {
+        [rig.testDir!]: 'TRUST_FOLDER',
+      };
+      writeFileSync(
+        trustedFoldersPath,
+        JSON.stringify(trustedFolders, null, 2),
+      );

-    // Force confirmation for browser agent.
-    // NOTE: We don't force confirm browser tools here because "Allow all server tools"
-    // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
-    // a rule in the user tier (4.x) like the one from this TOML.
-    // By removing the explicit mcp rule, the first MCP tool will still prompt
-    // due to default approvalMode = 'default', and then "Allow all" will correctly
-    // bypass subsequent tools.
-    const policyFile = join(rig.testDir!, 'force-confirm.toml');
-    writeFileSync(
-      policyFile,
-      `
+      // Force confirmation for browser agent.
+      // NOTE: We don't force confirm browser tools here because "Allow all server tools"
+      // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
+      // a rule in the user tier (4.x) like the one from this TOML.
+      // By removing the explicit mcp rule, the first MCP tool will still prompt
+      // due to default approvalMode = 'default', and then "Allow all" will correctly
+      // bypass subsequent tools.
+      const policyFile = join(rig.testDir!, 'force-confirm.toml');
+      writeFileSync(
+        policyFile,
+        `
 [[rule]]
 name = "Force confirm browser_agent"
 toolName = "browser_agent"
 decision = "ask_user"
 priority = 200
 `,
-    );
+      );

-    // Update settings.json in both project and home directories to point to the policy file
-    for (const baseDir of [rig.testDir!, rig.homeDir!]) {
-      const settingsPath = join(baseDir, '.gemini', 'settings.json');
-      if (existsSync(settingsPath)) {
-        const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
-        settings.policyPaths = [policyFile];
-        // Ensure folder trust is enabled
-        settings.security = settings.security || {};
-        settings.security.folderTrust = settings.security.folderTrust || {};
-        settings.security.folderTrust.enabled = true;
-        writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+      // Update settings.json in both project and home directories to point to the policy file
+      for (const baseDir of [rig.testDir!, rig.homeDir!]) {
+        const settingsPath = join(baseDir, '.gemini', 'settings.json');
+        if (existsSync(settingsPath)) {
+          const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
+          settings.policyPaths = [policyFile];
+          // Ensure folder trust is enabled
+          settings.security = settings.security || {};
+          settings.security.folderTrust = settings.security.folderTrust || {};
+          settings.security.folderTrust.enabled = true;
+          writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+        }
      }
-    }

-    const run = await rig.runInteractive({
-      approvalMode: 'default',
-      env: {
-        GEMINI_CLI_INTEGRATION_TEST: 'true',
-      },
-    });
+      const run = await rig.runInteractive({
+        approvalMode: 'default',
+        env: {
+          GEMINI_CLI_INTEGRATION_TEST: 'true',
+        },
+      });

-    await run.sendKeys(
-      'Open https://example.com and check if there is a heading\r',
-    );
-    await run.sendKeys('\r');
+      await run.sendKeys(
+        'Open https://example.com and check if there is a heading\r',
+      );
+      await run.sendKeys('\r');

-    // Handle confirmations.
-    // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('action required'),
-      60000,
-      1000,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 2000));
+      // Handle confirmations.
+      // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('action required'),
+        60000,
+        1000,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 2000));

-    // Handle privacy notice
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
-      5000,
-      100,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 5000));
+      // Handle privacy notice
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
+        5000,
+        100,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 5000));

-    // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
-    await poll(
-      () => {
-        const stripped = stripAnsi(run.output).toLowerCase();
-        return (
-          stripped.includes('new_page') &&
-          stripped.includes('allow all server tools for this session')
-        );
-      },
-      60000,
-      1000,
-    );
+      // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
+      await poll(
+        () => {
+          const stripped = stripAnsi(run.output).toLowerCase();
+          return (
+            stripped.includes('new_page') &&
+            stripped.includes('allow all server tools for this session')
+          );
+        },
+        60000,
+        1000,
+      );

-    // Select "Allow all server tools for this session" (option 3)
-    await run.sendKeys('3\r');
-    await new Promise((r) => setTimeout(r, 30000));
+      // Select "Allow all server tools for this session" (option 3)
+      await run.sendKeys('3\r');
+      await new Promise((r) => setTimeout(r, 30000));

-    const output = stripAnsi(run.output).toLowerCase();
+      const output = stripAnsi(run.output).toLowerCase();

-    expect(output).toContain('browser_agent');
-    expect(output).toContain('completed successfully');
-  });
+      expect(output).toContain('browser_agent');
+      expect(output).toContain('completed successfully');
+    },
+  );

  it('should show the visible warning when browser agent starts in existing session mode', async () => {
    rig.setup('browser-session-warning', {
@@ -121,6 +121,7 @@ describe('file-system', () => {

    const result = await rig.run({
      args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
+      timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
    });

    const foundToolCall = await rig.waitForToolCall('write_file');
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
      expect(browserManager).toBeDefined();
    });

-    it('should call printOutput when provided', async () => {
+    it('should not call printOutput for internal setup messages', async () => {
      const printOutput = vi.fn();

      await createBrowserAgentDefinition(
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
        printOutput,
      );

-      expect(printOutput).toHaveBeenCalled();
+      expect(printOutput).not.toHaveBeenCalled();
    });

    it('should create definition with correct structure', async () => {
@@ -53,7 +53,7 @@ import {
 export async function createBrowserAgentDefinition(
  config: Config,
  messageBus: MessageBus,
-  printOutput?: (msg: string) => void,
+  _printOutput?: (msg: string) => void,
 ): Promise<{
  definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
  browserManager: BrowserManager;
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
  const browserManager = BrowserManager.getInstance(config);
  await browserManager.ensureConnection();

-  if (printOutput) {
-    printOutput('Browser connected with isolated MCP client.');
-  }
+  debugLogger.log('Browser connected with isolated MCP client.');

  // Determine if input blocker should be active (non-headless + enabled)
  const shouldDisableInput = config.shouldDisableBrowserUserInput();
  // Inject automation overlay and input blocker if not in headless mode
  const browserConfig = config.getBrowserAgentConfig();
  if (!browserConfig?.customConfig?.headless) {
-    if (printOutput) {
-      printOutput('Injecting automation overlay...');
-    }
+    debugLogger.log('Injecting automation overlay...');
    await injectAutomationOverlay(browserManager);
    if (shouldDisableInput) {
-      if (printOutput) {
-        printOutput('Injecting input blocker...');
-      }
+      debugLogger.log('Injecting input blocker...');
      await injectInputBlocker(browserManager);
    }
  }
@@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto';
 import type { Config } from '../../config/config.js';
 import { type AgentLoopContext } from '../../config/agent-loop-context.js';
 import { LocalAgentExecutor } from '../local-executor.js';
-import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
 import {
  BaseToolInvocation,
  type ToolResult,
@@ -30,6 +29,7 @@ import {
  type SubagentActivityEvent,
  type SubagentProgress,
  type SubagentActivityItem,
+  AgentTerminateMode,
  isToolActivityError,
 } from '../types.js';
 import type { MessageBus } from '../../confirmation-bus/message-bus.js';
@@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
  AgentInputs,
  ToolResult
 > {
+  private readonly agentName: string;
+
  constructor(
    private readonly context: AgentLoopContext,
    params: AgentInputs,
@@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
    _toolName?: string,
    _toolDisplayName?: string,
  ) {
+    const resolvedName = _toolName ?? 'browser_agent';
    // Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
    super(
      params,
      messageBus,
-      _toolName ?? 'browser_agent',
+      resolvedName,
      _toolDisplayName ?? 'Browser Agent',
    );
+    this.agentName = resolvedName;
  }

  private get config(): Config {
@@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
        // Send initial state
        const initialProgress: SubagentProgress = {
          isSubagentProgress: true,
-          agentName: this['_toolName'] ?? 'browser_agent',
+          agentName: this.agentName,
          recentActivity: [],
          state: 'running',
        };
@@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
            }
            updateOutput({
              isSubagentProgress: true,
-              agentName: this['_toolName'] ?? 'browser_agent',
+              agentName: this.agentName,
              recentActivity: [...recentActivity],
              state: 'running',
            } as SubagentProgress);
@@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<

          const progress: SubagentProgress = {
            isSubagentProgress: true,
-            agentName: this['_toolName'] ?? 'browser_agent',
+            agentName: this.agentName,
            recentActivity: [...recentActivity],
            state: 'running',
          };
@@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation<

      const output = await executor.run(this.params, signal);

-      const displayResult = safeJsonToMarkdown(output.result);
-
      const resultContent = `Browser agent finished.
 Termination Reason: ${output.terminate_reason}
 Result:
 ${output.result}`;

-      const displayContent = `
-Browser Agent Finished
+      // Map terminate_reason to the correct SubagentProgress state.
+      // GOAL = agent completed its task normally.
+      // ABORTED = user cancelled.
+      // Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error.
+      let progressState: SubagentProgress['state'];
+      if (output.terminate_reason === AgentTerminateMode.ABORTED) {
+        progressState = 'cancelled';
+      } else if (output.terminate_reason === AgentTerminateMode.GOAL) {
+        progressState = 'completed';
+      } else {
+        progressState = 'error';
+      }

-Termination Reason: ${output.terminate_reason}
-
-Result:
-${displayResult}
-`;
+      const progress: SubagentProgress = {
+        isSubagentProgress: true,
+        agentName: this.agentName,
+        recentActivity: [...recentActivity],
+        state: progressState,
+        result: output.result,
+        terminateReason: output.terminate_reason,
+      };

      if (updateOutput) {
-        updateOutput({
-          isSubagentProgress: true,
-          agentName: this['_toolName'] ?? 'browser_agent',
-          recentActivity: [...recentActivity],
-          state: 'completed',
-        } as SubagentProgress);
+        updateOutput(progress);
      }

      return {
        llmContent: [{ text: resultContent }],
-        returnDisplay: displayContent,
+        returnDisplay: progress,
      };
    } catch (error) {
      const rawErrorMessage =
@@ -343,7 +353,7 @@ ${displayResult}

      const progress: SubagentProgress = {
        isSubagentProgress: true,
-        agentName: this['_toolName'] ?? 'browser_agent',
+        agentName: this.agentName,
        recentActivity: [...recentActivity],
        state: isAbort ? 'cancelled' : 'error',
      };