From 782bb4e4bd022988a40f0837e3f6b2844243a04b Mon Sep 17 00:00:00 2001
From: Gaurav <39389231+gsquared94@users.noreply.github.com>
Date: Wed, 1 Apr 2026 07:00:16 +0800
Subject: [PATCH] fix(core): fix browser agent UX issues and improve E2E test
 reliability (#24312)

---
 integration-tests/browser-policy.test.ts      | 199 +++++++++---------
 integration-tests/file-system.test.ts         |   1 +
 .../browser/browserAgentFactory.test.ts       |   4 +-
 .../src/agents/browser/browserAgentFactory.ts |  14 +-
 .../agents/browser/browserAgentInvocation.ts  |  54 +++--
 5 files changed, 144 insertions(+), 128 deletions(-)

diff --git a/integration-tests/browser-policy.test.ts b/integration-tests/browser-policy.test.ts
index f533cb3f5e..7d60ab2c7e 100644
--- a/integration-tests/browser-policy.test.ts
+++ b/integration-tests/browser-policy.test.ts
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { execSync } from 'node:child_process';
 import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
+import { env } from 'node:process';
 import stripAnsi from 'strip-ansi';
 
+// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
+// See: https://github.com/google-gemini/gemini-cli/issues/24382
+const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
+
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);
 
@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
     await rig.cleanup();
   });
 
-  it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
-    rig.setup('browser-policy-skip-confirmation', {
-      fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
-      settings: {
-        agents: {
-          overrides: {
-            browser_agent: {
-              enabled: true,
+  it.skipIf(isDockerSandbox)(
+    'should skip confirmation when "Allow all server tools for this session" is chosen',
+    async () => {
+      rig.setup('browser-policy-skip-confirmation', {
+        fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
+        settings: {
+          agents: {
+            overrides: {
+              browser_agent: {
+                enabled: true,
+              },
+            },
+            browser: {
+              headless: true,
+              sessionMode: 'isolated',
+              allowedDomains: ['example.com'],
             },
           },
-          browser: {
-            headless: true,
-            sessionMode: 'isolated',
-            allowedDomains: ['example.com'],
-          },
         },
-      },
-    });
+      });
 
-    // Manually trust the folder to avoid the dialog and enable option 3
-    const geminiDir = join(rig.homeDir!, '.gemini');
-    mkdirSync(geminiDir, { recursive: true });
+      // Manually trust the folder to avoid the dialog and enable option 3
+      const geminiDir = join(rig.homeDir!, '.gemini');
+      mkdirSync(geminiDir, { recursive: true });
 
-    // Write to trustedFolders.json
-    const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
-    const trustedFolders = {
-      [rig.testDir!]: 'TRUST_FOLDER',
-    };
-    writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
+      // Write to trustedFolders.json
+      const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
+      const trustedFolders = {
+        [rig.testDir!]: 'TRUST_FOLDER',
+      };
+      writeFileSync(
+        trustedFoldersPath,
+        JSON.stringify(trustedFolders, null, 2),
+      );
 
-    // Force confirmation for browser agent.
-    // NOTE: We don't force confirm browser tools here because "Allow all server tools"
-    // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
-    // a rule in the user tier (4.x) like the one from this TOML.
-    // By removing the explicit mcp rule, the first MCP tool will still prompt
-    // due to default approvalMode = 'default', and then "Allow all" will correctly
-    // bypass subsequent tools.
-    const policyFile = join(rig.testDir!, 'force-confirm.toml');
-    writeFileSync(
-      policyFile,
-      `
+      // Force confirmation for browser agent.
+      // NOTE: We don't force confirm browser tools here because "Allow all server tools"
+      // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
+      // a rule in the user tier (4.x) like the one from this TOML.
+      // By removing the explicit mcp rule, the first MCP tool will still prompt
+      // due to default approvalMode = 'default', and then "Allow all" will correctly
+      // bypass subsequent tools.
+      const policyFile = join(rig.testDir!, 'force-confirm.toml');
+      writeFileSync(
+        policyFile,
+        `
 [[rule]]
 name = "Force confirm browser_agent"
 toolName = "browser_agent"
 decision = "ask_user"
 priority = 200
 `,
-    );
+      );
 
-    // Update settings.json in both project and home directories to point to the policy file
-    for (const baseDir of [rig.testDir!, rig.homeDir!]) {
-      const settingsPath = join(baseDir, '.gemini', 'settings.json');
-      if (existsSync(settingsPath)) {
-        const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
-        settings.policyPaths = [policyFile];
-        // Ensure folder trust is enabled
-        settings.security = settings.security || {};
-        settings.security.folderTrust = settings.security.folderTrust || {};
-        settings.security.folderTrust.enabled = true;
-        writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+      // Update settings.json in both project and home directories to point to the policy file
+      for (const baseDir of [rig.testDir!, rig.homeDir!]) {
+        const settingsPath = join(baseDir, '.gemini', 'settings.json');
+        if (existsSync(settingsPath)) {
+          const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
+          settings.policyPaths = [policyFile];
+          // Ensure folder trust is enabled
+          settings.security = settings.security || {};
+          settings.security.folderTrust = settings.security.folderTrust || {};
+          settings.security.folderTrust.enabled = true;
+          writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+        }
       }
-    }
 
-    const run = await rig.runInteractive({
-      approvalMode: 'default',
-      env: {
-        GEMINI_CLI_INTEGRATION_TEST: 'true',
-      },
-    });
+      const run = await rig.runInteractive({
+        approvalMode: 'default',
+        env: {
+          GEMINI_CLI_INTEGRATION_TEST: 'true',
+        },
+      });
 
-    await run.sendKeys(
-      'Open https://example.com and check if there is a heading\r',
-    );
-    await run.sendKeys('\r');
+      await run.sendKeys(
+        'Open https://example.com and check if there is a heading\r',
+      );
+      await run.sendKeys('\r');
 
-    // Handle confirmations.
-    // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('action required'),
-      60000,
-      1000,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 2000));
+      // Handle confirmations.
+      // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('action required'),
+        60000,
+        1000,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 2000));
 
-    // Handle privacy notice
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
-      5000,
-      100,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 5000));
+      // Handle privacy notice
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
+        5000,
+        100,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 5000));
 
-    // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
-    await poll(
-      () => {
-        const stripped = stripAnsi(run.output).toLowerCase();
-        return (
-          stripped.includes('new_page') &&
-          stripped.includes('allow all server tools for this session')
-        );
-      },
-      60000,
-      1000,
-    );
+      // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
+      await poll(
+        () => {
+          const stripped = stripAnsi(run.output).toLowerCase();
+          return (
+            stripped.includes('new_page') &&
+            stripped.includes('allow all server tools for this session')
+          );
+        },
+        60000,
+        1000,
+      );
 
-    // Select "Allow all server tools for this session" (option 3)
-    await run.sendKeys('3\r');
-    await new Promise((r) => setTimeout(r, 30000));
+      // Select "Allow all server tools for this session" (option 3)
+      await run.sendKeys('3\r');
+      await new Promise((r) => setTimeout(r, 30000));
 
-    const output = stripAnsi(run.output).toLowerCase();
+      const output = stripAnsi(run.output).toLowerCase();
 
-    expect(output).toContain('browser_agent');
-    expect(output).toContain('completed successfully');
-  });
+      expect(output).toContain('browser_agent');
+      expect(output).toContain('completed successfully');
+    },
+  );
 
   it('should show the visible warning when browser agent starts in existing session mode', async () => {
     rig.setup('browser-session-warning', {
diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts
index 64481068c2..80552cfd68 100644
--- a/integration-tests/file-system.test.ts
+++ b/integration-tests/file-system.test.ts
@@ -121,6 +121,7 @@ describe('file-system', () => {
 
     const result = await rig.run({
       args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
+      timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
     });
 
     const foundToolCall = await rig.waitForToolCall('write_file');
diff --git a/packages/core/src/agents/browser/browserAgentFactory.test.ts b/packages/core/src/agents/browser/browserAgentFactory.test.ts
index 22a99edab2..79e38c5361 100644
--- a/packages/core/src/agents/browser/browserAgentFactory.test.ts
+++ b/packages/core/src/agents/browser/browserAgentFactory.test.ts
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
       expect(browserManager).toBeDefined();
     });
 
-    it('should call printOutput when provided', async () => {
+    it('should not call printOutput for internal setup messages', async () => {
       const printOutput = vi.fn();
 
       await createBrowserAgentDefinition(
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
         printOutput,
       );
 
-      expect(printOutput).toHaveBeenCalled();
+      expect(printOutput).not.toHaveBeenCalled();
     });
 
     it('should create definition with correct structure', async () => {
diff --git a/packages/core/src/agents/browser/browserAgentFactory.ts b/packages/core/src/agents/browser/browserAgentFactory.ts
index 94632354d7..a1f34a127d 100644
--- a/packages/core/src/agents/browser/browserAgentFactory.ts
+++ b/packages/core/src/agents/browser/browserAgentFactory.ts
@@ -53,7 +53,7 @@ import {
 export async function createBrowserAgentDefinition(
   config: Config,
   messageBus: MessageBus,
-  printOutput?: (msg: string) => void,
+  _printOutput?: (msg: string) => void,
 ): Promise<{
   definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
   browserManager: BrowserManager;
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
   const browserManager = BrowserManager.getInstance(config);
   await browserManager.ensureConnection();
 
-  if (printOutput) {
-    printOutput('Browser connected with isolated MCP client.');
-  }
+  debugLogger.log('Browser connected with isolated MCP client.');
 
   // Determine if input blocker should be active (non-headless + enabled)
   const shouldDisableInput = config.shouldDisableBrowserUserInput();
   // Inject automation overlay and input blocker if not in headless mode
   const browserConfig = config.getBrowserAgentConfig();
   if (!browserConfig?.customConfig?.headless) {
-    if (printOutput) {
-      printOutput('Injecting automation overlay...');
-    }
+    debugLogger.log('Injecting automation overlay...');
     await injectAutomationOverlay(browserManager);
     if (shouldDisableInput) {
-      if (printOutput) {
-        printOutput('Injecting input blocker...');
-      }
+      debugLogger.log('Injecting input blocker...');
       await injectInputBlocker(browserManager);
     }
   }
diff --git a/packages/core/src/agents/browser/browserAgentInvocation.ts b/packages/core/src/agents/browser/browserAgentInvocation.ts
index 586baf7d5a..86191c7cc3 100644
--- a/packages/core/src/agents/browser/browserAgentInvocation.ts
+++ b/packages/core/src/agents/browser/browserAgentInvocation.ts
@@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto';
 import type { Config } from '../../config/config.js';
 import { type AgentLoopContext } from '../../config/agent-loop-context.js';
 import { LocalAgentExecutor } from '../local-executor.js';
-import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
 import {
   BaseToolInvocation,
   type ToolResult,
@@ -30,6 +29,7 @@ import {
   type SubagentActivityEvent,
   type SubagentProgress,
   type SubagentActivityItem,
+  AgentTerminateMode,
   isToolActivityError,
 } from '../types.js';
 import type { MessageBus } from '../../confirmation-bus/message-bus.js';
@@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
   AgentInputs,
   ToolResult
 > {
+  private readonly agentName: string;
+
   constructor(
     private readonly context: AgentLoopContext,
     params: AgentInputs,
@@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
     _toolName?: string,
     _toolDisplayName?: string,
   ) {
+    const resolvedName = _toolName ?? 'browser_agent';
     // Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
     super(
       params,
       messageBus,
-      _toolName ?? 'browser_agent',
+      resolvedName,
       _toolDisplayName ?? 'Browser Agent',
     );
+    this.agentName = resolvedName;
   }
 
   private get config(): Config {
@@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
         // Send initial state
         const initialProgress: SubagentProgress = {
           isSubagentProgress: true,
-          agentName: this['_toolName'] ?? 'browser_agent',
+          agentName: this.agentName,
           recentActivity: [],
           state: 'running',
         };
@@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
             }
             updateOutput({
               isSubagentProgress: true,
-              agentName: this['_toolName'] ?? 'browser_agent',
+              agentName: this.agentName,
               recentActivity: [...recentActivity],
               state: 'running',
             } as SubagentProgress);
@@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
 
           const progress: SubagentProgress = {
             isSubagentProgress: true,
-            agentName: this['_toolName'] ?? 'browser_agent',
+            agentName: this.agentName,
             recentActivity: [...recentActivity],
             state: 'running',
           };
@@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
 
       const output = await executor.run(this.params, signal);
 
-      const displayResult = safeJsonToMarkdown(output.result);
-
       const resultContent = `Browser agent finished.
 Termination Reason: ${output.terminate_reason}
 Result:
 ${output.result}`;
 
-      const displayContent = `
-Browser Agent Finished
+      // Map terminate_reason to the correct SubagentProgress state.
+      // GOAL = agent completed its task normally.
+      // ABORTED = user cancelled.
+      // Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error.
+      let progressState: SubagentProgress['state'];
+      if (output.terminate_reason === AgentTerminateMode.ABORTED) {
+        progressState = 'cancelled';
+      } else if (output.terminate_reason === AgentTerminateMode.GOAL) {
+        progressState = 'completed';
+      } else {
+        progressState = 'error';
+      }
 
-Termination Reason: ${output.terminate_reason}
-
-Result:
-${displayResult}
-`;
+      const progress: SubagentProgress = {
+        isSubagentProgress: true,
+        agentName: this.agentName,
+        recentActivity: [...recentActivity],
+        state: progressState,
+        result: output.result,
+        terminateReason: output.terminate_reason,
+      };
 
       if (updateOutput) {
-        updateOutput({
-          isSubagentProgress: true,
-          agentName: this['_toolName'] ?? 'browser_agent',
-          recentActivity: [...recentActivity],
-          state: 'completed',
-        } as SubagentProgress);
+        updateOutput(progress);
       }
 
       return {
         llmContent: [{ text: resultContent }],
-        returnDisplay: displayContent,
+        returnDisplay: progress,
       };
     } catch (error) {
       const rawErrorMessage =
@@ -343,7 +353,7 @@ ${displayResult}
 
       const progress: SubagentProgress = {
         isSubagentProgress: true,
-        agentName: this['_toolName'] ?? 'browser_agent',
+        agentName: this.agentName,
         recentActivity: [...recentActivity],
         state: isAbort ? 'cancelled' : 'error',
       };