From 782bb4e4bd022988a40f0837e3f6b2844243a04b Mon Sep 17 00:00:00 2001 From: Gaurav <39389231+gsquared94@users.noreply.github.com> Date: Wed, 1 Apr 2026 07:00:16 +0800 Subject: [PATCH] fix(core): fix browser agent UX issues and improve E2E test reliability (#24312) --- integration-tests/browser-policy.test.ts | 199 +++++++++--------- integration-tests/file-system.test.ts | 1 + .../browser/browserAgentFactory.test.ts | 4 +- .../src/agents/browser/browserAgentFactory.ts | 14 +- .../agents/browser/browserAgentInvocation.ts | 54 +++-- 5 files changed, 144 insertions(+), 128 deletions(-) diff --git a/integration-tests/browser-policy.test.ts b/integration-tests/browser-policy.test.ts index f533cb3f5e..7d60ab2c7e 100644 --- a/integration-tests/browser-policy.test.ts +++ b/integration-tests/browser-policy.test.ts @@ -10,8 +10,13 @@ import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { execSync } from 'node:child_process'; import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs'; +import { env } from 'node:process'; import stripAnsi from 'strip-ansi'; +// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox. +// See: https://github.com/google-gemini/gemini-cli/issues/24382 +const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker'; + const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); @@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => { await rig.cleanup(); }); - it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => { - rig.setup('browser-policy-skip-confirmation', { - fakeResponsesPath: join(__dirname, 'browser-policy.responses'), - settings: { - agents: { - overrides: { - browser_agent: { - enabled: true, + it.skipIf(isDockerSandbox)( + 'should skip confirmation when "Allow all server tools for this session" is chosen', + async () => { + rig.setup('browser-policy-skip-confirmation', { + fakeResponsesPath: join(__dirname, 'browser-policy.responses'), + settings: { + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + headless: true, + sessionMode: 'isolated', + allowedDomains: ['example.com'], }, }, - browser: { - headless: true, - sessionMode: 'isolated', - allowedDomains: ['example.com'], - }, }, - }, - }); + }); - // Manually trust the folder to avoid the dialog and enable option 3 - const geminiDir = join(rig.homeDir!, '.gemini'); - mkdirSync(geminiDir, { recursive: true }); + // Manually trust the folder to avoid the dialog and enable option 3 + const geminiDir = join(rig.homeDir!, '.gemini'); + mkdirSync(geminiDir, { recursive: true }); - // Write to trustedFolders.json - const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); - const trustedFolders = { - [rig.testDir!]: 'TRUST_FOLDER', - }; - writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2)); + // Write to trustedFolders.json + const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); + const trustedFolders = { + [rig.testDir!]: 'TRUST_FOLDER', + }; + writeFileSync( + trustedFoldersPath, + JSON.stringify(trustedFolders, null, 2), + ); - // Force confirmation for browser agent. - // NOTE: We don't force confirm browser tools here because "Allow all server tools" - // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by - // a rule in the user tier (4.x) like the one from this TOML. - // By removing the explicit mcp rule, the first MCP tool will still prompt - // due to default approvalMode = 'default', and then "Allow all" will correctly - // bypass subsequent tools. - const policyFile = join(rig.testDir!, 'force-confirm.toml'); - writeFileSync( - policyFile, - ` + // Force confirmation for browser agent. + // NOTE: We don't force confirm browser tools here because "Allow all server tools" + // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by + // a rule in the user tier (4.x) like the one from this TOML. + // By removing the explicit mcp rule, the first MCP tool will still prompt + // due to default approvalMode = 'default', and then "Allow all" will correctly + // bypass subsequent tools. + const policyFile = join(rig.testDir!, 'force-confirm.toml'); + writeFileSync( + policyFile, + ` [[rule]] name = "Force confirm browser_agent" toolName = "browser_agent" decision = "ask_user" priority = 200 `, - ); + ); - // Update settings.json in both project and home directories to point to the policy file - for (const baseDir of [rig.testDir!, rig.homeDir!]) { - const settingsPath = join(baseDir, '.gemini', 'settings.json'); - if (existsSync(settingsPath)) { - const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); - settings.policyPaths = [policyFile]; - // Ensure folder trust is enabled - settings.security = settings.security || {}; - settings.security.folderTrust = settings.security.folderTrust || {}; - settings.security.folderTrust.enabled = true; - writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); + // Update settings.json in both project and home directories to point to the policy file + for (const baseDir of [rig.testDir!, rig.homeDir!]) { + const settingsPath = join(baseDir, '.gemini', 'settings.json'); + if (existsSync(settingsPath)) { + const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); + settings.policyPaths = [policyFile]; + // Ensure folder trust is enabled + settings.security = settings.security || {}; + settings.security.folderTrust = settings.security.folderTrust || {}; + settings.security.folderTrust.enabled = true; + writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); + } } - } - const run = await rig.runInteractive({ - approvalMode: 'default', - env: { - GEMINI_CLI_INTEGRATION_TEST: 'true', - }, - }); + const run = await rig.runInteractive({ + approvalMode: 'default', + env: { + GEMINI_CLI_INTEGRATION_TEST: 'true', + }, + }); - await run.sendKeys( - 'Open https://example.com and check if there is a heading\r', - ); - await run.sendKeys('\r'); + await run.sendKeys( + 'Open https://example.com and check if there is a heading\r', + ); + await run.sendKeys('\r'); - // Handle confirmations. - // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) - await poll( - () => stripAnsi(run.output).toLowerCase().includes('action required'), - 60000, - 1000, - ); - await run.sendKeys('1\r'); - await new Promise((r) => setTimeout(r, 2000)); + // Handle confirmations. + // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) + await poll( + () => stripAnsi(run.output).toLowerCase().includes('action required'), + 60000, + 1000, + ); + await run.sendKeys('1\r'); + await new Promise((r) => setTimeout(r, 2000)); - // Handle privacy notice - await poll( - () => stripAnsi(run.output).toLowerCase().includes('privacy notice'), - 5000, - 100, - ); - await run.sendKeys('1\r'); - await new Promise((r) => setTimeout(r, 5000)); + // Handle privacy notice + await poll( + () => stripAnsi(run.output).toLowerCase().includes('privacy notice'), + 5000, + 100, + ); + await run.sendKeys('1\r'); + await new Promise((r) => setTimeout(r, 5000)); - // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) - await poll( - () => { - const stripped = stripAnsi(run.output).toLowerCase(); - return ( - stripped.includes('new_page') && - stripped.includes('allow all server tools for this session') - ); - }, - 60000, - 1000, - ); + // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) + await poll( + () => { + const stripped = stripAnsi(run.output).toLowerCase(); + return ( + stripped.includes('new_page') && + stripped.includes('allow all server tools for this session') + ); + }, + 60000, + 1000, + ); - // Select "Allow all server tools for this session" (option 3) - await run.sendKeys('3\r'); - await new Promise((r) => setTimeout(r, 30000)); + // Select "Allow all server tools for this session" (option 3) + await run.sendKeys('3\r'); + await new Promise((r) => setTimeout(r, 30000)); - const output = stripAnsi(run.output).toLowerCase(); + const output = stripAnsi(run.output).toLowerCase(); - expect(output).toContain('browser_agent'); - expect(output).toContain('completed successfully'); - }); + expect(output).toContain('browser_agent'); + expect(output).toContain('completed successfully'); + }, + ); it('should show the visible warning when browser agent starts in existing session mode', async () => { rig.setup('browser-session-warning', { diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts index 64481068c2..80552cfd68 100644 --- a/integration-tests/file-system.test.ts +++ b/integration-tests/file-system.test.ts @@ -121,6 +121,7 @@ describe('file-system', () => { const result = await rig.run({ args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`, + timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox }); const foundToolCall = await rig.waitForToolCall('write_file'); diff --git a/packages/core/src/agents/browser/browserAgentFactory.test.ts b/packages/core/src/agents/browser/browserAgentFactory.test.ts index 22a99edab2..79e38c5361 100644 --- a/packages/core/src/agents/browser/browserAgentFactory.test.ts +++ b/packages/core/src/agents/browser/browserAgentFactory.test.ts @@ -166,7 +166,7 @@ describe('browserAgentFactory', () => { expect(browserManager).toBeDefined(); }); - it('should call printOutput when provided', async () => { + it('should not call printOutput for internal setup messages', async () => { const printOutput = vi.fn(); await createBrowserAgentDefinition( @@ -175,7 +175,7 @@ describe('browserAgentFactory', () => { printOutput, ); - expect(printOutput).toHaveBeenCalled(); + expect(printOutput).not.toHaveBeenCalled(); }); it('should create definition with correct structure', async () => { diff --git a/packages/core/src/agents/browser/browserAgentFactory.ts b/packages/core/src/agents/browser/browserAgentFactory.ts index 94632354d7..a1f34a127d 100644 --- a/packages/core/src/agents/browser/browserAgentFactory.ts +++ b/packages/core/src/agents/browser/browserAgentFactory.ts @@ -53,7 +53,7 @@ import { export async function createBrowserAgentDefinition( config: Config, messageBus: MessageBus, - printOutput?: (msg: string) => void, + _printOutput?: (msg: string) => void, ): Promise<{ definition: LocalAgentDefinition; browserManager: BrowserManager; @@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition( const browserManager = BrowserManager.getInstance(config); await browserManager.ensureConnection(); - if (printOutput) { - printOutput('Browser connected with isolated MCP client.'); - } + debugLogger.log('Browser connected with isolated MCP client.'); // Determine if input blocker should be active (non-headless + enabled) const shouldDisableInput = config.shouldDisableBrowserUserInput(); // Inject automation overlay and input blocker if not in headless mode const browserConfig = config.getBrowserAgentConfig(); if (!browserConfig?.customConfig?.headless) { - if (printOutput) { - printOutput('Injecting automation overlay...'); - } + debugLogger.log('Injecting automation overlay...'); await injectAutomationOverlay(browserManager); if (shouldDisableInput) { - if (printOutput) { - printOutput('Injecting input blocker...'); - } + debugLogger.log('Injecting input blocker...'); await injectInputBlocker(browserManager); } } diff --git a/packages/core/src/agents/browser/browserAgentInvocation.ts b/packages/core/src/agents/browser/browserAgentInvocation.ts index 586baf7d5a..86191c7cc3 100644 --- a/packages/core/src/agents/browser/browserAgentInvocation.ts +++ b/packages/core/src/agents/browser/browserAgentInvocation.ts @@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto'; import type { Config } from '../../config/config.js'; import { type AgentLoopContext } from '../../config/agent-loop-context.js'; import { LocalAgentExecutor } from '../local-executor.js'; -import { safeJsonToMarkdown } from '../../utils/markdownUtils.js'; import { BaseToolInvocation, type ToolResult, @@ -30,6 +29,7 @@ import { type SubagentActivityEvent, type SubagentProgress, type SubagentActivityItem, + AgentTerminateMode, isToolActivityError, } from '../types.js'; import type { MessageBus } from '../../confirmation-bus/message-bus.js'; @@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation< AgentInputs, ToolResult > { + private readonly agentName: string; + constructor( private readonly context: AgentLoopContext, params: AgentInputs, @@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation< _toolName?: string, _toolDisplayName?: string, ) { + const resolvedName = _toolName ?? 'browser_agent'; // Note: BrowserAgentDefinition is a factory function, so we use hardcoded names super( params, messageBus, - _toolName ?? 'browser_agent', + resolvedName, _toolDisplayName ?? 'Browser Agent', ); + this.agentName = resolvedName; } private get config(): Config { @@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation< // Send initial state const initialProgress: SubagentProgress = { isSubagentProgress: true, - agentName: this['_toolName'] ?? 'browser_agent', + agentName: this.agentName, recentActivity: [], state: 'running', }; @@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation< } updateOutput({ isSubagentProgress: true, - agentName: this['_toolName'] ?? 'browser_agent', + agentName: this.agentName, recentActivity: [...recentActivity], state: 'running', } as SubagentProgress); @@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation< const progress: SubagentProgress = { isSubagentProgress: true, - agentName: this['_toolName'] ?? 'browser_agent', + agentName: this.agentName, recentActivity: [...recentActivity], state: 'running', }; @@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation< const output = await executor.run(this.params, signal); - const displayResult = safeJsonToMarkdown(output.result); - const resultContent = `Browser agent finished. Termination Reason: ${output.terminate_reason} Result: ${output.result}`; - const displayContent = ` -Browser Agent Finished + // Map terminate_reason to the correct SubagentProgress state. + // GOAL = agent completed its task normally. + // ABORTED = user cancelled. + // Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error. + let progressState: SubagentProgress['state']; + if (output.terminate_reason === AgentTerminateMode.ABORTED) { + progressState = 'cancelled'; + } else if (output.terminate_reason === AgentTerminateMode.GOAL) { + progressState = 'completed'; + } else { + progressState = 'error'; + } -Termination Reason: ${output.terminate_reason} - -Result: -${displayResult} -`; + const progress: SubagentProgress = { + isSubagentProgress: true, + agentName: this.agentName, + recentActivity: [...recentActivity], + state: progressState, + result: output.result, + terminateReason: output.terminate_reason, + }; if (updateOutput) { - updateOutput({ - isSubagentProgress: true, - agentName: this['_toolName'] ?? 'browser_agent', - recentActivity: [...recentActivity], - state: 'completed', - } as SubagentProgress); + updateOutput(progress); } return { llmContent: [{ text: resultContent }], - returnDisplay: displayContent, + returnDisplay: progress, }; } catch (error) { const rawErrorMessage = @@ -343,7 +353,7 @@ ${displayResult} const progress: SubagentProgress = { isSubagentProgress: true, - agentName: this['_toolName'] ?? 'browser_agent', + agentName: this.agentName, recentActivity: [...recentActivity], state: isAbort ? 'cancelled' : 'error', };