fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)

This commit is contained in:
Gaurav
2026-04-01 07:00:16 +08:00
committed by GitHub
parent 718659fef4
commit 41abd1ef65
5 changed files with 144 additions and 128 deletions
+105 -94
View File
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
import { execSync } from 'node:child_process'; import { execSync } from 'node:child_process';
import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs'; import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
import { env } from 'node:process';
import stripAnsi from 'strip-ansi'; import stripAnsi from 'strip-ansi';
// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
// See: https://github.com/google-gemini/gemini-cli/issues/24382
const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
const __filename = fileURLToPath(import.meta.url); const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename); const __dirname = dirname(__filename);
@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
await rig.cleanup(); await rig.cleanup();
}); });
it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => { it.skipIf(isDockerSandbox)(
rig.setup('browser-policy-skip-confirmation', { 'should skip confirmation when "Allow all server tools for this session" is chosen',
fakeResponsesPath: join(__dirname, 'browser-policy.responses'), async () => {
settings: { rig.setup('browser-policy-skip-confirmation', {
agents: { fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
overrides: { settings: {
browser_agent: { agents: {
enabled: true, overrides: {
browser_agent: {
enabled: true,
},
},
browser: {
headless: true,
sessionMode: 'isolated',
allowedDomains: ['example.com'],
}, },
}, },
browser: {
headless: true,
sessionMode: 'isolated',
allowedDomains: ['example.com'],
},
}, },
}, });
});
// Manually trust the folder to avoid the dialog and enable option 3 // Manually trust the folder to avoid the dialog and enable option 3
const geminiDir = join(rig.homeDir!, '.gemini'); const geminiDir = join(rig.homeDir!, '.gemini');
mkdirSync(geminiDir, { recursive: true }); mkdirSync(geminiDir, { recursive: true });
// Write to trustedFolders.json // Write to trustedFolders.json
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
const trustedFolders = { const trustedFolders = {
[rig.testDir!]: 'TRUST_FOLDER', [rig.testDir!]: 'TRUST_FOLDER',
}; };
writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2)); writeFileSync(
trustedFoldersPath,
JSON.stringify(trustedFolders, null, 2),
);
// Force confirmation for browser agent. // Force confirmation for browser agent.
// NOTE: We don't force confirm browser tools here because "Allow all server tools" // NOTE: We don't force confirm browser tools here because "Allow all server tools"
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
// a rule in the user tier (4.x) like the one from this TOML. // a rule in the user tier (4.x) like the one from this TOML.
// By removing the explicit mcp rule, the first MCP tool will still prompt // By removing the explicit mcp rule, the first MCP tool will still prompt
// due to default approvalMode = 'default', and then "Allow all" will correctly // due to default approvalMode = 'default', and then "Allow all" will correctly
// bypass subsequent tools. // bypass subsequent tools.
const policyFile = join(rig.testDir!, 'force-confirm.toml'); const policyFile = join(rig.testDir!, 'force-confirm.toml');
writeFileSync( writeFileSync(
policyFile, policyFile,
` `
[[rule]] [[rule]]
name = "Force confirm browser_agent" name = "Force confirm browser_agent"
toolName = "browser_agent" toolName = "browser_agent"
decision = "ask_user" decision = "ask_user"
priority = 200 priority = 200
`, `,
); );
// Update settings.json in both project and home directories to point to the policy file // Update settings.json in both project and home directories to point to the policy file
for (const baseDir of [rig.testDir!, rig.homeDir!]) { for (const baseDir of [rig.testDir!, rig.homeDir!]) {
const settingsPath = join(baseDir, '.gemini', 'settings.json'); const settingsPath = join(baseDir, '.gemini', 'settings.json');
if (existsSync(settingsPath)) { if (existsSync(settingsPath)) {
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
settings.policyPaths = [policyFile]; settings.policyPaths = [policyFile];
// Ensure folder trust is enabled // Ensure folder trust is enabled
settings.security = settings.security || {}; settings.security = settings.security || {};
settings.security.folderTrust = settings.security.folderTrust || {}; settings.security.folderTrust = settings.security.folderTrust || {};
settings.security.folderTrust.enabled = true; settings.security.folderTrust.enabled = true;
writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
}
} }
}
const run = await rig.runInteractive({ const run = await rig.runInteractive({
approvalMode: 'default', approvalMode: 'default',
env: { env: {
GEMINI_CLI_INTEGRATION_TEST: 'true', GEMINI_CLI_INTEGRATION_TEST: 'true',
}, },
}); });
await run.sendKeys( await run.sendKeys(
'Open https://example.com and check if there is a heading\r', 'Open https://example.com and check if there is a heading\r',
); );
await run.sendKeys('\r'); await run.sendKeys('\r');
// Handle confirmations. // Handle confirmations.
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
await poll( await poll(
() => stripAnsi(run.output).toLowerCase().includes('action required'), () => stripAnsi(run.output).toLowerCase().includes('action required'),
60000, 60000,
1000, 1000,
); );
await run.sendKeys('1\r'); await run.sendKeys('1\r');
await new Promise((r) => setTimeout(r, 2000)); await new Promise((r) => setTimeout(r, 2000));
// Handle privacy notice // Handle privacy notice
await poll( await poll(
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'), () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
5000, 5000,
100, 100,
); );
await run.sendKeys('1\r'); await run.sendKeys('1\r');
await new Promise((r) => setTimeout(r, 5000)); await new Promise((r) => setTimeout(r, 5000));
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
await poll( await poll(
() => { () => {
const stripped = stripAnsi(run.output).toLowerCase(); const stripped = stripAnsi(run.output).toLowerCase();
return ( return (
stripped.includes('new_page') && stripped.includes('new_page') &&
stripped.includes('allow all server tools for this session') stripped.includes('allow all server tools for this session')
); );
}, },
60000, 60000,
1000, 1000,
); );
// Select "Allow all server tools for this session" (option 3) // Select "Allow all server tools for this session" (option 3)
await run.sendKeys('3\r'); await run.sendKeys('3\r');
await new Promise((r) => setTimeout(r, 30000)); await new Promise((r) => setTimeout(r, 30000));
const output = stripAnsi(run.output).toLowerCase(); const output = stripAnsi(run.output).toLowerCase();
expect(output).toContain('browser_agent'); expect(output).toContain('browser_agent');
expect(output).toContain('completed successfully'); expect(output).toContain('completed successfully');
}); },
);
it('should show the visible warning when browser agent starts in existing session mode', async () => { it('should show the visible warning when browser agent starts in existing session mode', async () => {
rig.setup('browser-session-warning', { rig.setup('browser-session-warning', {
+1
View File
@@ -121,6 +121,7 @@ describe('file-system', () => {
const result = await rig.run({ const result = await rig.run({
args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`, args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
}); });
const foundToolCall = await rig.waitForToolCall('write_file'); const foundToolCall = await rig.waitForToolCall('write_file');
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
expect(browserManager).toBeDefined(); expect(browserManager).toBeDefined();
}); });
it('should call printOutput when provided', async () => { it('should not call printOutput for internal setup messages', async () => {
const printOutput = vi.fn(); const printOutput = vi.fn();
await createBrowserAgentDefinition( await createBrowserAgentDefinition(
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
printOutput, printOutput,
); );
expect(printOutput).toHaveBeenCalled(); expect(printOutput).not.toHaveBeenCalled();
}); });
it('should create definition with correct structure', async () => { it('should create definition with correct structure', async () => {
@@ -53,7 +53,7 @@ import {
export async function createBrowserAgentDefinition( export async function createBrowserAgentDefinition(
config: Config, config: Config,
messageBus: MessageBus, messageBus: MessageBus,
printOutput?: (msg: string) => void, _printOutput?: (msg: string) => void,
): Promise<{ ): Promise<{
definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>; definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
browserManager: BrowserManager; browserManager: BrowserManager;
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
const browserManager = BrowserManager.getInstance(config); const browserManager = BrowserManager.getInstance(config);
await browserManager.ensureConnection(); await browserManager.ensureConnection();
if (printOutput) { debugLogger.log('Browser connected with isolated MCP client.');
printOutput('Browser connected with isolated MCP client.');
}
// Determine if input blocker should be active (non-headless + enabled) // Determine if input blocker should be active (non-headless + enabled)
const shouldDisableInput = config.shouldDisableBrowserUserInput(); const shouldDisableInput = config.shouldDisableBrowserUserInput();
// Inject automation overlay and input blocker if not in headless mode // Inject automation overlay and input blocker if not in headless mode
const browserConfig = config.getBrowserAgentConfig(); const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig?.customConfig?.headless) { if (!browserConfig?.customConfig?.headless) {
if (printOutput) { debugLogger.log('Injecting automation overlay...');
printOutput('Injecting automation overlay...');
}
await injectAutomationOverlay(browserManager); await injectAutomationOverlay(browserManager);
if (shouldDisableInput) { if (shouldDisableInput) {
if (printOutput) { debugLogger.log('Injecting input blocker...');
printOutput('Injecting input blocker...');
}
await injectInputBlocker(browserManager); await injectInputBlocker(browserManager);
} }
} }
@@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto';
import type { Config } from '../../config/config.js'; import type { Config } from '../../config/config.js';
import { type AgentLoopContext } from '../../config/agent-loop-context.js'; import { type AgentLoopContext } from '../../config/agent-loop-context.js';
import { LocalAgentExecutor } from '../local-executor.js'; import { LocalAgentExecutor } from '../local-executor.js';
import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
import { import {
BaseToolInvocation, BaseToolInvocation,
type ToolResult, type ToolResult,
@@ -30,6 +29,7 @@ import {
type SubagentActivityEvent, type SubagentActivityEvent,
type SubagentProgress, type SubagentProgress,
type SubagentActivityItem, type SubagentActivityItem,
AgentTerminateMode,
isToolActivityError, isToolActivityError,
} from '../types.js'; } from '../types.js';
import type { MessageBus } from '../../confirmation-bus/message-bus.js'; import type { MessageBus } from '../../confirmation-bus/message-bus.js';
@@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
AgentInputs, AgentInputs,
ToolResult ToolResult
> { > {
private readonly agentName: string;
constructor( constructor(
private readonly context: AgentLoopContext, private readonly context: AgentLoopContext,
params: AgentInputs, params: AgentInputs,
@@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
_toolName?: string, _toolName?: string,
_toolDisplayName?: string, _toolDisplayName?: string,
) { ) {
const resolvedName = _toolName ?? 'browser_agent';
// Note: BrowserAgentDefinition is a factory function, so we use hardcoded names // Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
super( super(
params, params,
messageBus, messageBus,
_toolName ?? 'browser_agent', resolvedName,
_toolDisplayName ?? 'Browser Agent', _toolDisplayName ?? 'Browser Agent',
); );
this.agentName = resolvedName;
} }
private get config(): Config { private get config(): Config {
@@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
// Send initial state // Send initial state
const initialProgress: SubagentProgress = { const initialProgress: SubagentProgress = {
isSubagentProgress: true, isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent', agentName: this.agentName,
recentActivity: [], recentActivity: [],
state: 'running', state: 'running',
}; };
@@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
} }
updateOutput({ updateOutput({
isSubagentProgress: true, isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent', agentName: this.agentName,
recentActivity: [...recentActivity], recentActivity: [...recentActivity],
state: 'running', state: 'running',
} as SubagentProgress); } as SubagentProgress);
@@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
const progress: SubagentProgress = { const progress: SubagentProgress = {
isSubagentProgress: true, isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent', agentName: this.agentName,
recentActivity: [...recentActivity], recentActivity: [...recentActivity],
state: 'running', state: 'running',
}; };
@@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
const output = await executor.run(this.params, signal); const output = await executor.run(this.params, signal);
const displayResult = safeJsonToMarkdown(output.result);
const resultContent = `Browser agent finished. const resultContent = `Browser agent finished.
Termination Reason: ${output.terminate_reason} Termination Reason: ${output.terminate_reason}
Result: Result:
${output.result}`; ${output.result}`;
const displayContent = ` // Map terminate_reason to the correct SubagentProgress state.
Browser Agent Finished // GOAL = agent completed its task normally.
// ABORTED = user cancelled.
// Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error.
let progressState: SubagentProgress['state'];
if (output.terminate_reason === AgentTerminateMode.ABORTED) {
progressState = 'cancelled';
} else if (output.terminate_reason === AgentTerminateMode.GOAL) {
progressState = 'completed';
} else {
progressState = 'error';
}
Termination Reason: ${output.terminate_reason} const progress: SubagentProgress = {
isSubagentProgress: true,
Result: agentName: this.agentName,
${displayResult} recentActivity: [...recentActivity],
`; state: progressState,
result: output.result,
terminateReason: output.terminate_reason,
};
if (updateOutput) { if (updateOutput) {
updateOutput({ updateOutput(progress);
isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent',
recentActivity: [...recentActivity],
state: 'completed',
} as SubagentProgress);
} }
return { return {
llmContent: [{ text: resultContent }], llmContent: [{ text: resultContent }],
returnDisplay: displayContent, returnDisplay: progress,
}; };
} catch (error) { } catch (error) {
const rawErrorMessage = const rawErrorMessage =
@@ -343,7 +353,7 @@ ${displayResult}
const progress: SubagentProgress = { const progress: SubagentProgress = {
isSubagentProgress: true, isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent', agentName: this.agentName,
recentActivity: [...recentActivity], recentActivity: [...recentActivity],
state: isAbort ? 'cancelled' : 'error', state: isAbort ? 'cancelled' : 'error',
}; };