mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-02 17:31:05 -07:00
fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)
This commit is contained in:
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
|
||||
import { env } from 'node:process';
|
||||
import stripAnsi from 'strip-ansi';
|
||||
|
||||
// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
|
||||
// See: https://github.com/google-gemini/gemini-cli/issues/24382
|
||||
const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
|
||||
await rig.cleanup();
|
||||
});
|
||||
|
||||
it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
|
||||
rig.setup('browser-policy-skip-confirmation', {
|
||||
fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
|
||||
settings: {
|
||||
agents: {
|
||||
overrides: {
|
||||
browser_agent: {
|
||||
enabled: true,
|
||||
it.skipIf(isDockerSandbox)(
|
||||
'should skip confirmation when "Allow all server tools for this session" is chosen',
|
||||
async () => {
|
||||
rig.setup('browser-policy-skip-confirmation', {
|
||||
fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
|
||||
settings: {
|
||||
agents: {
|
||||
overrides: {
|
||||
browser_agent: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
browser: {
|
||||
headless: true,
|
||||
sessionMode: 'isolated',
|
||||
allowedDomains: ['example.com'],
|
||||
},
|
||||
},
|
||||
browser: {
|
||||
headless: true,
|
||||
sessionMode: 'isolated',
|
||||
allowedDomains: ['example.com'],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
// Manually trust the folder to avoid the dialog and enable option 3
|
||||
const geminiDir = join(rig.homeDir!, '.gemini');
|
||||
mkdirSync(geminiDir, { recursive: true });
|
||||
// Manually trust the folder to avoid the dialog and enable option 3
|
||||
const geminiDir = join(rig.homeDir!, '.gemini');
|
||||
mkdirSync(geminiDir, { recursive: true });
|
||||
|
||||
// Write to trustedFolders.json
|
||||
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
|
||||
const trustedFolders = {
|
||||
[rig.testDir!]: 'TRUST_FOLDER',
|
||||
};
|
||||
writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
|
||||
// Write to trustedFolders.json
|
||||
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
|
||||
const trustedFolders = {
|
||||
[rig.testDir!]: 'TRUST_FOLDER',
|
||||
};
|
||||
writeFileSync(
|
||||
trustedFoldersPath,
|
||||
JSON.stringify(trustedFolders, null, 2),
|
||||
);
|
||||
|
||||
// Force confirmation for browser agent.
|
||||
// NOTE: We don't force confirm browser tools here because "Allow all server tools"
|
||||
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
|
||||
// a rule in the user tier (4.x) like the one from this TOML.
|
||||
// By removing the explicit mcp rule, the first MCP tool will still prompt
|
||||
// due to default approvalMode = 'default', and then "Allow all" will correctly
|
||||
// bypass subsequent tools.
|
||||
const policyFile = join(rig.testDir!, 'force-confirm.toml');
|
||||
writeFileSync(
|
||||
policyFile,
|
||||
`
|
||||
// Force confirmation for browser agent.
|
||||
// NOTE: We don't force confirm browser tools here because "Allow all server tools"
|
||||
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
|
||||
// a rule in the user tier (4.x) like the one from this TOML.
|
||||
// By removing the explicit mcp rule, the first MCP tool will still prompt
|
||||
// due to default approvalMode = 'default', and then "Allow all" will correctly
|
||||
// bypass subsequent tools.
|
||||
const policyFile = join(rig.testDir!, 'force-confirm.toml');
|
||||
writeFileSync(
|
||||
policyFile,
|
||||
`
|
||||
[[rule]]
|
||||
name = "Force confirm browser_agent"
|
||||
toolName = "browser_agent"
|
||||
decision = "ask_user"
|
||||
priority = 200
|
||||
`,
|
||||
);
|
||||
);
|
||||
|
||||
// Update settings.json in both project and home directories to point to the policy file
|
||||
for (const baseDir of [rig.testDir!, rig.homeDir!]) {
|
||||
const settingsPath = join(baseDir, '.gemini', 'settings.json');
|
||||
if (existsSync(settingsPath)) {
|
||||
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
|
||||
settings.policyPaths = [policyFile];
|
||||
// Ensure folder trust is enabled
|
||||
settings.security = settings.security || {};
|
||||
settings.security.folderTrust = settings.security.folderTrust || {};
|
||||
settings.security.folderTrust.enabled = true;
|
||||
writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
|
||||
// Update settings.json in both project and home directories to point to the policy file
|
||||
for (const baseDir of [rig.testDir!, rig.homeDir!]) {
|
||||
const settingsPath = join(baseDir, '.gemini', 'settings.json');
|
||||
if (existsSync(settingsPath)) {
|
||||
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
|
||||
settings.policyPaths = [policyFile];
|
||||
// Ensure folder trust is enabled
|
||||
settings.security = settings.security || {};
|
||||
settings.security.folderTrust = settings.security.folderTrust || {};
|
||||
settings.security.folderTrust.enabled = true;
|
||||
writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const run = await rig.runInteractive({
|
||||
approvalMode: 'default',
|
||||
env: {
|
||||
GEMINI_CLI_INTEGRATION_TEST: 'true',
|
||||
},
|
||||
});
|
||||
const run = await rig.runInteractive({
|
||||
approvalMode: 'default',
|
||||
env: {
|
||||
GEMINI_CLI_INTEGRATION_TEST: 'true',
|
||||
},
|
||||
});
|
||||
|
||||
await run.sendKeys(
|
||||
'Open https://example.com and check if there is a heading\r',
|
||||
);
|
||||
await run.sendKeys('\r');
|
||||
await run.sendKeys(
|
||||
'Open https://example.com and check if there is a heading\r',
|
||||
);
|
||||
await run.sendKeys('\r');
|
||||
|
||||
// Handle confirmations.
|
||||
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
|
||||
await poll(
|
||||
() => stripAnsi(run.output).toLowerCase().includes('action required'),
|
||||
60000,
|
||||
1000,
|
||||
);
|
||||
await run.sendKeys('1\r');
|
||||
await new Promise((r) => setTimeout(r, 2000));
|
||||
// Handle confirmations.
|
||||
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
|
||||
await poll(
|
||||
() => stripAnsi(run.output).toLowerCase().includes('action required'),
|
||||
60000,
|
||||
1000,
|
||||
);
|
||||
await run.sendKeys('1\r');
|
||||
await new Promise((r) => setTimeout(r, 2000));
|
||||
|
||||
// Handle privacy notice
|
||||
await poll(
|
||||
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
|
||||
5000,
|
||||
100,
|
||||
);
|
||||
await run.sendKeys('1\r');
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
// Handle privacy notice
|
||||
await poll(
|
||||
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
|
||||
5000,
|
||||
100,
|
||||
);
|
||||
await run.sendKeys('1\r');
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
|
||||
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
|
||||
await poll(
|
||||
() => {
|
||||
const stripped = stripAnsi(run.output).toLowerCase();
|
||||
return (
|
||||
stripped.includes('new_page') &&
|
||||
stripped.includes('allow all server tools for this session')
|
||||
);
|
||||
},
|
||||
60000,
|
||||
1000,
|
||||
);
|
||||
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
|
||||
await poll(
|
||||
() => {
|
||||
const stripped = stripAnsi(run.output).toLowerCase();
|
||||
return (
|
||||
stripped.includes('new_page') &&
|
||||
stripped.includes('allow all server tools for this session')
|
||||
);
|
||||
},
|
||||
60000,
|
||||
1000,
|
||||
);
|
||||
|
||||
// Select "Allow all server tools for this session" (option 3)
|
||||
await run.sendKeys('3\r');
|
||||
await new Promise((r) => setTimeout(r, 30000));
|
||||
// Select "Allow all server tools for this session" (option 3)
|
||||
await run.sendKeys('3\r');
|
||||
await new Promise((r) => setTimeout(r, 30000));
|
||||
|
||||
const output = stripAnsi(run.output).toLowerCase();
|
||||
const output = stripAnsi(run.output).toLowerCase();
|
||||
|
||||
expect(output).toContain('browser_agent');
|
||||
expect(output).toContain('completed successfully');
|
||||
});
|
||||
expect(output).toContain('browser_agent');
|
||||
expect(output).toContain('completed successfully');
|
||||
},
|
||||
);
|
||||
|
||||
it('should show the visible warning when browser agent starts in existing session mode', async () => {
|
||||
rig.setup('browser-session-warning', {
|
||||
|
||||
@@ -121,6 +121,7 @@ describe('file-system', () => {
|
||||
|
||||
const result = await rig.run({
|
||||
args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
|
||||
timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
|
||||
});
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('write_file');
|
||||
|
||||
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
|
||||
expect(browserManager).toBeDefined();
|
||||
});
|
||||
|
||||
it('should call printOutput when provided', async () => {
|
||||
it('should not call printOutput for internal setup messages', async () => {
|
||||
const printOutput = vi.fn();
|
||||
|
||||
await createBrowserAgentDefinition(
|
||||
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
|
||||
printOutput,
|
||||
);
|
||||
|
||||
expect(printOutput).toHaveBeenCalled();
|
||||
expect(printOutput).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should create definition with correct structure', async () => {
|
||||
|
||||
@@ -53,7 +53,7 @@ import {
|
||||
export async function createBrowserAgentDefinition(
|
||||
config: Config,
|
||||
messageBus: MessageBus,
|
||||
printOutput?: (msg: string) => void,
|
||||
_printOutput?: (msg: string) => void,
|
||||
): Promise<{
|
||||
definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
|
||||
browserManager: BrowserManager;
|
||||
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
|
||||
const browserManager = BrowserManager.getInstance(config);
|
||||
await browserManager.ensureConnection();
|
||||
|
||||
if (printOutput) {
|
||||
printOutput('Browser connected with isolated MCP client.');
|
||||
}
|
||||
debugLogger.log('Browser connected with isolated MCP client.');
|
||||
|
||||
// Determine if input blocker should be active (non-headless + enabled)
|
||||
const shouldDisableInput = config.shouldDisableBrowserUserInput();
|
||||
// Inject automation overlay and input blocker if not in headless mode
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig?.customConfig?.headless) {
|
||||
if (printOutput) {
|
||||
printOutput('Injecting automation overlay...');
|
||||
}
|
||||
debugLogger.log('Injecting automation overlay...');
|
||||
await injectAutomationOverlay(browserManager);
|
||||
if (shouldDisableInput) {
|
||||
if (printOutput) {
|
||||
printOutput('Injecting input blocker...');
|
||||
}
|
||||
debugLogger.log('Injecting input blocker...');
|
||||
await injectInputBlocker(browserManager);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { type AgentLoopContext } from '../../config/agent-loop-context.js';
|
||||
import { LocalAgentExecutor } from '../local-executor.js';
|
||||
import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
|
||||
import {
|
||||
BaseToolInvocation,
|
||||
type ToolResult,
|
||||
@@ -30,6 +29,7 @@ import {
|
||||
type SubagentActivityEvent,
|
||||
type SubagentProgress,
|
||||
type SubagentActivityItem,
|
||||
AgentTerminateMode,
|
||||
isToolActivityError,
|
||||
} from '../types.js';
|
||||
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
||||
@@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
AgentInputs,
|
||||
ToolResult
|
||||
> {
|
||||
private readonly agentName: string;
|
||||
|
||||
constructor(
|
||||
private readonly context: AgentLoopContext,
|
||||
params: AgentInputs,
|
||||
@@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
_toolName?: string,
|
||||
_toolDisplayName?: string,
|
||||
) {
|
||||
const resolvedName = _toolName ?? 'browser_agent';
|
||||
// Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
|
||||
super(
|
||||
params,
|
||||
messageBus,
|
||||
_toolName ?? 'browser_agent',
|
||||
resolvedName,
|
||||
_toolDisplayName ?? 'Browser Agent',
|
||||
);
|
||||
this.agentName = resolvedName;
|
||||
}
|
||||
|
||||
private get config(): Config {
|
||||
@@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
// Send initial state
|
||||
const initialProgress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
agentName: this.agentName,
|
||||
recentActivity: [],
|
||||
state: 'running',
|
||||
};
|
||||
@@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
}
|
||||
updateOutput({
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
agentName: this.agentName,
|
||||
recentActivity: [...recentActivity],
|
||||
state: 'running',
|
||||
} as SubagentProgress);
|
||||
@@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
|
||||
const progress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
agentName: this.agentName,
|
||||
recentActivity: [...recentActivity],
|
||||
state: 'running',
|
||||
};
|
||||
@@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
|
||||
const output = await executor.run(this.params, signal);
|
||||
|
||||
const displayResult = safeJsonToMarkdown(output.result);
|
||||
|
||||
const resultContent = `Browser agent finished.
|
||||
Termination Reason: ${output.terminate_reason}
|
||||
Result:
|
||||
${output.result}`;
|
||||
|
||||
const displayContent = `
|
||||
Browser Agent Finished
|
||||
// Map terminate_reason to the correct SubagentProgress state.
|
||||
// GOAL = agent completed its task normally.
|
||||
// ABORTED = user cancelled.
|
||||
// Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error.
|
||||
let progressState: SubagentProgress['state'];
|
||||
if (output.terminate_reason === AgentTerminateMode.ABORTED) {
|
||||
progressState = 'cancelled';
|
||||
} else if (output.terminate_reason === AgentTerminateMode.GOAL) {
|
||||
progressState = 'completed';
|
||||
} else {
|
||||
progressState = 'error';
|
||||
}
|
||||
|
||||
Termination Reason: ${output.terminate_reason}
|
||||
|
||||
Result:
|
||||
${displayResult}
|
||||
`;
|
||||
const progress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this.agentName,
|
||||
recentActivity: [...recentActivity],
|
||||
state: progressState,
|
||||
result: output.result,
|
||||
terminateReason: output.terminate_reason,
|
||||
};
|
||||
|
||||
if (updateOutput) {
|
||||
updateOutput({
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
recentActivity: [...recentActivity],
|
||||
state: 'completed',
|
||||
} as SubagentProgress);
|
||||
updateOutput(progress);
|
||||
}
|
||||
|
||||
return {
|
||||
llmContent: [{ text: resultContent }],
|
||||
returnDisplay: displayContent,
|
||||
returnDisplay: progress,
|
||||
};
|
||||
} catch (error) {
|
||||
const rawErrorMessage =
|
||||
@@ -343,7 +353,7 @@ ${displayResult}
|
||||
|
||||
const progress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
agentName: this.agentName,
|
||||
recentActivity: [...recentActivity],
|
||||
state: isAbort ? 'cancelled' : 'error',
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user