mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-07-01 21:57:10 -07:00
fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)
This commit is contained in:
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
|
|||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
import { execSync } from 'node:child_process';
|
import { execSync } from 'node:child_process';
|
||||||
import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
|
import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
|
||||||
|
import { env } from 'node:process';
|
||||||
import stripAnsi from 'strip-ansi';
|
import stripAnsi from 'strip-ansi';
|
||||||
|
|
||||||
|
// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
|
||||||
|
// See: https://github.com/google-gemini/gemini-cli/issues/24382
|
||||||
|
const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
|
|
||||||
@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
|
|||||||
await rig.cleanup();
|
await rig.cleanup();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
|
it.skipIf(isDockerSandbox)(
|
||||||
rig.setup('browser-policy-skip-confirmation', {
|
'should skip confirmation when "Allow all server tools for this session" is chosen',
|
||||||
fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
|
async () => {
|
||||||
settings: {
|
rig.setup('browser-policy-skip-confirmation', {
|
||||||
agents: {
|
fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
|
||||||
overrides: {
|
settings: {
|
||||||
browser_agent: {
|
agents: {
|
||||||
enabled: true,
|
overrides: {
|
||||||
|
browser_agent: {
|
||||||
|
enabled: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
browser: {
|
||||||
|
headless: true,
|
||||||
|
sessionMode: 'isolated',
|
||||||
|
allowedDomains: ['example.com'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
browser: {
|
|
||||||
headless: true,
|
|
||||||
sessionMode: 'isolated',
|
|
||||||
allowedDomains: ['example.com'],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
});
|
||||||
});
|
|
||||||
|
|
||||||
// Manually trust the folder to avoid the dialog and enable option 3
|
// Manually trust the folder to avoid the dialog and enable option 3
|
||||||
const geminiDir = join(rig.homeDir!, '.gemini');
|
const geminiDir = join(rig.homeDir!, '.gemini');
|
||||||
mkdirSync(geminiDir, { recursive: true });
|
mkdirSync(geminiDir, { recursive: true });
|
||||||
|
|
||||||
// Write to trustedFolders.json
|
// Write to trustedFolders.json
|
||||||
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
|
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
|
||||||
const trustedFolders = {
|
const trustedFolders = {
|
||||||
[rig.testDir!]: 'TRUST_FOLDER',
|
[rig.testDir!]: 'TRUST_FOLDER',
|
||||||
};
|
};
|
||||||
writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
|
writeFileSync(
|
||||||
|
trustedFoldersPath,
|
||||||
|
JSON.stringify(trustedFolders, null, 2),
|
||||||
|
);
|
||||||
|
|
||||||
// Force confirmation for browser agent.
|
// Force confirmation for browser agent.
|
||||||
// NOTE: We don't force confirm browser tools here because "Allow all server tools"
|
// NOTE: We don't force confirm browser tools here because "Allow all server tools"
|
||||||
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
|
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
|
||||||
// a rule in the user tier (4.x) like the one from this TOML.
|
// a rule in the user tier (4.x) like the one from this TOML.
|
||||||
// By removing the explicit mcp rule, the first MCP tool will still prompt
|
// By removing the explicit mcp rule, the first MCP tool will still prompt
|
||||||
// due to default approvalMode = 'default', and then "Allow all" will correctly
|
// due to default approvalMode = 'default', and then "Allow all" will correctly
|
||||||
// bypass subsequent tools.
|
// bypass subsequent tools.
|
||||||
const policyFile = join(rig.testDir!, 'force-confirm.toml');
|
const policyFile = join(rig.testDir!, 'force-confirm.toml');
|
||||||
writeFileSync(
|
writeFileSync(
|
||||||
policyFile,
|
policyFile,
|
||||||
`
|
`
|
||||||
[[rule]]
|
[[rule]]
|
||||||
name = "Force confirm browser_agent"
|
name = "Force confirm browser_agent"
|
||||||
toolName = "browser_agent"
|
toolName = "browser_agent"
|
||||||
decision = "ask_user"
|
decision = "ask_user"
|
||||||
priority = 200
|
priority = 200
|
||||||
`,
|
`,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Update settings.json in both project and home directories to point to the policy file
|
// Update settings.json in both project and home directories to point to the policy file
|
||||||
for (const baseDir of [rig.testDir!, rig.homeDir!]) {
|
for (const baseDir of [rig.testDir!, rig.homeDir!]) {
|
||||||
const settingsPath = join(baseDir, '.gemini', 'settings.json');
|
const settingsPath = join(baseDir, '.gemini', 'settings.json');
|
||||||
if (existsSync(settingsPath)) {
|
if (existsSync(settingsPath)) {
|
||||||
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
|
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
|
||||||
settings.policyPaths = [policyFile];
|
settings.policyPaths = [policyFile];
|
||||||
// Ensure folder trust is enabled
|
// Ensure folder trust is enabled
|
||||||
settings.security = settings.security || {};
|
settings.security = settings.security || {};
|
||||||
settings.security.folderTrust = settings.security.folderTrust || {};
|
settings.security.folderTrust = settings.security.folderTrust || {};
|
||||||
settings.security.folderTrust.enabled = true;
|
settings.security.folderTrust.enabled = true;
|
||||||
writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
|
writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const run = await rig.runInteractive({
|
const run = await rig.runInteractive({
|
||||||
approvalMode: 'default',
|
approvalMode: 'default',
|
||||||
env: {
|
env: {
|
||||||
GEMINI_CLI_INTEGRATION_TEST: 'true',
|
GEMINI_CLI_INTEGRATION_TEST: 'true',
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
await run.sendKeys(
|
await run.sendKeys(
|
||||||
'Open https://example.com and check if there is a heading\r',
|
'Open https://example.com and check if there is a heading\r',
|
||||||
);
|
);
|
||||||
await run.sendKeys('\r');
|
await run.sendKeys('\r');
|
||||||
|
|
||||||
// Handle confirmations.
|
// Handle confirmations.
|
||||||
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
|
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
|
||||||
await poll(
|
await poll(
|
||||||
() => stripAnsi(run.output).toLowerCase().includes('action required'),
|
() => stripAnsi(run.output).toLowerCase().includes('action required'),
|
||||||
60000,
|
60000,
|
||||||
1000,
|
1000,
|
||||||
);
|
);
|
||||||
await run.sendKeys('1\r');
|
await run.sendKeys('1\r');
|
||||||
await new Promise((r) => setTimeout(r, 2000));
|
await new Promise((r) => setTimeout(r, 2000));
|
||||||
|
|
||||||
// Handle privacy notice
|
// Handle privacy notice
|
||||||
await poll(
|
await poll(
|
||||||
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
|
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
|
||||||
5000,
|
5000,
|
||||||
100,
|
100,
|
||||||
);
|
);
|
||||||
await run.sendKeys('1\r');
|
await run.sendKeys('1\r');
|
||||||
await new Promise((r) => setTimeout(r, 5000));
|
await new Promise((r) => setTimeout(r, 5000));
|
||||||
|
|
||||||
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
|
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
|
||||||
await poll(
|
await poll(
|
||||||
() => {
|
() => {
|
||||||
const stripped = stripAnsi(run.output).toLowerCase();
|
const stripped = stripAnsi(run.output).toLowerCase();
|
||||||
return (
|
return (
|
||||||
stripped.includes('new_page') &&
|
stripped.includes('new_page') &&
|
||||||
stripped.includes('allow all server tools for this session')
|
stripped.includes('allow all server tools for this session')
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
60000,
|
60000,
|
||||||
1000,
|
1000,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Select "Allow all server tools for this session" (option 3)
|
// Select "Allow all server tools for this session" (option 3)
|
||||||
await run.sendKeys('3\r');
|
await run.sendKeys('3\r');
|
||||||
await new Promise((r) => setTimeout(r, 30000));
|
await new Promise((r) => setTimeout(r, 30000));
|
||||||
|
|
||||||
const output = stripAnsi(run.output).toLowerCase();
|
const output = stripAnsi(run.output).toLowerCase();
|
||||||
|
|
||||||
expect(output).toContain('browser_agent');
|
expect(output).toContain('browser_agent');
|
||||||
expect(output).toContain('completed successfully');
|
expect(output).toContain('completed successfully');
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
it('should show the visible warning when browser agent starts in existing session mode', async () => {
|
it('should show the visible warning when browser agent starts in existing session mode', async () => {
|
||||||
rig.setup('browser-session-warning', {
|
rig.setup('browser-session-warning', {
|
||||||
|
|||||||
@@ -121,6 +121,7 @@ describe('file-system', () => {
|
|||||||
|
|
||||||
const result = await rig.run({
|
const result = await rig.run({
|
||||||
args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
|
args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
|
||||||
|
timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
|
||||||
});
|
});
|
||||||
|
|
||||||
const foundToolCall = await rig.waitForToolCall('write_file');
|
const foundToolCall = await rig.waitForToolCall('write_file');
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
|
|||||||
expect(browserManager).toBeDefined();
|
expect(browserManager).toBeDefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should call printOutput when provided', async () => {
|
it('should not call printOutput for internal setup messages', async () => {
|
||||||
const printOutput = vi.fn();
|
const printOutput = vi.fn();
|
||||||
|
|
||||||
await createBrowserAgentDefinition(
|
await createBrowserAgentDefinition(
|
||||||
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
|
|||||||
printOutput,
|
printOutput,
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(printOutput).toHaveBeenCalled();
|
expect(printOutput).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should create definition with correct structure', async () => {
|
it('should create definition with correct structure', async () => {
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ import {
|
|||||||
export async function createBrowserAgentDefinition(
|
export async function createBrowserAgentDefinition(
|
||||||
config: Config,
|
config: Config,
|
||||||
messageBus: MessageBus,
|
messageBus: MessageBus,
|
||||||
printOutput?: (msg: string) => void,
|
_printOutput?: (msg: string) => void,
|
||||||
): Promise<{
|
): Promise<{
|
||||||
definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
|
definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
|
||||||
browserManager: BrowserManager;
|
browserManager: BrowserManager;
|
||||||
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
|
|||||||
const browserManager = BrowserManager.getInstance(config);
|
const browserManager = BrowserManager.getInstance(config);
|
||||||
await browserManager.ensureConnection();
|
await browserManager.ensureConnection();
|
||||||
|
|
||||||
if (printOutput) {
|
debugLogger.log('Browser connected with isolated MCP client.');
|
||||||
printOutput('Browser connected with isolated MCP client.');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine if input blocker should be active (non-headless + enabled)
|
// Determine if input blocker should be active (non-headless + enabled)
|
||||||
const shouldDisableInput = config.shouldDisableBrowserUserInput();
|
const shouldDisableInput = config.shouldDisableBrowserUserInput();
|
||||||
// Inject automation overlay and input blocker if not in headless mode
|
// Inject automation overlay and input blocker if not in headless mode
|
||||||
const browserConfig = config.getBrowserAgentConfig();
|
const browserConfig = config.getBrowserAgentConfig();
|
||||||
if (!browserConfig?.customConfig?.headless) {
|
if (!browserConfig?.customConfig?.headless) {
|
||||||
if (printOutput) {
|
debugLogger.log('Injecting automation overlay...');
|
||||||
printOutput('Injecting automation overlay...');
|
|
||||||
}
|
|
||||||
await injectAutomationOverlay(browserManager);
|
await injectAutomationOverlay(browserManager);
|
||||||
if (shouldDisableInput) {
|
if (shouldDisableInput) {
|
||||||
if (printOutput) {
|
debugLogger.log('Injecting input blocker...');
|
||||||
printOutput('Injecting input blocker...');
|
|
||||||
}
|
|
||||||
await injectInputBlocker(browserManager);
|
await injectInputBlocker(browserManager);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto';
|
|||||||
import type { Config } from '../../config/config.js';
|
import type { Config } from '../../config/config.js';
|
||||||
import { type AgentLoopContext } from '../../config/agent-loop-context.js';
|
import { type AgentLoopContext } from '../../config/agent-loop-context.js';
|
||||||
import { LocalAgentExecutor } from '../local-executor.js';
|
import { LocalAgentExecutor } from '../local-executor.js';
|
||||||
import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
|
|
||||||
import {
|
import {
|
||||||
BaseToolInvocation,
|
BaseToolInvocation,
|
||||||
type ToolResult,
|
type ToolResult,
|
||||||
@@ -30,6 +29,7 @@ import {
|
|||||||
type SubagentActivityEvent,
|
type SubagentActivityEvent,
|
||||||
type SubagentProgress,
|
type SubagentProgress,
|
||||||
type SubagentActivityItem,
|
type SubagentActivityItem,
|
||||||
|
AgentTerminateMode,
|
||||||
isToolActivityError,
|
isToolActivityError,
|
||||||
} from '../types.js';
|
} from '../types.js';
|
||||||
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
||||||
@@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
|||||||
AgentInputs,
|
AgentInputs,
|
||||||
ToolResult
|
ToolResult
|
||||||
> {
|
> {
|
||||||
|
private readonly agentName: string;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly context: AgentLoopContext,
|
private readonly context: AgentLoopContext,
|
||||||
params: AgentInputs,
|
params: AgentInputs,
|
||||||
@@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
|||||||
_toolName?: string,
|
_toolName?: string,
|
||||||
_toolDisplayName?: string,
|
_toolDisplayName?: string,
|
||||||
) {
|
) {
|
||||||
|
const resolvedName = _toolName ?? 'browser_agent';
|
||||||
// Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
|
// Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
|
||||||
super(
|
super(
|
||||||
params,
|
params,
|
||||||
messageBus,
|
messageBus,
|
||||||
_toolName ?? 'browser_agent',
|
resolvedName,
|
||||||
_toolDisplayName ?? 'Browser Agent',
|
_toolDisplayName ?? 'Browser Agent',
|
||||||
);
|
);
|
||||||
|
this.agentName = resolvedName;
|
||||||
}
|
}
|
||||||
|
|
||||||
private get config(): Config {
|
private get config(): Config {
|
||||||
@@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
|||||||
// Send initial state
|
// Send initial state
|
||||||
const initialProgress: SubagentProgress = {
|
const initialProgress: SubagentProgress = {
|
||||||
isSubagentProgress: true,
|
isSubagentProgress: true,
|
||||||
agentName: this['_toolName'] ?? 'browser_agent',
|
agentName: this.agentName,
|
||||||
recentActivity: [],
|
recentActivity: [],
|
||||||
state: 'running',
|
state: 'running',
|
||||||
};
|
};
|
||||||
@@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
|||||||
}
|
}
|
||||||
updateOutput({
|
updateOutput({
|
||||||
isSubagentProgress: true,
|
isSubagentProgress: true,
|
||||||
agentName: this['_toolName'] ?? 'browser_agent',
|
agentName: this.agentName,
|
||||||
recentActivity: [...recentActivity],
|
recentActivity: [...recentActivity],
|
||||||
state: 'running',
|
state: 'running',
|
||||||
} as SubagentProgress);
|
} as SubagentProgress);
|
||||||
@@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
|||||||
|
|
||||||
const progress: SubagentProgress = {
|
const progress: SubagentProgress = {
|
||||||
isSubagentProgress: true,
|
isSubagentProgress: true,
|
||||||
agentName: this['_toolName'] ?? 'browser_agent',
|
agentName: this.agentName,
|
||||||
recentActivity: [...recentActivity],
|
recentActivity: [...recentActivity],
|
||||||
state: 'running',
|
state: 'running',
|
||||||
};
|
};
|
||||||
@@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
|||||||
|
|
||||||
const output = await executor.run(this.params, signal);
|
const output = await executor.run(this.params, signal);
|
||||||
|
|
||||||
const displayResult = safeJsonToMarkdown(output.result);
|
|
||||||
|
|
||||||
const resultContent = `Browser agent finished.
|
const resultContent = `Browser agent finished.
|
||||||
Termination Reason: ${output.terminate_reason}
|
Termination Reason: ${output.terminate_reason}
|
||||||
Result:
|
Result:
|
||||||
${output.result}`;
|
${output.result}`;
|
||||||
|
|
||||||
const displayContent = `
|
// Map terminate_reason to the correct SubagentProgress state.
|
||||||
Browser Agent Finished
|
// GOAL = agent completed its task normally.
|
||||||
|
// ABORTED = user cancelled.
|
||||||
|
// Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error.
|
||||||
|
let progressState: SubagentProgress['state'];
|
||||||
|
if (output.terminate_reason === AgentTerminateMode.ABORTED) {
|
||||||
|
progressState = 'cancelled';
|
||||||
|
} else if (output.terminate_reason === AgentTerminateMode.GOAL) {
|
||||||
|
progressState = 'completed';
|
||||||
|
} else {
|
||||||
|
progressState = 'error';
|
||||||
|
}
|
||||||
|
|
||||||
Termination Reason: ${output.terminate_reason}
|
const progress: SubagentProgress = {
|
||||||
|
isSubagentProgress: true,
|
||||||
Result:
|
agentName: this.agentName,
|
||||||
${displayResult}
|
recentActivity: [...recentActivity],
|
||||||
`;
|
state: progressState,
|
||||||
|
result: output.result,
|
||||||
|
terminateReason: output.terminate_reason,
|
||||||
|
};
|
||||||
|
|
||||||
if (updateOutput) {
|
if (updateOutput) {
|
||||||
updateOutput({
|
updateOutput(progress);
|
||||||
isSubagentProgress: true,
|
|
||||||
agentName: this['_toolName'] ?? 'browser_agent',
|
|
||||||
recentActivity: [...recentActivity],
|
|
||||||
state: 'completed',
|
|
||||||
} as SubagentProgress);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
llmContent: [{ text: resultContent }],
|
llmContent: [{ text: resultContent }],
|
||||||
returnDisplay: displayContent,
|
returnDisplay: progress,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const rawErrorMessage =
|
const rawErrorMessage =
|
||||||
@@ -343,7 +353,7 @@ ${displayResult}
|
|||||||
|
|
||||||
const progress: SubagentProgress = {
|
const progress: SubagentProgress = {
|
||||||
isSubagentProgress: true,
|
isSubagentProgress: true,
|
||||||
agentName: this['_toolName'] ?? 'browser_agent',
|
agentName: this.agentName,
|
||||||
recentActivity: [...recentActivity],
|
recentActivity: [...recentActivity],
|
||||||
state: isAbort ? 'cancelled' : 'error',
|
state: isAbort ? 'cancelled' : 'error',
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user