feat(agent): replace the runtime npx for browser agent chrome devtool mcp with pre-built bundle (#22213)

Co-authored-by: Gaurav Ghosh <gaghosh@google.com>
Co-authored-by: Gaurav <39389231+gsquared94@users.noreply.github.com>
This commit is contained in:
cynthialong0-0
2026-03-16 01:05:38 -07:00
committed by GitHub
parent 17b37144a9
commit 366aa84395
12 changed files with 763 additions and 252 deletions
@@ -0,0 +1,22 @@
{
"description": "Explicitly promoted tools from chrome-devtools-mcp for the gemini-cli browser agent.",
"targetVersion": "0.19.0",
"exclude": [
{
"name": "lighthouse",
"reason": "3.5 MB pre-built bundle — not needed for gemini-cli browser agent's core tasks."
},
{
"name": "performance",
"reason": "Depends on chrome-devtools-frontend TraceEngine (~800 KB) — not needed for core tasks."
},
{
"name": "screencast",
"reason": "Requires ffmpeg at runtime — not a common browser agent use case and adds external dependency."
},
{
"name": "extensions",
"reason": "Extension management not relevant for the gemini-cli browser agent's current scope."
}
]
}
@@ -24,6 +24,7 @@ const mockBrowserManager = {
{ name: 'click', description: 'Click element' },
{ name: 'fill', description: 'Fill form field' },
{ name: 'navigate_page', description: 'Navigate to URL' },
{ name: 'type_text', description: 'Type text into an element' },
// Visual tools (from --experimental-vision)
{ name: 'click_at', description: 'Click at coordinates' },
]),
@@ -70,6 +71,7 @@ describe('browserAgentFactory', () => {
{ name: 'click', description: 'Click element' },
{ name: 'fill', description: 'Fill form field' },
{ name: 'navigate_page', description: 'Navigate to URL' },
{ name: 'type_text', description: 'Type text into an element' },
// Visual tools (from --experimental-vision)
{ name: 'click_at', description: 'Click at coordinates' },
]);
@@ -135,7 +137,7 @@ describe('browserAgentFactory', () => {
);
expect(definition.name).toBe(BROWSER_AGENT_NAME);
// 5 MCP tools + 1 type_text composite tool (no analyze_screenshot without visualModel)
// 6 MCP tools (no analyze_screenshot without visualModel)
expect(definition.toolConfig?.tools).toHaveLength(6);
});
@@ -228,7 +230,7 @@ describe('browserAgentFactory', () => {
mockMessageBus,
);
// 5 MCP tools + 1 type_text + 1 analyze_screenshot
// 6 MCP tools + 1 analyze_screenshot
expect(definition.toolConfig?.tools).toHaveLength(7);
const toolNames =
definition.toolConfig?.tools
@@ -268,6 +270,7 @@ describe('browserAgentFactory', () => {
{ name: 'close_page', description: 'Close page' },
{ name: 'select_page', description: 'Select page' },
{ name: 'press_key', description: 'Press key' },
{ name: 'type_text', description: 'Type text into an element' },
{ name: 'hover', description: 'Hover element' },
]);
@@ -291,7 +294,6 @@ describe('browserAgentFactory', () => {
expect(toolNames).toContain('click');
expect(toolNames).toContain('take_snapshot');
expect(toolNames).toContain('press_key');
// Custom composite tool must also be present
expect(toolNames).toContain('type_text');
// Total: 10 MCP tools, including type_text (no analyze_screenshot without visualModel)
expect(definition.toolConfig?.tools).toHaveLength(10);
@@ -39,6 +39,7 @@ vi.mock('@modelcontextprotocol/sdk/client/stdio.js', () => ({
vi.mock('../../utils/debugLogger.js', () => ({
debugLogger: {
log: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
},
}));
@@ -47,6 +48,20 @@ vi.mock('./automationOverlay.js', () => ({
injectAutomationOverlay: vi.fn().mockResolvedValue(undefined),
}));
// Partially mock node:fs so tests control whether the bundled
// chrome-devtools-mcp entry point appears to exist. Every other path is
// delegated to the real implementation.
vi.mock('node:fs', async (importOriginal) => {
  const actual = await importOriginal<typeof import('node:fs')>();
  return {
    ...actual,
    existsSync: vi.fn((p: import('node:fs').PathLike) => {
      // existsSync accepts string | Buffer | URL, so only inspect strings.
      // Separators are normalised because path.resolve() produces
      // backslashes on Windows, which a forward-slash suffix never matches.
      if (
        typeof p === 'string' &&
        p.replace(/\\/g, '/').endsWith('bundled/chrome-devtools-mcp.mjs')
      ) {
        return false; // Default: bundle absent unless a test overrides it
      }
      return actual.existsSync(p);
    }),
  };
});
import * as fs from 'node:fs';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
@@ -96,6 +111,40 @@ describe('BrowserManager', () => {
vi.restoreAllMocks();
});
describe('MCP bundled path resolution', () => {
  // The bundle path is built with path.resolve(), which emits backslash
  // separators on Windows, so every pattern below accepts either separator.

  it('should use bundled path if it exists (handles bundled CLI)', async () => {
    vi.mocked(fs.existsSync).mockReturnValue(true);
    const manager = new BrowserManager(mockConfig);
    await manager.ensureConnection();

    expect(StdioClientTransport).toHaveBeenCalledWith(
      expect.objectContaining({
        command: 'node',
        args: expect.arrayContaining([
          expect.stringMatching(/bundled[\\/]chrome-devtools-mcp\.mjs$/),
        ]),
      }),
    );
  });

  it('should fall back to development path if bundled path does not exist', async () => {
    vi.mocked(fs.existsSync).mockReturnValue(false);
    const manager = new BrowserManager(mockConfig);
    await manager.ensureConnection();

    // The fallback resolves to either <root>/bundled or <root>/dist/bundled
    // depending on where the module itself is loaded from.
    expect(StdioClientTransport).toHaveBeenCalledWith(
      expect.objectContaining({
        command: 'node',
        args: expect.arrayContaining([
          expect.stringMatching(
            /(dist[\\/])?bundled[\\/]chrome-devtools-mcp\.mjs$/,
          ),
        ]),
      }),
    );
  });
});
describe('getRawMcpClient', () => {
it('should ensure connection and return raw MCP client', async () => {
const manager = new BrowserManager(mockConfig);
@@ -222,10 +271,9 @@ describe('BrowserManager', () => {
// Verify StdioClientTransport was created with correct args
expect(StdioClientTransport).toHaveBeenCalledWith(
expect.objectContaining({
command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
command: 'node',
args: expect.arrayContaining([
'-y',
expect.stringMatching(/chrome-devtools-mcp@/),
expect.stringMatching(/chrome-devtools-mcp\.mjs$/),
'--experimental-vision',
]),
}),
@@ -235,6 +283,7 @@ describe('BrowserManager', () => {
?.args as string[];
expect(args).not.toContain('--isolated');
expect(args).not.toContain('--autoConnect');
expect(args).not.toContain('-y');
// Persistent mode should set the default --userDataDir under ~/.gemini
expect(args).toContain('--userDataDir');
const userDataDirIndex = args.indexOf('--userDataDir');
@@ -294,7 +343,7 @@ describe('BrowserManager', () => {
expect(StdioClientTransport).toHaveBeenCalledWith(
expect.objectContaining({
command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
command: 'node',
args: expect.arrayContaining(['--headless']),
}),
);
@@ -319,7 +368,7 @@ describe('BrowserManager', () => {
expect(StdioClientTransport).toHaveBeenCalledWith(
expect.objectContaining({
command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
command: 'node',
args: expect.arrayContaining(['--userDataDir', '/path/to/profile']),
}),
);
@@ -25,10 +25,12 @@ import type { Config } from '../../config/config.js';
import { Storage } from '../../config/storage.js';
import { injectInputBlocker } from './inputBlocker.js';
import * as path from 'node:path';
import * as fs from 'node:fs';
import { fileURLToPath } from 'node:url';
import { injectAutomationOverlay } from './automationOverlay.js';
// Pin chrome-devtools-mcp version for reproducibility.
const CHROME_DEVTOOLS_MCP_VERSION = '0.17.1';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Default browser profile directory name within ~/.gemini/
const BROWSER_PROFILE_DIR = 'cli-browser-profile';
@@ -279,7 +281,7 @@ export class BrowserManager {
this.rawMcpClient = undefined;
}
// Close transport (this terminates the npx process and browser)
// Close transport (this terminates the browser)
if (this.mcpTransport) {
try {
await this.mcpTransport.close();
@@ -297,8 +299,7 @@ export class BrowserManager {
/**
* Connects to chrome-devtools-mcp which manages the browser process.
*
* Spawns npx chrome-devtools-mcp with:
* - --isolated: Manages its own browser instance
* Spawns node with the bundled chrome-devtools-mcp.mjs.
* - --experimental-vision: Enables visual tools (click_at, etc.)
*
* IMPORTANT: This does NOT use McpClientManager and does NOT register
@@ -323,11 +324,7 @@ export class BrowserManager {
const browserConfig = this.config.getBrowserAgentConfig();
const sessionMode = browserConfig.customConfig.sessionMode ?? 'persistent';
const mcpArgs = [
'-y',
`chrome-devtools-mcp@${CHROME_DEVTOOLS_MCP_VERSION}`,
'--experimental-vision',
];
const mcpArgs = ['--experimental-vision'];
// Session mode determines how the browser is managed:
// - "isolated": Temp profile, cleaned up after session (--isolated)
@@ -373,15 +370,28 @@ export class BrowserManager {
}
debugLogger.log(
`Launching chrome-devtools-mcp (${sessionMode} mode) with args: ${mcpArgs.join(' ')}`,
`Launching bundled chrome-devtools-mcp (${sessionMode} mode) with args: ${mcpArgs.join(' ')}`,
);
// Create stdio transport to npx chrome-devtools-mcp.
// Create stdio transport to the bundled chrome-devtools-mcp.
// stderr is piped (not inherited) to prevent MCP server banners and
// warnings from corrupting the UI in alternate buffer mode.
let bundleMcpPath = path.resolve(
__dirname,
'bundled/chrome-devtools-mcp.mjs',
);
if (!fs.existsSync(bundleMcpPath)) {
bundleMcpPath = path.resolve(
__dirname,
__dirname.includes(`${path.sep}dist${path.sep}`)
? '../../../bundled/chrome-devtools-mcp.mjs'
: '../../../dist/bundled/chrome-devtools-mcp.mjs',
);
}
this.mcpTransport = new StdioClientTransport({
command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
args: mcpArgs,
command: 'node',
args: [bundleMcpPath, ...mcpArgs],
stderr: 'pipe',
});
@@ -492,8 +502,7 @@ export class BrowserManager {
`Timed out connecting to Chrome: ${message}\n\n` +
`Possible causes:\n` +
` 1. Chrome is not installed or not in PATH\n` +
` 2. npx cannot download chrome-devtools-mcp (check network/proxy)\n` +
` 3. Chrome failed to start (try setting headless: true in settings.json)`,
` 2. Chrome failed to start (try setting headless: true in settings.json)`,
);
}
@@ -68,18 +68,19 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
expect(tools).toHaveLength(3);
expect(tools).toHaveLength(2);
expect(tools[0].name).toBe('take_snapshot');
expect(tools[1].name).toBe('click');
expect(tools[2].name).toBe('type_text');
});
it('should return tools with correct description', async () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
// Descriptions include augmented hints, so we check they contain the original
@@ -93,6 +94,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const schema = tools[0].schema;
@@ -106,6 +108,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const invocation = tools[0].build({ verbose: true });
@@ -118,6 +121,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const invocation = tools[0].build({});
@@ -131,6 +135,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const invocation = tools[1].build({ uid: 'elem-123' });
@@ -149,6 +154,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const invocation = tools[0].build({ verbose: true });
@@ -167,6 +173,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const invocation = tools[1].build({ uid: 'invalid' });
@@ -184,6 +191,7 @@ describe('mcpToolWrapper', () => {
const tools = await createMcpDeclarativeTools(
mockBrowserManager,
mockMessageBus,
false,
);
const invocation = tools[0].build({});
@@ -175,144 +175,6 @@ class McpToolInvocation extends BaseToolInvocation<
}
}
/**
 * Composite tool invocation that types a full string by calling press_key
 * for each character internally, avoiding N model round-trips.
 *
 * Individual press_key failures are logged and skipped (best effort).
 * A cancellation observed before or during typing is reported as an error
 * result rather than as a successful run.
 */
class TypeTextInvocation extends BaseToolInvocation<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Used to dispatch the underlying press_key calls.
   * @param text The text to type, one press_key call per character.
   * @param submitKey Optional key pressed once after the text is typed.
   * @param messageBus Message bus forwarded to the base invocation.
   */
  constructor(
    private readonly browserManager: BrowserManager,
    private readonly text: string,
    private readonly submitKey: string | undefined,
    messageBus: MessageBus,
  ) {
    super({ text, submitKey }, messageBus, 'type_text', 'type_text');
  }

  /** Short description: the first 50 characters of the text plus the submit key, if any. */
  getDescription(): string {
    const preview = `"${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`;
    return this.submitKey
      ? `type_text: ${preview} + ${this.submitKey}`
      : `type_text: ${preview}`;
  }

  /** Returns MCP-style confirmation details, or false when no message bus is set. */
  protected override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (!this.messageBus) {
      return false;
    }

    return {
      type: 'mcp',
      title: `Confirm Tool: type_text`,
      serverName: 'browser-agent',
      toolName: 'type_text',
      toolDisplayName: 'type_text',
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        await this.publishPolicyUpdate(outcome);
      },
    };
  }

  /** Policy updates for this tool are recorded under the 'browser-agent' MCP name. */
  override getPolicyUpdateOptions(
    _outcome: ToolConfirmationOutcome,
  ): PolicyUpdateOptions | undefined {
    return {
      mcpName: 'browser-agent',
    };
  }

  /**
   * Types the text and optionally presses the submit key.
   *
   * @param signal Abort signal checked before typing, between characters and
   *   after typing.
   * @returns A success summary, or an error result on cancellation or failure.
   * @throws Rethrows errors whose message contains
   *   'Could not connect to Chrome'.
   */
  override async execute(signal: AbortSignal): Promise<ToolResult> {
    try {
      if (signal.aborted) {
        return {
          llmContent: 'Error: Operation cancelled before typing started.',
          returnDisplay: 'Operation cancelled before typing started.',
          error: { message: 'Operation cancelled' },
        };
      }

      await this.typeCharByChar(signal);

      // typeCharByChar stops silently once the signal is aborted, so without
      // this check a cancelled run would be reported as a success.
      if (signal.aborted) {
        return {
          llmContent: 'Error: Operation cancelled while typing.',
          returnDisplay: 'Operation cancelled while typing.',
          error: { message: 'Operation cancelled' },
        };
      }

      // Optionally press a submit key (Enter, Tab, etc.) after typing
      if (this.submitKey) {
        const keyResult = await this.browserManager.callTool(
          'press_key',
          { key: this.submitKey },
          signal,
        );
        if (keyResult.isError) {
          const errText = this.extractErrorText(keyResult);
          debugLogger.warn(
            `type_text: submitKey("${this.submitKey}") failed: ${errText}`,
          );
        }
      }

      const summary = this.submitKey
        ? `Successfully typed "${this.text}" and pressed ${this.submitKey}`
        : `Successfully typed "${this.text}"`;

      return {
        llmContent: summary,
        returnDisplay: summary,
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);

      // Chrome connection errors are fatal
      if (errorMsg.includes('Could not connect to Chrome')) {
        throw error;
      }

      debugLogger.error(`type_text failed: ${errorMsg}`);
      return {
        llmContent: `Error: ${errorMsg}`,
        returnDisplay: `Error: ${errorMsg}`,
        error: { message: errorMsg },
      };
    }
  }

  /** Types each character via individual press_key MCP calls. */
  private async typeCharByChar(signal: AbortSignal): Promise<void> {
    const chars = [...this.text]; // Spread iterates by code point, not UTF-16 unit
    for (const char of chars) {
      if (signal.aborted) return;

      // Map special characters to key names
      const key = char === ' ' ? 'Space' : char;
      const result = await this.browserManager.callTool(
        'press_key',
        { key },
        signal,
      );
      if (result.isError) {
        // Best effort: log the failed key and continue with the next one.
        debugLogger.warn(
          `type_text: press_key("${key}") failed: ${this.extractErrorText(result)}`,
        );
      }
    }
  }

  /** Extract error text from an MCP tool result. */
  private extractErrorText(result: McpToolCallResult): string {
    return (
      result.content
        ?.filter(
          (c: { type: string; text?: string }) => c.type === 'text' && c.text,
        )
        .map((c: { type: string; text?: string }) => c.text)
        .join('\n') || 'Unknown error'
    );
  }
}
/**
* DeclarativeTool wrapper for an MCP tool.
*/
@@ -353,65 +215,6 @@ class McpDeclarativeTool extends DeclarativeTool<
}
}
/**
 * Declarative wrapper that exposes the composite `type_text` tool and builds
 * a TypeTextInvocation for each call.
 */
class TypeTextDeclarativeTool extends DeclarativeTool<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Browser manager handed to each invocation.
   * @param messageBus Message bus for tool invocations.
   */
  constructor(
    private readonly browserManager: BrowserManager,
    messageBus: MessageBus,
  ) {
    super(
      'type_text',
      'type_text',
      [
        'Types a full text string into the currently focused element. ',
        'Much faster than calling press_key for each character individually. ',
        'Use this to enter text into form fields, search boxes, spreadsheet cells, or any focused input. ',
        'The element must already be focused (e.g., after a click). ',
        'Use submitKey to press a key after typing (e.g., submitKey="Enter" to submit a form or confirm a value, submitKey="Tab" to move to the next field).',
      ].join(''),
      Kind.Other,
      {
        type: 'object',
        properties: {
          text: {
            type: 'string',
            description: 'The text to type into the focused element.',
          },
          submitKey: {
            type: 'string',
            description: [
              'Optional key to press after typing (e.g., "Enter", "Tab", "Escape"). ',
              'Useful for submitting form fields or moving to the next cell in a spreadsheet.',
            ].join(''),
          },
        },
        required: ['text'],
      },
      messageBus,
      /* isOutputMarkdown */ true,
      /* canUpdateOutput */ false,
    );
  }

  /**
   * Builds the invocation for one call.
   *
   * `submitKey` is forwarded only when it is a non-empty string; a missing
   * `text` is treated as the empty string.
   */
  build(
    params: Record<string, unknown>,
  ): ToolInvocation<Record<string, unknown>, ToolResult> {
    const requestedKey = params['submitKey'];
    let submitKey: string | undefined;
    // eslint-disable-next-line no-restricted-syntax
    if (typeof requestedKey === 'string' && requestedKey !== '') {
      submitKey = requestedKey;
    }

    const textToType = String(params['text'] ?? '');

    return new TypeTextInvocation(
      this.browserManager,
      textToType,
      submitKey,
      this.messageBus,
    );
  }
}
/**
* Creates DeclarativeTool instances from dynamically discovered MCP tools.
@@ -423,13 +226,14 @@ class TypeTextDeclarativeTool extends DeclarativeTool<
*
* @param browserManager The browser manager with isolated MCP client
* @param messageBus Message bus for tool invocations
* @param shouldDisableInput Whether input should be disabled for this agent
* @returns Array of DeclarativeTools that dispatch to the isolated MCP client
*/
export async function createMcpDeclarativeTools(
browserManager: BrowserManager,
messageBus: MessageBus,
shouldDisableInput: boolean = false,
): Promise<Array<McpDeclarativeTool | TypeTextDeclarativeTool>> {
): Promise<McpDeclarativeTool[]> {
// Get dynamically discovered tools from the MCP server
const mcpTools = await browserManager.getDiscoveredTools();
@@ -438,29 +242,25 @@ export async function createMcpDeclarativeTools(
(shouldDisableInput ? ' (input blocker enabled)' : ''),
);
const tools: Array<McpDeclarativeTool | TypeTextDeclarativeTool> =
mcpTools.map((mcpTool) => {
const schema = convertMcpToolToFunctionDeclaration(mcpTool);
// Augment description with uid-context hints
const augmentedDescription = augmentToolDescription(
mcpTool.name,
mcpTool.description ?? '',
);
return new McpDeclarativeTool(
browserManager,
mcpTool.name,
augmentedDescription,
schema.parametersJsonSchema,
messageBus,
shouldDisableInput,
);
});
// Add custom composite tools
tools.push(new TypeTextDeclarativeTool(browserManager, messageBus));
const tools: McpDeclarativeTool[] = mcpTools.map((mcpTool) => {
const schema = convertMcpToolToFunctionDeclaration(mcpTool);
// Augment description with uid-context hints
const augmentedDescription = augmentToolDescription(
mcpTool.name,
mcpTool.description ?? '',
);
return new McpDeclarativeTool(
browserManager,
mcpTool.name,
augmentedDescription,
schema.parametersJsonSchema,
messageBus,
shouldDisableInput,
);
});
debugLogger.log(
`Total tools registered: ${tools.length} (${mcpTools.length} MCP + 1 custom)`,
`Total tools registered: ${tools.length} (${mcpTools.length} MCP)`,
);
return tools;