/** * @license * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ /** * @fileoverview Creates DeclarativeTool classes for MCP tools. * * These tools are ONLY registered in the browser agent's isolated ToolRegistry, * NOT in the main agent's registry. They dispatch to the BrowserManager's * isolated MCP client directly. * * Tool definitions are dynamically discovered from chrome-devtools-mcp * at runtime, not hardcoded. */ import type { FunctionDeclaration } from '@google/genai'; import type { Tool as McpTool } from '@modelcontextprotocol/sdk/types.js'; import { type ToolConfirmationOutcome, DeclarativeTool, BaseToolInvocation, Kind, type ToolResult, type ToolInvocation, type ToolCallConfirmationDetails, type PolicyUpdateOptions, } from '../../tools/tools.js'; import type { MessageBus } from '../../confirmation-bus/message-bus.js'; import type { BrowserManager, McpToolCallResult } from './browserManager.js'; import { debugLogger } from '../../utils/debugLogger.js'; /** * Tool invocation that dispatches to BrowserManager's isolated MCP client. 
*/
class McpToolInvocation extends BaseToolInvocation<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Manager whose `callTool` is used to run the tool.
   * @param toolName Name of the MCP tool; also passed to the base class as
   *   both of its name arguments.
   * @param params Arguments forwarded verbatim to the MCP tool.
   * @param messageBus Bus handed to the base invocation.
   */
  constructor(
    protected readonly browserManager: BrowserManager,
    protected readonly toolName: string,
    params: Record<string, unknown>,
    messageBus: MessageBus,
  ) {
    super(params, messageBus, toolName, toolName);
  }

  /** Short human-readable description of this call. */
  getDescription(): string {
    return `Calling MCP tool: ${this.toolName}`;
  }

  /**
   * Builds the confirmation prompt for this call.
   *
   * @returns `false` when no message bus is available, otherwise
   *   MCP-style confirmation details attributed to the `browser-agent` server.
   */
  protected override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (!this.messageBus) {
      return false;
    }

    return {
      type: 'mcp',
      title: `Confirm MCP Tool: ${this.toolName}`,
      serverName: 'browser-agent',
      toolName: this.toolName,
      toolDisplayName: this.toolName,
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        await this.publishPolicyUpdate(outcome);
      },
    };
  }

  /** Policy updates for this tool are always recorded under `browser-agent`. */
  override getPolicyUpdateOptions(
    _outcome: ToolConfirmationOutcome,
  ): PolicyUpdateOptions | undefined {
    return {
      mcpName: 'browser-agent',
    };
  }

  /**
   * Runs the MCP tool and converts its response into a ToolResult.
   *
   * @param signal Abort signal forwarded to `browserManager.callTool`.
   * @returns The joined text content of the response (post-processed), or an
   *   error result when the tool reports `isError` or the call throws.
   * @throws Re-throws errors whose message contains
   *   "Could not connect to Chrome".
   */
  async execute(signal: AbortSignal): Promise<ToolResult> {
    try {
      const result: McpToolCallResult = await this.browserManager.callTool(
        this.toolName,
        this.params,
        signal,
      );

      // Extract text content from MCP response
      let textContent = '';
      if (result.content && Array.isArray(result.content)) {
        textContent = result.content
          .filter((c) => c.type === 'text' && c.text)
          .map((c) => c.text)
          .join('\n');
      }

      // Post-process to add contextual hints for common error patterns
      const processedContent = postProcessToolResult(
        this.toolName,
        textContent,
      );

      if (result.isError) {
        return {
          llmContent: `Error: ${processedContent}`,
          returnDisplay: `Error: ${processedContent}`,
          error: { message: textContent },
        };
      }

      return {
        llmContent: processedContent || 'Tool executed successfully.',
        returnDisplay: processedContent || 'Tool executed successfully.',
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);

      // Chrome connection errors are fatal — re-throw to terminate the agent
      // immediately instead of returning a result the LLM would retry.
      if (errorMsg.includes('Could not connect to Chrome')) {
        throw error;
      }

      debugLogger.error(`MCP tool ${this.toolName} failed: ${errorMsg}`);
      return {
        llmContent: `Error: ${errorMsg}`,
        returnDisplay: `Error: ${errorMsg}`,
        error: { message: errorMsg },
      };
    }
  }
}

/**
 * Composite tool invocation that types a full string by calling press_key
 * for each character internally, avoiding N model round-trips.
 */
class TypeTextInvocation extends BaseToolInvocation<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Manager whose `callTool` issues the press_key calls.
   * @param text Text to type, one press_key call per code point.
   * @param submitKey Optional key pressed once after the text.
   * @param messageBus Bus handed to the base invocation.
   */
  constructor(
    private readonly browserManager: BrowserManager,
    private readonly text: string,
    private readonly submitKey: string | undefined,
    messageBus: MessageBus,
  ) {
    super({ text, submitKey }, messageBus, 'type_text', 'type_text');
  }

  /** Describes the call, showing at most the first 50 characters of the text. */
  getDescription(): string {
    const preview = `"${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`;
    return this.submitKey
      ? `type_text: ${preview} + ${this.submitKey}`
      : `type_text: ${preview}`;
  }

  /**
   * Builds the confirmation prompt for this call.
   *
   * @returns `false` when no message bus is available, otherwise
   *   MCP-style confirmation details attributed to the `browser-agent` server.
   */
  protected override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (!this.messageBus) {
      return false;
    }

    return {
      type: 'mcp',
      title: `Confirm Tool: type_text`,
      serverName: 'browser-agent',
      toolName: 'type_text',
      toolDisplayName: 'type_text',
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        await this.publishPolicyUpdate(outcome);
      },
    };
  }

  /** Policy updates for this tool are always recorded under `browser-agent`. */
  override getPolicyUpdateOptions(
    _outcome: ToolConfirmationOutcome,
  ): PolicyUpdateOptions | undefined {
    return {
      mcpName: 'browser-agent',
    };
  }

  /**
   * Types the text and then optionally presses the submit key.
   *
   * Success is reported only when typing ran to completion and, if a submit
   * key was requested, that key press did not return an error. Individual
   * character failures remain best-effort and are only logged.
   *
   * @param signal Abort signal checked before typing, between characters and
   *   before the submit key; also forwarded to every `callTool` call.
   * @returns A success summary, or an error result on cancellation, submit-key
   *   failure, or a thrown error.
   * @throws Re-throws errors whose message contains
   *   "Could not connect to Chrome".
   */
  override async execute(signal: AbortSignal): Promise<ToolResult> {
    try {
      if (signal.aborted) {
        return {
          llmContent: 'Error: Operation cancelled before typing started.',
          returnDisplay: 'Operation cancelled before typing started.',
          error: { message: 'Operation cancelled' },
        };
      }

      await this.typeCharByChar(signal);

      // typeCharByChar returns early on abort, so an aborted signal here means
      // the text may be incomplete and the submit key was never pressed.
      // Report that instead of claiming success.
      if (signal.aborted) {
        const cancelled =
          'Operation cancelled; the text may have been only partially typed.';
        return {
          llmContent: `Error: ${cancelled}`,
          returnDisplay: cancelled,
          error: { message: 'Operation cancelled' },
        };
      }

      // Optionally press a submit key (Enter, Tab, etc.) after typing
      if (this.submitKey) {
        const keyResult = await this.browserManager.callTool(
          'press_key',
          { key: this.submitKey },
          signal,
        );
        if (keyResult.isError) {
          const errText = this.extractErrorText(keyResult);
          debugLogger.warn(
            `type_text: submitKey("${this.submitKey}") failed: ${errText}`,
          );
          // The submit key did not go through: do not say it was pressed.
          const failure = `Typed "${this.text}" but failed to press ${this.submitKey}: ${errText}`;
          return {
            llmContent: `Error: ${failure}`,
            returnDisplay: `Error: ${failure}`,
            error: { message: errText },
          };
        }
      }

      const summary = this.submitKey
        ? `Successfully typed "${this.text}" and pressed ${this.submitKey}`
        : `Successfully typed "${this.text}"`;

      return {
        llmContent: summary,
        returnDisplay: summary,
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);

      // Chrome connection errors are fatal
      if (errorMsg.includes('Could not connect to Chrome')) {
        throw error;
      }

      debugLogger.error(`type_text failed: ${errorMsg}`);
      return {
        llmContent: `Error: ${errorMsg}`,
        returnDisplay: `Error: ${errorMsg}`,
        error: { message: errorMsg },
      };
    }
  }

  /**
   * Types each character via individual press_key MCP calls.
   *
   * Stops silently as soon as the signal is aborted. A failed key press is
   * logged and typing continues with the next character.
   */
  private async typeCharByChar(signal: AbortSignal): Promise<void> {
    const chars = [...this.text]; // Handle Unicode correctly
    for (const char of chars) {
      if (signal.aborted) return;

      // Map special characters to key names
      const key = char === ' ' ? 'Space' : char;
      const result = await this.browserManager.callTool(
        'press_key',
        { key },
        signal,
      );

      if (result.isError) {
        debugLogger.warn(
          `type_text: press_key("${key}") failed: ${this.extractErrorText(result)}`,
        );
      }
    }
  }

  /**
   * Extract error text from an MCP tool result.
   *
   * @returns The non-empty text parts joined by newlines, or
   *   'Unknown error' when there are none.
   */
  private extractErrorText(result: McpToolCallResult): string {
    return (
      result.content
        ?.filter(
          (c: { type: string; text?: string }) => c.type === 'text' && c.text,
        )
        .map((c: { type: string; text?: string }) => c.text)
        .join('\n') || 'Unknown error'
    );
  }
}

/**
 * DeclarativeTool wrapper for an MCP tool.
*/
class McpDeclarativeTool extends DeclarativeTool<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Manager handed to every invocation built by this tool.
   * @param name Tool name; passed to the base class as both of its name
   *   arguments.
   * @param description Description passed to the base class.
   * @param parameterSchema Parameter schema passed through to the base class.
   * @param messageBus Bus passed to the base class and to built invocations.
   */
  constructor(
    protected readonly browserManager: BrowserManager,
    name: string,
    description: string,
    parameterSchema: unknown,
    messageBus: MessageBus,
  ) {
    super(
      name,
      name,
      description,
      Kind.Other,
      parameterSchema,
      messageBus,
      /* isOutputMarkdown */ true,
      /* canUpdateOutput */ false,
    );
  }

  /** Creates an invocation that forwards `params` to the MCP tool. */
  build(
    params: Record<string, unknown>,
  ): ToolInvocation<Record<string, unknown>, ToolResult> {
    return new McpToolInvocation(
      this.browserManager,
      this.name,
      params,
      this.messageBus,
    );
  }
}

/**
 * DeclarativeTool for the custom type_text composite tool.
 */
class TypeTextDeclarativeTool extends DeclarativeTool<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Manager handed to every invocation built by this tool.
   * @param messageBus Bus passed to the base class and to built invocations.
   */
  constructor(
    private readonly browserManager: BrowserManager,
    messageBus: MessageBus,
  ) {
    super(
      'type_text',
      'type_text',
      'Types a full text string into the currently focused element. ' +
        'Much faster than calling press_key for each character individually. ' +
        'Use this to enter text into form fields, search boxes, spreadsheet cells, or any focused input. ' +
        'The element must already be focused (e.g., after a click). ' +
        'Use submitKey to press a key after typing (e.g., submitKey="Enter" to submit a form or confirm a value, submitKey="Tab" to move to the next field).',
      Kind.Other,
      {
        type: 'object',
        properties: {
          text: {
            type: 'string',
            description: 'The text to type into the focused element.',
          },
          submitKey: {
            type: 'string',
            description:
              'Optional key to press after typing (e.g., "Enter", "Tab", "Escape"). ' +
              'Useful for submitting form fields or moving to the next cell in a spreadsheet.',
          },
        },
        required: ['text'],
      },
      messageBus,
      /* isOutputMarkdown */ true,
      /* canUpdateOutput */ false,
    );
  }

  /**
   * Creates a type_text invocation from raw parameters.
   *
   * `text` is coerced with `String()` (a missing value becomes ''), and
   * `submitKey` is used only when it is a non-empty string.
   */
  build(
    params: Record<string, unknown>,
  ): ToolInvocation<Record<string, unknown>, ToolResult> {
    const submitKey =
      // eslint-disable-next-line no-restricted-syntax
      typeof params['submitKey'] === 'string' && params['submitKey']
        ? params['submitKey']
        : undefined;

    return new TypeTextInvocation(
      this.browserManager,
      String(params['text'] ?? ''),
      submitKey,
      this.messageBus,
    );
  }
}

/**
 * Creates DeclarativeTool instances from dynamically discovered MCP tools,
 * plus custom composite tools (like type_text).
 *
 * These tools are registered in the browser agent's isolated ToolRegistry,
 * NOT in the main agent's registry.
 *
 * Tool definitions are fetched dynamically from the MCP server at runtime.
 *
 * @param browserManager The browser manager with isolated MCP client
 * @param messageBus Message bus for tool invocations
 * @returns Array of DeclarativeTools that dispatch to the isolated MCP client
 */
export async function createMcpDeclarativeTools(
  browserManager: BrowserManager,
  messageBus: MessageBus,
): Promise<Array<McpDeclarativeTool | TypeTextDeclarativeTool>> {
  // Get dynamically discovered tools from the MCP server
  const mcpTools = await browserManager.getDiscoveredTools();

  debugLogger.log(
    `Creating ${mcpTools.length} declarative tools for browser agent`,
  );

  const tools: Array<McpDeclarativeTool | TypeTextDeclarativeTool> =
    mcpTools.map((mcpTool) => {
      const schema = convertMcpToolToFunctionDeclaration(mcpTool);
      // Augment description with uid-context hints
      const augmentedDescription = augmentToolDescription(
        mcpTool.name,
        mcpTool.description ?? '',
      );
      return new McpDeclarativeTool(
        browserManager,
        mcpTool.name,
        augmentedDescription,
        schema.parametersJsonSchema,
        messageBus,
      );
    });

  // Add custom composite tools
  tools.push(new TypeTextDeclarativeTool(browserManager, messageBus));

  debugLogger.log(
    `Total tools registered: ${tools.length} (${mcpTools.length} MCP + 1 custom)`,
  );

  return tools;
}

/**
 * Converts MCP tool definition to Gemini FunctionDeclaration.
 *
 * A missing description becomes '' and a missing input schema becomes an
 * empty object schema.
 */
function convertMcpToolToFunctionDeclaration(
  mcpTool: McpTool,
): FunctionDeclaration {
  // MCP tool inputSchema is a JSON Schema object
  // We pass it directly as parametersJsonSchema
  return {
    name: mcpTool.name,
    description: mcpTool.description ?? '',
    parametersJsonSchema: mcpTool.inputSchema ?? {
      type: 'object',
      properties: {},
    },
  };
}

/**
 * Augments MCP tool descriptions with usage guidance.
* Adds semantic hints and usage rules directly in tool descriptions * so the model makes correct tool choices without system prompt overhead. * * Actual chrome-devtools-mcp tools: * Input: click, drag, fill, fill_form, handle_dialog, hover, press_key, upload_file * Navigation: close_page, list_pages, navigate_page, new_page, select_page, wait_for * Emulation: emulate, resize_page * Performance: performance_analyze_insight, performance_start_trace, performance_stop_trace * Network: get_network_request, list_network_requests * Debugging: evaluate_script, get_console_message, list_console_messages, take_screenshot, take_snapshot * Vision (--experimental-vision): click_at, analyze_screenshot */ function augmentToolDescription(toolName: string, description: string): string { // More-specific keys MUST come before shorter keys to prevent // partial matching from short-circuiting (e.g., fill_form before fill). const hints: Record = { fill_form: ' Fills multiple standard HTML form fields at once. Same limitations as fill — does not work on canvas/custom widgets.', fill: ' Fills standard HTML form fields (,