gemini-cli/packages/core/src/agents/browser/mcpToolWrapper.ts

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @fileoverview Creates DeclarativeTool classes for MCP tools.
 *
 * These tools are ONLY registered in the browser agent's isolated ToolRegistry,
 * NOT in the main agent's registry. They dispatch to the BrowserManager's
 * isolated MCP client directly.
 *
 * Tool definitions are dynamically discovered from chrome-devtools-mcp
 * at runtime, not hardcoded.
 */

import type { FunctionDeclaration } from '@google/genai';
import type { Tool as McpTool } from '@modelcontextprotocol/sdk/types.js';
import {
  type ToolConfirmationOutcome,
  DeclarativeTool,
  BaseToolInvocation,
  Kind,
  type ToolResult,
  type ToolInvocation,
  type ToolCallConfirmationDetails,
  type PolicyUpdateOptions,
} from '../../tools/tools.js';
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
import type { BrowserManager, McpToolCallResult } from './browserManager.js';
import { debugLogger } from '../../utils/debugLogger.js';

/**
 * Tool invocation that dispatches to BrowserManager's isolated MCP client.
 *
 * The invocation forwards `params` unchanged to `browserManager.callTool`
 * under `toolName`, flattens the text parts of the MCP response into a single
 * string, and runs that string through `postProcessToolResult` before handing
 * it back.
 */
class McpToolInvocation extends BaseToolInvocation<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Manager used to dispatch the tool call.
   * @param toolName Name of the MCP tool to call; also passed to the base
   *   class as both of its name arguments.
   * @param params Arguments forwarded verbatim to the MCP tool.
   * @param messageBus Message bus handed to the base invocation.
   */
  constructor(
    protected readonly browserManager: BrowserManager,
    protected readonly toolName: string,
    params: Record<string, unknown>,
    messageBus: MessageBus,
  ) {
    super(params, messageBus, toolName, toolName);
  }

  /** Returns a short human-readable label naming the tool being called. */
  getDescription(): string {
    return `Calling MCP tool: ${this.toolName}`;
  }

  /**
   * Builds the confirmation prompt for this call.
   *
   * @returns `false` when no message bus is available; otherwise an
   *   `'mcp'`-typed confirmation attributed to the `browser-agent` server
   *   whose `onConfirm` publishes a policy update for the chosen outcome.
   */
  protected override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (!this.messageBus) {
      return false;
    }

    return {
      type: 'mcp',
      title: `Confirm MCP Tool: ${this.toolName}`,
      serverName: 'browser-agent',
      toolName: this.toolName,
      toolDisplayName: this.toolName,
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        await this.publishPolicyUpdate(outcome);
      },
    };
  }

  /**
   * Policy updates are always scoped to the `browser-agent` MCP name,
   * regardless of the confirmation outcome.
   */
  override getPolicyUpdateOptions(
    _outcome: ToolConfirmationOutcome,
  ): PolicyUpdateOptions | undefined {
    return {
      mcpName: 'browser-agent',
    };
  }

  /**
   * Calls the MCP tool and converts its response into a ToolResult.
   *
   * @param signal Abort signal forwarded to `browserManager.callTool`.
   * @returns The post-processed text of the response. When the response is
   *   flagged `isError`, or the call throws, the result carries an `error`
   *   field and an `Error: `-prefixed message instead.
   * @throws Re-throws any error whose message contains
   *   `Could not connect to Chrome`.
   */
  async execute(signal: AbortSignal): Promise<ToolResult> {
    try {
      const result: McpToolCallResult = await this.browserManager.callTool(
        this.toolName,
        this.params,
        signal,
      );

      // Only text parts are surfaced; any other content type is dropped.
      const textContent = Array.isArray(result.content)
        ? result.content
            .filter((c) => c.type === 'text' && c.text)
            .map((c) => c.text)
            .join('\n')
        : '';

      // Post-process to add contextual hints for common error patterns
      const processedContent = postProcessToolResult(
        this.toolName,
        textContent,
      );

      if (result.isError) {
        // An error response without any text would otherwise surface as a
        // bare "Error: " with an empty error message, so fall back to a
        // placeholder.
        const errorText = processedContent || 'Unknown error';
        return {
          llmContent: `Error: ${errorText}`,
          returnDisplay: `Error: ${errorText}`,
          error: { message: textContent || 'Unknown error' },
        };
      }

      const successText = processedContent || 'Tool executed successfully.';
      return {
        llmContent: successText,
        returnDisplay: successText,
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);

      // Chrome connection errors are fatal — re-throw to terminate the agent
      // immediately instead of returning a result the LLM would retry.
      if (errorMsg.includes('Could not connect to Chrome')) {
        throw error;
      }

      debugLogger.error(`MCP tool ${this.toolName} failed: ${errorMsg}`);
      return {
        llmContent: `Error: ${errorMsg}`,
        returnDisplay: `Error: ${errorMsg}`,
        error: { message: errorMsg },
      };
    }
  }
}

/**
 * Composite tool invocation that types a full string by calling press_key
 * for each character internally, avoiding N model round-trips.
 *
 * Individual key-press failures are treated as best-effort (logged, typing
 * continues), but the returned summary only claims success when every key
 * press — and the optional submit key — actually went through.
 */
class TypeTextInvocation extends BaseToolInvocation<
  Record<string, unknown>,
  ToolResult
> {
  /**
   * @param browserManager Manager used to dispatch the press_key calls.
   * @param text Text to type, one press_key call per character.
   * @param submitKey Optional key pressed once after the text.
   * @param messageBus Message bus handed to the base invocation.
   */
  constructor(
    private readonly browserManager: BrowserManager,
    private readonly text: string,
    private readonly submitKey: string | undefined,
    messageBus: MessageBus,
  ) {
    super({ text, submitKey }, messageBus, 'type_text', 'type_text');
  }

  /**
   * Returns a label with the text truncated to 50 characters, followed by
   * the submit key when one was requested.
   */
  getDescription(): string {
    const preview = `"${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`;
    return this.submitKey
      ? `type_text: ${preview} + ${this.submitKey}`
      : `type_text: ${preview}`;
  }

  /**
   * Builds the confirmation prompt for this call.
   *
   * @returns `false` when no message bus is available; otherwise an
   *   `'mcp'`-typed confirmation attributed to the `browser-agent` server
   *   whose `onConfirm` publishes a policy update for the chosen outcome.
   */
  protected override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (!this.messageBus) {
      return false;
    }

    return {
      type: 'mcp',
      title: `Confirm Tool: type_text`,
      serverName: 'browser-agent',
      toolName: 'type_text',
      toolDisplayName: 'type_text',
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        await this.publishPolicyUpdate(outcome);
      },
    };
  }

  /**
   * Policy updates are always scoped to the `browser-agent` MCP name,
   * regardless of the confirmation outcome.
   */
  override getPolicyUpdateOptions(
    _outcome: ToolConfirmationOutcome,
  ): PolicyUpdateOptions | undefined {
    return {
      mcpName: 'browser-agent',
    };
  }

  /**
   * Types the text and optionally presses the submit key.
   *
   * @param signal Abort signal checked before typing, between characters and
   *   before the submit key; also forwarded to every press_key call.
   * @returns A success summary when everything went through; a summary
   *   listing the problems when some key presses failed; or an error result
   *   when the operation was cancelled or a call threw.
   * @throws Re-throws any error whose message contains
   *   `Could not connect to Chrome`.
   */
  override async execute(signal: AbortSignal): Promise<ToolResult> {
    try {
      if (signal.aborted) {
        return {
          llmContent: 'Error: Operation cancelled before typing started.',
          returnDisplay: 'Operation cancelled before typing started.',
          error: { message: 'Operation cancelled' },
        };
      }

      const { attempted, failed, total, lastError } =
        await this.typeCharByChar(signal);

      // typeCharByChar only stops early on abort; do not claim the full text
      // was typed when it was not.
      if (attempted < total) {
        const cancelled = `Operation cancelled after typing ${attempted} of ${total} characters.`;
        return {
          llmContent: `Error: ${cancelled}`,
          returnDisplay: cancelled,
          error: { message: 'Operation cancelled' },
        };
      }

      const problems: string[] = [];
      if (failed > 0) {
        problems.push(
          `${failed} of ${total} key presses failed (last error: ${lastError})`,
        );
      }

      // Optionally press a submit key (Enter, Tab, etc.) after typing
      if (this.submitKey) {
        if (signal.aborted) {
          const cancelled = `Operation cancelled before pressing ${this.submitKey}.`;
          return {
            llmContent: `Error: ${cancelled}`,
            returnDisplay: cancelled,
            error: { message: 'Operation cancelled' },
          };
        }

        const keyResult = await this.browserManager.callTool(
          'press_key',
          { key: this.submitKey },
          signal,
        );
        if (keyResult.isError) {
          const errText = this.extractErrorText(keyResult);
          debugLogger.warn(
            `type_text: submitKey("${this.submitKey}") failed: ${errText}`,
          );
          problems.push(`pressing ${this.submitKey} failed: ${errText}`);
        }
      }

      // Failures stay best-effort (no `error` field), but the summary must
      // not tell the model that everything succeeded.
      if (problems.length > 0) {
        const partial = `Typed "${this.text}" with problems: ${problems.join('; ')}`;
        return {
          llmContent: partial,
          returnDisplay: partial,
        };
      }

      const summary = this.submitKey
        ? `Successfully typed "${this.text}" and pressed ${this.submitKey}`
        : `Successfully typed "${this.text}"`;

      return {
        llmContent: summary,
        returnDisplay: summary,
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);

      // Chrome connection errors are fatal
      if (errorMsg.includes('Could not connect to Chrome')) {
        throw error;
      }

      debugLogger.error(`type_text failed: ${errorMsg}`);
      return {
        llmContent: `Error: ${errorMsg}`,
        returnDisplay: `Error: ${errorMsg}`,
        error: { message: errorMsg },
      };
    }
  }

  /**
   * Types each character via individual press_key MCP calls.
   *
   * Stops early (without throwing) as soon as the signal is aborted.
   *
   * @returns How many characters were sent (`attempted`) out of `total`, how
   *   many of those calls reported an error (`failed`), and the text of the
   *   most recent error (`lastError`, empty when none failed).
   */
  private async typeCharByChar(signal: AbortSignal): Promise<{
    attempted: number;
    failed: number;
    total: number;
    lastError: string;
  }> {
    const chars = [...this.text]; // Iterate by code point, not UTF-16 unit
    let attempted = 0;
    let failed = 0;
    let lastError = '';

    for (const char of chars) {
      if (signal.aborted) break;

      // Map special characters to key names
      const key = char === ' ' ? 'Space' : char;
      const result = await this.browserManager.callTool(
        'press_key',
        { key },
        signal,
      );
      attempted += 1;

      if (result.isError) {
        failed += 1;
        lastError = this.extractErrorText(result);
        debugLogger.warn(
          `type_text: press_key("${key}") failed: ${lastError}`,
        );
      }
    }

    return { attempted, failed, total: chars.length, lastError };
  }

  /**
   * Extract error text from an MCP tool result.
   *
   * @returns The non-empty text parts joined by newlines, or
   *   `'Unknown error'` when there are none.
   */
  private extractErrorText(result: McpToolCallResult): string {
    return (
      result.content
        ?.filter(
          (c: { type: string; text?: string }) => c.type === 'text' && c.text,
        )
        .map((c: { type: string; text?: string }) => c.text)
        .join('\n') || 'Unknown error'
    );
  }
}

/**
 * DeclarativeTool wrapper for an MCP tool.
 *
 * The MCP tool name is used as both name arguments of the base class, the
 * kind is `Kind.Other`, output is flagged as markdown and output updating is
 * disabled.
 */
class McpDeclarativeTool extends DeclarativeTool<
  Record<string, unknown>,
  ToolResult
> {
  /** Manager that built invocations dispatch their tool calls to. */
  protected readonly browserManager: BrowserManager;

  /**
   * @param browserManager Manager passed on to every built invocation.
   * @param name MCP tool name.
   * @param description Tool description passed to the base class.
   * @param parameterSchema Parameter schema passed to the base class.
   * @param messageBus Message bus passed to the base class.
   */
  constructor(
    browserManager: BrowserManager,
    name: string,
    description: string,
    parameterSchema: unknown,
    messageBus: MessageBus,
  ) {
    super(
      name,
      name,
      description,
      Kind.Other,
      parameterSchema,
      messageBus,
      /* isOutputMarkdown */ true,
      /* canUpdateOutput */ false,
    );
    this.browserManager = browserManager;
  }

  /** Creates the invocation that will run this tool with `params`. */
  build(
    params: Record<string, unknown>,
  ): ToolInvocation<Record<string, unknown>, ToolResult> {
    const invocation = new McpToolInvocation(
      this.browserManager,
      this.name,
      params,
      this.messageBus,
    );
    return invocation;
  }
}

/**
 * DeclarativeTool for the custom type_text composite tool.
 *
 * Declares a required `text` string and an optional `submitKey` string, and
 * builds TypeTextInvocation instances from them.
 */
class TypeTextDeclarativeTool extends DeclarativeTool<
  Record<string, unknown>,
  ToolResult
> {
  /** Manager that built invocations dispatch their key presses to. */
  private readonly browserManager: BrowserManager;

  /**
   * @param browserManager Manager passed on to every built invocation.
   * @param messageBus Message bus passed to the base class.
   */
  constructor(browserManager: BrowserManager, messageBus: MessageBus) {
    super(
      'type_text',
      'type_text',
      [
        'Types a full text string into the currently focused element.',
        'Much faster than calling press_key for each character individually.',
        'Use this to enter text into form fields, search boxes, spreadsheet cells, or any focused input.',
        'The element must already be focused (e.g., after a click).',
        'Use submitKey to press a key after typing (e.g., submitKey="Enter" to submit a form or confirm a value, submitKey="Tab" to move to the next field).',
      ].join(' '),
      Kind.Other,
      {
        type: 'object',
        properties: {
          text: {
            type: 'string',
            description: 'The text to type into the focused element.',
          },
          submitKey: {
            type: 'string',
            description: [
              'Optional key to press after typing (e.g., "Enter", "Tab", "Escape").',
              'Useful for submitting form fields or moving to the next cell in a spreadsheet.',
            ].join(' '),
          },
        },
        required: ['text'],
      },
      messageBus,
      /* isOutputMarkdown */ true,
      /* canUpdateOutput */ false,
    );
    this.browserManager = browserManager;
  }

  /**
   * Creates the invocation for one type_text call.
   *
   * `text` is coerced with `String()` (a missing value becomes the empty
   * string); `submitKey` is only honoured when it is a non-empty string.
   */
  build(
    params: Record<string, unknown>,
  ): ToolInvocation<Record<string, unknown>, ToolResult> {
    const rawSubmitKey = params['submitKey'];
    const submitKey =
      // eslint-disable-next-line no-restricted-syntax
      typeof rawSubmitKey === 'string' && rawSubmitKey
        ? rawSubmitKey
        : undefined;
    const text = String(params['text'] ?? '');

    return new TypeTextInvocation(
      this.browserManager,
      text,
      submitKey,
      this.messageBus,
    );
  }
}

/**
 * Builds the browser agent's tool set: one DeclarativeTool per tool reported
 * by the browser manager, followed by the custom composite tool type_text.
 *
 * These tools are registered in the browser agent's isolated ToolRegistry,
 * NOT in the main agent's registry.
 *
 * The tool list is obtained from `browserManager.getDiscoveredTools()` on
 * every call rather than being hardcoded.
 *
 * @param browserManager The browser manager with isolated MCP client
 * @param messageBus Message bus for tool invocations
 * @returns Array of DeclarativeTools that dispatch to the isolated MCP client
 */
export async function createMcpDeclarativeTools(
  browserManager: BrowserManager,
  messageBus: MessageBus,
): Promise<Array<McpDeclarativeTool | TypeTextDeclarativeTool>> {
  const discovered = await browserManager.getDiscoveredTools();

  debugLogger.log(
    `Creating ${discovered.length} declarative tools for browser agent`,
  );

  const tools: Array<McpDeclarativeTool | TypeTextDeclarativeTool> = [];

  for (const discoveredTool of discovered) {
    const { parametersJsonSchema } =
      convertMcpToolToFunctionDeclaration(discoveredTool);
    // Append usage hints to the server-provided description.
    const description = augmentToolDescription(
      discoveredTool.name,
      discoveredTool.description ?? '',
    );
    tools.push(
      new McpDeclarativeTool(
        browserManager,
        discoveredTool.name,
        description,
        parametersJsonSchema,
        messageBus,
      ),
    );
  }

  // The composite tool goes last, after all discovered tools.
  tools.push(new TypeTextDeclarativeTool(browserManager, messageBus));

  debugLogger.log(
    `Total tools registered: ${tools.length} (${discovered.length} MCP + 1 custom)`,
  );

  return tools;
}

/**
 * Converts MCP tool definition to Gemini FunctionDeclaration.
 *
 * The tool's `inputSchema` is passed through untouched as
 * `parametersJsonSchema`; a missing description becomes the empty string and
 * a missing schema becomes an empty object schema.
 */
function convertMcpToolToFunctionDeclaration(
  mcpTool: McpTool,
): FunctionDeclaration {
  const emptyObjectSchema = { type: 'object', properties: {} };

  const declaration: FunctionDeclaration = {
    name: mcpTool.name,
    description: mcpTool.description ?? '',
    parametersJsonSchema: mcpTool.inputSchema ?? emptyObjectSchema,
  };
  return declaration;
}

/**
 * Appends a usage hint to an MCP tool description so the model can pick the
 * right tool without extra system-prompt guidance.
 *
 * The tool name is lower-cased and tested for containing each known key; the
 * first key that matches wins and its hint is appended. Names matching no
 * key get the description back unchanged.
 *
 * Actual chrome-devtools-mcp tools:
 *   Input: click, drag, fill, fill_form, handle_dialog, hover, press_key, upload_file
 *   Navigation: close_page, list_pages, navigate_page, new_page, select_page, wait_for
 *   Emulation: emulate, resize_page
 *   Performance: performance_analyze_insight, performance_start_trace, performance_stop_trace
 *   Network: get_network_request, list_network_requests
 *   Debugging: evaluate_script, get_console_message, list_console_messages, take_screenshot, take_snapshot
 *   Vision (--experimental-vision): click_at, analyze_screenshot
 *
 * @param toolName Name of the MCP tool.
 * @param description Description reported for the tool.
 * @returns The description, with a hint appended when a key matches.
 */
function augmentToolDescription(toolName: string, description: string): string {
  // Order matters: matching is by substring, so a longer key must precede any
  // shorter key it contains (fill_form before fill, click_at before click).
  const orderedHints: ReadonlyArray<readonly [string, string]> = [
    [
      'fill_form',
      ' Fills multiple standard HTML form fields at once. Same limitations as fill — does not work on canvas/custom widgets.',
    ],
    [
      'fill',
      ' Fills standard HTML form fields (<input>, <textarea>, <select>) by uid. Does NOT work on custom/canvas-based widgets (e.g., Google Sheets cells, Notion blocks). If fill times out or fails, click the element first then use press_key with individual characters instead.',
    ],
    [
      'click_at',
      ' Clicks at exact pixel coordinates (x, y). Use when you have specific coordinates for visual elements.',
    ],
    [
      'click',
      ' Use the element uid from the accessibility tree snapshot (e.g., uid="87_4"). UIDs are invalidated after this action — call take_snapshot before using another uid.',
    ],
    [
      'hover',
      ' Use the element uid from the accessibility tree snapshot to hover over elements.',
    ],
    [
      'take_snapshot',
      ' Returns the accessibility tree with uid values for each element. Call this FIRST to see available elements, and AFTER every state-changing action (click, fill, press_key) before using any uid.',
    ],
    [
      'navigate_page',
      ' Navigate to the specified URL. Call take_snapshot after to see the new page.',
    ],
    [
      'new_page',
      ' Opens a new page/tab with the specified URL. Call take_snapshot after to see the new page.',
    ],
    [
      'press_key',
      ' Press a SINGLE keyboard key (e.g., "Enter", "Tab", "Escape", "ArrowDown", "a", "8"). ONLY accepts one key name — do NOT pass multi-character strings like "Hello" or "A1\\nEnter". To type text, use type_text instead of calling press_key for each character.',
    ],
  ];

  const normalizedName = toolName.toLowerCase();
  const firstMatch = orderedHints.find(([fragment]) =>
    normalizedName.includes(fragment),
  );

  return firstMatch ? description + firstMatch[1] : description;
}

/**
 * Cleans up the text of a tool result and appends a recovery hint when it
 * matches a known failure pattern.
 *
 * Steps, in order:
 *  1. For every tool except take_snapshot, drop everything from the first
 *     `## Latest page snapshot` heading onwards and trim what remains, to
 *     keep embedded snapshots from bloating the token count.
 *  2. For tools whose name contains `click`, append an overlay hint when the
 *     text mentions one of the overlay/interactability phrases.
 *  3. Otherwise, append a stale-reference hint when the text mentions
 *     `stale` or `detached`.
 *
 * All phrase matching is case-insensitive, and at most one hint is appended.
 *
 * @param toolName Name of the tool that produced the result.
 * @param result Raw text of the tool result.
 * @returns The processed text, possibly with one hint appended.
 */
export function postProcessToolResult(
  toolName: string,
  result: string,
): string {
  const snapshotHeading = '## Latest page snapshot';
  let text = result;

  // take_snapshot is exempt: the model needs its accessibility tree for
  // uid-based interactions.
  if (toolName !== 'take_snapshot' && result.includes(snapshotHeading)) {
    const [beforeSnapshot, snapshotBody] = result.split(snapshotHeading);
    text = beforeSnapshot.trim();
    if (snapshotBody) {
      debugLogger.log('Stripped embedded snapshot from tool response');
    }
  }

  const lowered = text.toLowerCase();
  const mentions = (phrase: string): boolean => lowered.includes(phrase);

  // Overlay / interactability problems are only hinted for click-like tools.
  if (toolName.includes('click')) {
    const overlayPhrases = [
      'not interactable',
      'obscured',
      'intercept',
      'blocked',
      'element is not visible',
      'element not found',
    ];
    if (overlayPhrases.some((phrase) => mentions(phrase))) {
      return (
        text +
        '\n\n⚠️ This action may have been blocked by an overlay, popup, or tooltip. ' +
        'Look for close/dismiss buttons (×, Close, "Got it", "Accept") in the accessibility tree and click them first.'
      );
    }
  }

  // Stale element references
  if (mentions('stale') || mentions('detached')) {
    return (
      text +
      '\n\n⚠️ The element reference is stale. Call take_snapshot to get fresh element uids.'
    );
  }

  return text;
}