feat(browser): implement experimental browser agent (#19284)

2026-04-16 16:21:27 -07:00 · 2026-02-24 09:22:09 -08:00
parent 182c858e67
commit 9e95b8b3c5
23 changed files with 3506 additions and 1 deletions
--- a/packages/core/src/agents/browser/browserAgentInvocation.ts
+++ b/packages/core/src/agents/browser/browserAgentInvocation.ts
@@ -0,0 +1,171 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @fileoverview Browser agent invocation that handles async tool setup.
+ *
+ * Unlike regular LocalSubagentInvocation, this invocation:
+ * 1. Uses browserAgentFactory to create definition with MCP tools
+ * 2. Cleans up browser resources after execution
+ *
+ * The MCP tools are only available in the browser agent's isolated registry.
+ */
+
+import type { Config } from '../../config/config.js';
+import { LocalAgentExecutor } from '../local-executor.js';
+import type { AnsiOutput } from '../../utils/terminalSerializer.js';
+import { BaseToolInvocation, type ToolResult } from '../../tools/tools.js';
+import { ToolErrorType } from '../../tools/tool-error.js';
+import type { AgentInputs, SubagentActivityEvent } from '../types.js';
+import type { MessageBus } from '../../confirmation-bus/message-bus.js';
+import {
+  createBrowserAgentDefinition,
+  cleanupBrowserAgent,
+} from './browserAgentFactory.js';
+
+// Upper bound (in UTF-16 code units, as counted by String.prototype.slice)
+// on how much of each individual input value is shown in the description.
+const INPUT_PREVIEW_MAX_LENGTH = 50;
+// Upper bound (same unit) on the complete description string returned by
+// getDescription(); anything beyond this is cut off.
+const DESCRIPTION_MAX_LENGTH = 200;
+
+/**
+ * Browser agent invocation with async tool setup.
+ *
+ * This invocation handles the browser agent's special requirements:
+ * - MCP connection and tool wrapping at invocation time
+ * - Browser cleanup after execution
+ */
+export class BrowserAgentInvocation extends BaseToolInvocation<
+  AgentInputs,
+  ToolResult
+> {
+  /**
+   * @param config Configuration forwarded to the browser agent factory and
+   *   to the local agent executor.
+   * @param params Agent inputs; summarized by getDescription() and passed to
+   *   the executor run.
+   * @param messageBus Message bus handed to the base invocation and to the
+   *   browser agent factory.
+   * @param _toolName Tool name override; defaults to 'browser_agent'.
+   * @param _toolDisplayName Display name override; defaults to
+   *   'Browser Agent'.
+   */
+  constructor(
+    private readonly config: Config,
+    params: AgentInputs,
+    messageBus: MessageBus,
+    _toolName?: string,
+    _toolDisplayName?: string,
+  ) {
+    // Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
+    super(
+      params,
+      messageBus,
+      _toolName ?? 'browser_agent',
+      _toolDisplayName ?? 'Browser Agent',
+    );
+  }
+
+  /**
+   * Renders a single input value for the description preview.
+   *
+   * Non-null objects (including arrays) are JSON-encoded because String()
+   * would collapse plain objects to "[object Object]". If JSON encoding
+   * throws (e.g. circular references, BigInt members) or yields no string,
+   * the value falls back to String(). All other values use String().
+   *
+   * @param value The raw input value.
+   * @returns The rendered text, cut to INPUT_PREVIEW_MAX_LENGTH.
+   */
+  private static previewValue(value: unknown): string {
+    let text = String(value);
+
+    if (typeof value === 'object' && value !== null) {
+      try {
+        // JSON.stringify can return undefined at runtime (e.g. a toJSON()
+        // that returns undefined), so only accept an actual string.
+        const encoded: unknown = JSON.stringify(value);
+        if (typeof encoded === 'string') {
+          text = encoded;
+        }
+      } catch {
+        // Keep the String() rendering computed above.
+      }
+    }
+
+    return text.slice(0, INPUT_PREVIEW_MAX_LENGTH);
+  }
+
+  /**
+   * Returns a concise, human-readable description of the invocation.
+   *
+   * Each input is rendered as `key: value` with the value shortened to
+   * INPUT_PREVIEW_MAX_LENGTH; the full description is then cut to
+   * DESCRIPTION_MAX_LENGTH.
+   *
+   * @returns The description string.
+   */
+  getDescription(): string {
+    const inputSummary = Object.entries(this.params)
+      .map(
+        ([key, value]) =>
+          `${key}: ${BrowserAgentInvocation.previewValue(value)}`,
+      )
+      .join(', ');
+
+    const description = `Running browser agent with inputs: { ${inputSummary} }`;
+    return description.slice(0, DESCRIPTION_MAX_LENGTH);
+  }
+
+  /**
+   * Executes the browser agent.
+   *
+   * This method:
+   * 1. Builds the agent definition (and browser manager) through
+   *    createBrowserAgentDefinition
+   * 2. Runs the agent via LocalAgentExecutor, streaming THOUGHT_CHUNK text
+   *    to `updateOutput` when it is provided
+   * 3. Cleans up browser resources, whether the run succeeded or failed
+   *
+   * Failures during setup or the run are reported in the returned
+   * ToolResult (with ToolErrorType.EXECUTION_FAILED) rather than thrown.
+   * Cleanup is best-effort: a cleanup failure never replaces the result.
+   *
+   * @param signal Abort signal forwarded to the executor run.
+   * @param updateOutput Optional sink for progress and streamed output.
+   * @returns The tool result describing the termination reason and the
+   *   agent's result, or an error result if execution failed.
+   */
+  async execute(
+    signal: AbortSignal,
+    updateOutput?: (output: string | AnsiOutput) => void,
+  ): Promise<ToolResult> {
+    let browserManager;
+
+    try {
+      if (updateOutput) {
+        updateOutput('🌐 Starting browser agent...\n');
+      }
+
+      // Create definition with MCP tools
+      const printOutput = updateOutput
+        ? (msg: string) => updateOutput(`🌐 ${msg}\n`)
+        : undefined;
+
+      const result = await createBrowserAgentDefinition(
+        this.config,
+        this.messageBus,
+        printOutput,
+      );
+      const { definition } = result;
+      // Remember the manager as soon as it exists so `finally` can release it
+      // even if a later step throws.
+      browserManager = result.browserManager;
+
+      if (updateOutput) {
+        updateOutput(
+          `🌐 Browser connected. Tools: ${definition.toolConfig?.tools.length ?? 0}\n`,
+        );
+      }
+
+      // Create activity callback for streaming output
+      const onActivity = (activity: SubagentActivityEvent): void => {
+        if (!updateOutput) return;
+
+        if (
+          activity.type === 'THOUGHT_CHUNK' &&
+          typeof activity.data['text'] === 'string'
+        ) {
+          updateOutput(`🌐💭 ${activity.data['text']}`);
+        }
+      };
+
+      // Create and run executor with the configured definition
+      const executor = await LocalAgentExecutor.create(
+        definition,
+        this.config,
+        onActivity,
+      );
+
+      const output = await executor.run(this.params, signal);
+
+      const resultContent = `Browser agent finished.
+Termination Reason: ${output.terminate_reason}
+Result:
+${output.result}`;
+
+      const displayContent = `
+Browser Agent Finished
+
+Termination Reason: ${output.terminate_reason}
+
+Result:
+${output.result}
+`;
+
+      return {
+        llmContent: [{ text: resultContent }],
+        returnDisplay: displayContent,
+      };
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+
+      return {
+        llmContent: `Browser agent failed. Error: ${errorMessage}`,
+        returnDisplay: `Browser Agent Failed\nError: ${errorMessage}`,
+        error: {
+          message: errorMessage,
+          type: ToolErrorType.EXECUTION_FAILED,
+        },
+      };
+    } finally {
+      // Always cleanup browser resources
+      if (browserManager) {
+        try {
+          await cleanupBrowserAgent(browserManager);
+        } catch (cleanupError) {
+          // A rejection escaping `finally` would replace the ToolResult
+          // returned above, so cleanup failures are reported, not rethrown.
+          const cleanupMessage =
+            cleanupError instanceof Error
+              ? cleanupError.message
+              : String(cleanupError);
+          updateOutput?.(`🌐 Browser cleanup failed: ${cleanupMessage}\n`);
+        }
+      }
+    }
+  }
+}