Feat/browser agent progress emission (#21218)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-06-13 04:48:09 -07:00 · 2026-03-09 22:56:00 +05:30
parent d485e08606
commit 759575faa8
4 changed files with 847 additions and 32 deletions
@@ -9,7 +9,11 @@ import { BrowserAgentInvocation } from './browserAgentInvocation.js';
 import { makeFakeConfig } from '../../test-utils/config.js';
 import type { Config } from '../../config/config.js';
 import type { MessageBus } from '../../confirmation-bus/message-bus.js';
-import type { AgentInputs } from '../types.js';
+import {
+  type AgentInputs,
+  type SubagentProgress,
+  type SubagentActivityEvent,
+} from '../types.js';

 // Mock dependencies before imports
 vi.mock('../../utils/debugLogger.js', () => ({
@@ -19,6 +23,24 @@ vi.mock('../../utils/debugLogger.js', () => ({
  },
 }));

+// Stub the browser agent factory module; the tests below configure what the
+// mocked createBrowserAgentDefinition resolves to and assert on
+// cleanupBrowserAgent calls.
+vi.mock('./browserAgentFactory.js', () => ({
+  createBrowserAgentDefinition: vi.fn(),
+  cleanupBrowserAgent: vi.fn(),
+}));
+
+// Stub the executor module so tests can hand back a fake executor from
+// LocalAgentExecutor.create.
+vi.mock('../local-executor.js', () => ({
+  LocalAgentExecutor: {
+    create: vi.fn(),
+  },
+}));
+
+import {
+  createBrowserAgentDefinition,
+  cleanupBrowserAgent,
+} from './browserAgentFactory.js';
+import { LocalAgentExecutor } from '../local-executor.js';
+import type { ToolLiveOutput } from '../../tools/tools.js';
+
 describe('BrowserAgentInvocation', () => {
  let mockConfig: Config;
  let mockMessageBus: MessageBus;
@@ -136,4 +158,473 @@ describe('BrowserAgentInvocation', () => {
      expect(locations).toEqual([]);
    });
  });
+
+  describe('execute', () => {
+    let mockExecutor: { run: ReturnType<typeof vi.fn> };
+
+    beforeEach(() => {
+      // Reset the cleanup spy so "toHaveBeenCalled" assertions are per-test.
+      vi.mocked(cleanupBrowserAgent).mockClear();
+
+      // Fake executor: run() resolves with a successful GOAL termination.
+      const runStub = vi.fn().mockResolvedValue({
+        result: JSON.stringify({ success: true }),
+        terminate_reason: 'GOAL',
+      });
+      mockExecutor = { run: runStub };
+      vi.mocked(LocalAgentExecutor.create).mockResolvedValue(
+        mockExecutor as never,
+      );
+
+      // Factory resolves with a minimal local agent definition and a
+      // placeholder browser manager.
+      vi.mocked(createBrowserAgentDefinition).mockResolvedValue({
+        browserManager: {} as never,
+        definition: {
+          name: 'browser_agent',
+          description: 'mock definition',
+          kind: 'local',
+          inputConfig: {} as never,
+          outputConfig: {} as never,
+          processOutput: () => '',
+          modelConfig: { model: 'test' },
+          runConfig: {},
+          promptConfig: { query: '', systemPrompt: '' },
+          toolConfig: { tools: ['analyze_screenshot', 'click'] },
+        },
+      });
+    });
+
+    it('should return result text and call cleanup on success', async () => {
+      const onOutput: (output: ToolLiveOutput) => void = vi.fn();
+      const { signal } = new AbortController();
+      const agent = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const { llmContent } = await agent.execute(signal, onOutput);
+
+      // The success path reports its content as an array of text parts.
+      expect(Array.isArray(llmContent)).toBe(true);
+      const parts = llmContent as Array<{ text: string }>;
+      expect(parts[0].text).toContain('Browser agent finished');
+      expect(cleanupBrowserAgent).toHaveBeenCalled();
+    });
+
+    it('should work without updateOutput (fire-and-forget)', async () => {
+      const { signal } = new AbortController();
+      const agent = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      // Omitting the updateOutput callback must not make execute() reject.
+      const pending = agent.execute(signal);
+      await expect(pending).resolves.toBeDefined();
+    });
+
+    it('should return error result when executor throws', async () => {
+      const crash = new Error('Unexpected crash');
+      mockExecutor.run.mockRejectedValue(crash);
+
+      const { signal } = new AbortController();
+      const agent = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const { error } = await agent.execute(signal);
+
+      // The failure is reported on the result and cleanup still runs.
+      expect(error).toBeDefined();
+      expect(cleanupBrowserAgent).toHaveBeenCalled();
+    });
+
+    // ─── Structured SubagentProgress emission tests ───────────────────────
+
+    /**
+     * Helper: re-mocks LocalAgentExecutor.create so that the onActivity
+     * callback passed by the invocation is captured, letting tests fire
+     * synthetic activity events at it.
+     *
+     * @returns `capturedOnActivity` — yields the captured callback, or
+     *   undefined while create() has not been called yet; `fireActivity` —
+     *   forwards an event to the captured callback and is a silent no-op
+     *   until the callback has been captured.
+     */
+    function setupActivityCapture(): {
+      capturedOnActivity: () =>
+        | ((event: SubagentActivityEvent) => void)
+        | undefined;
+      fireActivity: (event: SubagentActivityEvent) => void;
+    } {
+      let onActivityFn: ((e: SubagentActivityEvent) => void) | undefined;
+
+      vi.mocked(LocalAgentExecutor.create).mockImplementation(
+        async (_def, _config, onActivity) => {
+          onActivityFn = onActivity;
+          return mockExecutor as never;
+        },
+      );
+
+      return {
+        // Previously hard-wired to `undefined`, which made this accessor useless.
+        capturedOnActivity: () => onActivityFn,
+        fireActivity: (event: SubagentActivityEvent) => {
+          onActivityFn?.(event);
+        },
+      };
+    }
+
+    it('should emit initial SubagentProgress with running state', async () => {
+      const onOutput = vi.fn();
+      const { signal } = new AbortController();
+      const agent = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      await agent.execute(signal, onOutput);
+
+      // The very first emission is the empty "running" snapshot.
+      const [firstArgs] = onOutput.mock.calls;
+      const initial = firstArgs?.[0] as SubagentProgress;
+      expect(initial.isSubagentProgress).toBe(true);
+      expect(initial.state).toBe('running');
+      expect(initial.recentActivity).toEqual([]);
+    });
+
+    it('should emit completed SubagentProgress on success', async () => {
+      const onOutput = vi.fn();
+      const { signal } = new AbortController();
+      const agent = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      await agent.execute(signal, onOutput);
+
+      // The final emission carries the terminal "completed" state.
+      const emissions = onOutput.mock.calls;
+      const finalArgs = emissions[emissions.length - 1];
+      const last = finalArgs?.[0] as SubagentProgress;
+      expect(last.isSubagentProgress).toBe(true);
+      expect(last.state).toBe('completed');
+    });
+
+    it('should handle THOUGHT_CHUNK and emit structured progress', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      // Deliberately not awaited yet: the event below is fired while
+      // execute() is still in flight.
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Allow createBrowserAgentDefinition to resolve and onActivity to be registered
+      await Promise.resolve();
+      await Promise.resolve();
+
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'THOUGHT_CHUNK',
+        data: { text: 'Navigating to the page...' },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      // Any emitted snapshot containing the thought text satisfies the test.
+      const thoughtProgress = progressCalls.find((p) =>
+        p.recentActivity.some(
+          (a) =>
+            a.type === 'thought' &&
+            a.content.includes('Navigating to the page...'),
+        ),
+      );
+      expect(thoughtProgress).toBeDefined();
+    });
+
+    it('should handle TOOL_CALL_START and TOOL_CALL_END with callId tracking', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Yield two microtask ticks so execute() can reach the mocked
+      // LocalAgentExecutor.create and hand over its onActivity callback.
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // START identifies the call via `callId`...
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'TOOL_CALL_START',
+        data: {
+          name: 'navigate_browser',
+          callId: 'call-1',
+          args: { url: 'https://example.com' },
+        },
+      });
+
+      // ...while END carries the same identifier under `id`.
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'TOOL_CALL_END',
+        data: { name: 'navigate_browser', id: 'call-1' },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      // After TOOL_CALL_END, the tool should be completed
+      const finalProgress = progressCalls[progressCalls.length - 1];
+      const toolItem = finalProgress?.recentActivity.find(
+        (a) => a.type === 'tool_call' && a.content === 'navigate_browser',
+      );
+      expect(toolItem).toBeDefined();
+      expect(toolItem?.status).toBe('completed');
+    });
+
+    it('should sanitize sensitive data in tool call args', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Yield two microtask ticks so execute() can reach the mocked
+      // LocalAgentExecutor.create and hand over its onActivity callback.
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // `password` is a sensitive key; its value must never reach the UI.
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'TOOL_CALL_START',
+        data: {
+          name: 'fill_form',
+          callId: 'call-2',
+          args: { password: 'supersecret123', url: 'https://example.com' },
+        },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      const toolItem = progressCalls
+        .flatMap((p) => p.recentActivity)
+        .find((a) => a.type === 'tool_call' && a.content === 'fill_form');
+
+      // `args` is the JSON-stringified, sanitized argument object, so the
+      // checks below are substring checks on that string.
+      expect(toolItem).toBeDefined();
+      expect(toolItem?.args).not.toContain('supersecret123');
+      expect(toolItem?.args).toContain('[REDACTED]');
+    });
+
+    it('should handle ERROR event with callId and mark tool as errored', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Yield two microtask ticks so execute() can reach the mocked
+      // LocalAgentExecutor.create and hand over its onActivity callback.
+      await Promise.resolve();
+      await Promise.resolve();
+
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'TOOL_CALL_START',
+        data: { name: 'click_element', callId: 'call-3', args: {} },
+      });
+
+      // The ERROR event references the running call through `callId`.
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'ERROR',
+        data: { error: 'Element not found', callId: 'call-3' },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      // NOTE: find() returns the first matching item across all snapshots.
+      // Snapshots are shallow array copies, so that item is the same object
+      // the invocation later mutates and it reflects the final status.
+      const allItems = progressCalls.flatMap((p) => p.recentActivity);
+      const toolItem = allItems.find(
+        (a) => a.type === 'tool_call' && a.content === 'click_element',
+      );
+      expect(toolItem?.status).toBe('error');
+    });
+
+    it('should sanitize sensitive data in ERROR event messages', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Yield two microtask ticks so execute() can reach the mocked
+      // LocalAgentExecutor.create and hand over its onActivity callback.
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // Error text embeds a `key=value` secret that must be redacted.
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'ERROR',
+        data: { error: 'Auth failed: api_key=sk-secret-abc1234567890' },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      // ERROR events are surfaced as a 'thought' item with status 'error'.
+      const errorItem = progressCalls
+        .flatMap((p) => p.recentActivity)
+        .find((a) => a.type === 'thought' && a.status === 'error');
+
+      expect(errorItem).toBeDefined();
+      expect(errorItem?.content).not.toContain('sk-secret-abc1234567890');
+      expect(errorItem?.content).toContain('[REDACTED]');
+    });
+
+    it('should sanitize inline PEM content in error messages', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Yield two microtask ticks so execute() can reach the mocked
+      // LocalAgentExecutor.create and hand over its onActivity callback.
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // Multi-line error text containing a complete BEGIN/END PEM block.
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'ERROR',
+        data: {
+          error:
+            'Failed to authenticate:\n-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA12345...\n-----END RSA PRIVATE KEY-----\nPlease check credentials.',
+        },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      // ERROR events are surfaced as a 'thought' item with status 'error'.
+      const errorItem = progressCalls
+        .flatMap((p) => p.recentActivity)
+        .find((a) => a.type === 'thought' && a.status === 'error');
+
+      // The whole PEM block collapses to a single placeholder.
+      expect(errorItem).toBeDefined();
+      expect(errorItem?.content).toContain('[REDACTED_PEM]');
+      expect(errorItem?.content).not.toContain('-----BEGIN');
+    });
+
+    it('should mark all running tools as errored when ERROR has no callId', async () => {
+      const { fireActivity } = setupActivityCapture();
+      const updateOutput = vi.fn();
+
+      const invocation = new BrowserAgentInvocation(
+        mockConfig,
+        mockParams,
+        mockMessageBus,
+      );
+
+      const executePromise = invocation.execute(
+        new AbortController().signal,
+        updateOutput,
+      );
+
+      // Yield two microtask ticks so execute() can reach the mocked
+      // LocalAgentExecutor.create and hand over its onActivity callback.
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // Two tool calls are left running (no TOOL_CALL_END is fired for either).
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'TOOL_CALL_START',
+        data: { name: 'tool_a', callId: 'c1', args: {} },
+      });
+
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'TOOL_CALL_START',
+        data: { name: 'tool_b', callId: 'c2', args: {} },
+      });
+
+      // ERROR with no callId should mark ALL running tools as error
+      fireActivity({
+        isSubagentActivityEvent: true,
+        agentName: 'browser_agent',
+        type: 'ERROR',
+        data: { error: 'Agent crashed' },
+      });
+
+      await executePromise;
+
+      const progressCalls = updateOutput.mock.calls
+        .map((c) => c[0] as SubagentProgress)
+        .filter((p) => p.isSubagentProgress);
+
+      // NOTE: find() returns the first matching item across all snapshots.
+      // Snapshots are shallow array copies, so each item is the same object
+      // the invocation later mutates and it reflects the final status.
+      const allItems = progressCalls.flatMap((p) => p.recentActivity);
+      const toolA = allItems.find(
+        (a) => a.type === 'tool_call' && a.content === 'tool_a',
+      );
+      const toolB = allItems.find(
+        (a) => a.type === 'tool_call' && a.content === 'tool_b',
+      );
+
+      // Both should be error since no callId was specified
+      expect(toolA?.status).toBe('error');
+      expect(toolB?.status).toBe('error');
+    });
+  });
 });
@@ -14,6 +14,7 @@
 * The MCP tools are only available in the browser agent's isolated registry.
 */

+import { randomUUID } from 'node:crypto';
 import type { Config } from '../../config/config.js';
 import { LocalAgentExecutor } from '../local-executor.js';
 import {
@@ -22,7 +23,12 @@ import {
  type ToolLiveOutput,
 } from '../../tools/tools.js';
 import { ToolErrorType } from '../../tools/tool-error.js';
-import type { AgentInputs, SubagentActivityEvent } from '../types.js';
+import {
+  type AgentInputs,
+  type SubagentActivityEvent,
+  type SubagentProgress,
+  type SubagentActivityItem,
+} from '../types.js';
 import type { MessageBus } from '../../confirmation-bus/message-bus.js';
 import {
  createBrowserAgentDefinition,
@@ -31,6 +37,134 @@ import {

 const INPUT_PREVIEW_MAX_LENGTH = 50;
 const DESCRIPTION_MAX_LENGTH = 200;
+// Cap on the number of activity items kept for progress updates; when the
+// list grows past this, only the most recent entries are retained.
+const MAX_RECENT_ACTIVITY = 20;
+
+/**
+ * Sensitive key patterns used for redaction.
+ *
+ * Consumed in two ways within this file:
+ * - sanitizeToolArgs lower-cases an argument key, strips `-`/`_`, and redacts
+ *   the value when the key contains any (equally normalized) pattern as a
+ *   substring.
+ * - sanitizeErrorMessage joins the patterns into regex alternations to
+ *   redact `key=value`, `key: value` and `key value` occurrences in free text.
+ */
+const SENSITIVE_KEY_PATTERNS = [
+  'password',
+  'pwd',
+  'apikey',
+  'api_key',
+  'api-key',
+  'token',
+  'secret',
+  'credential',
+  'auth',
+  'authorization',
+  'access_token',
+  'access_key',
+  'refresh_token',
+  'session_id',
+  'cookie',
+  'passphrase',
+  'privatekey',
+  'private_key',
+  'private-key',
+  'secret_key',
+  'client_secret',
+  'client_id',
+];
+
+/**
+ * Returns a redacted copy of tool arguments.
+ *
+ * - Strings are passed through sanitizeErrorMessage.
+ * - Arrays are sanitized element by element.
+ * - Objects are walked recursively; a property whose key (URL-decoded,
+ *   lower-cased, with `-`/`_` removed) contains any SENSITIVE_KEY_PATTERNS
+ *   entry has its value replaced by '[REDACTED]'.
+ * - Every other value (number, boolean, null, undefined, ...) is returned
+ *   unchanged.
+ */
+function sanitizeToolArgs(args: unknown): unknown {
+  if (typeof args === 'string') {
+    return sanitizeErrorMessage(args);
+  }
+  if (typeof args !== 'object' || args === null) {
+    return args;
+  }
+  if (Array.isArray(args)) {
+    return args.map((element) => sanitizeToolArgs(element));
+  }
+
+  // Patterns are compared with separators removed, same as the keys below.
+  const comparablePatterns = SENSITIVE_KEY_PATTERNS.map((pattern) =>
+    pattern.replace(/[-_]/g, ''),
+  );
+
+  const redacted: Record<string, unknown> = {};
+  for (const [rawKey, rawValue] of Object.entries(args)) {
+    // Decode first so URL-encoded sensitive keys (e.g., api%5fkey) are
+    // recognised; a malformed escape sequence falls back to the raw key.
+    let readableKey: string;
+    try {
+      readableKey = decodeURIComponent(rawKey);
+    } catch {
+      readableKey = rawKey;
+    }
+    const comparableKey = readableKey.toLowerCase().replace(/[-_]/g, '');
+    const mustRedact = comparablePatterns.some((pattern) =>
+      comparableKey.includes(pattern),
+    );
+    redacted[rawKey] = mustRedact ? '[REDACTED]' : sanitizeToolArgs(rawValue);
+  }
+
+  return redacted;
+}
+
+/**
+ * Redacts likely secrets from free-form text. Empty input is returned
+ * unchanged.
+ *
+ * Rules are applied in this order:
+ * 1. Inline PEM blocks (BEGIN marker through END marker) become
+ *    '[REDACTED_PEM]'.
+ * 2. `key=value` / `key: value` pairs whose key is a SENSITIVE_KEY_PATTERNS
+ *    entry (optionally quoted, `--flag` style, or with URL-encoded
+ *    separators) keep the key and get '[REDACTED]' as the value.
+ * 3. A sensitive keyword or `bearer` followed by whitespace and a token-like
+ *    run of at least 8 characters has that run replaced with '[REDACTED]'.
+ * 4. Paths ending in .key/.pem/.p12/.pfx become '/path/to/[REDACTED].key'.
+ */
+function sanitizeErrorMessage(message: string): string {
+  if (!message) return message;
+
+  // Wherever a pattern has '-' or '_', accept either separator, its
+  // URL-encoded form, or no separator at all.
+  const sensitiveKeys = SENSITIVE_KEY_PATTERNS.map((p) =>
+    p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'),
+  ).join('|');
+
+  // A value is double-quoted, single-quoted, or a bare run that may continue
+  // over following words until something that looks like the next key.
+  const bareValue = `[^\\s]+(?:\\s+(?![a-zA-Z0-9_.-]+(?:=|:))[^\\s=:<>]+)*`;
+  const anyValue = `(?:"[^"]*"|'[^']*'|${bareValue})`;
+  const longToken = `[A-Za-z0-9._\\-/+=]{8,}`;
+
+  // Key followed by a delimiter (=, :, or their URL-encoded forms).
+  const delimitedPair = new RegExp(
+    `((?:--)?("|')?(${sensitiveKeys})\\2\\s*(?:[:=]|%3A|%3D)\\s*)${anyValue}`,
+    'gi',
+  );
+  // Space-separated form (e.g. "password mypass", "--api-key secret").
+  const keywordThenToken = new RegExp(
+    `\\b((?:--)?(?:${sensitiveKeys}|bearer)(?:\\s*:\\s*bearer)?\\s+)(${longToken})`,
+    'gi',
+  );
+
+  return message
+    .replace(
+      /-----BEGIN\s+[\w\s]+-----[\s\S]*?-----END\s+[\w\s]+-----/g,
+      '[REDACTED_PEM]',
+    )
+    .replace(delimitedPair, '$1[REDACTED]')
+    .replace(keywordThenToken, '$1[REDACTED]')
+    .replace(
+      /((?:[/\\][a-zA-Z0-9_-]+)*[/\\][a-zA-Z0-9_-]*\.(?:key|pem|p12|pfx))/gi,
+      '/path/to/[REDACTED].key',
+    );
+}
+
+/**
+ * Redacts sensitive data from LLM thought text. Thoughts use exactly the same
+ * redaction rules as error messages.
+ */
+function sanitizeThoughtContent(text: string): string {
+  const redacted = sanitizeErrorMessage(text);
+  return redacted;
+}

 /**
 * Browser agent invocation with async tool setup.
@@ -88,15 +222,41 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
    updateOutput?: (output: ToolLiveOutput) => void,
  ): Promise<ToolResult> {
    let browserManager;
+    let recentActivity: SubagentActivityItem[] = [];

    try {
      if (updateOutput) {
-        updateOutput('🌐 Starting browser agent...\n');
+        // Send initial state
+        const initialProgress: SubagentProgress = {
+          isSubagentProgress: true,
+          agentName: this['_toolName'] ?? 'browser_agent',
+          recentActivity: [],
+          state: 'running',
+        };
+        updateOutput(initialProgress);
      }

      // Create definition with MCP tools
+      // Note: printOutput is used for low-level connection logs before agent starts
      const printOutput = updateOutput
-        ? (msg: string) => updateOutput(`🌐 ${msg}\n`)
+        ? (msg: string) => {
+            const sanitizedMsg = sanitizeThoughtContent(msg);
+            recentActivity.push({
+              id: randomUUID(),
+              type: 'thought',
+              content: sanitizedMsg,
+              status: 'completed',
+            });
+            if (recentActivity.length > MAX_RECENT_ACTIVITY) {
+              recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY);
+            }
+            updateOutput({
+              isSubagentProgress: true,
+              agentName: this['_toolName'] ?? 'browser_agent',
+              recentActivity: [...recentActivity],
+              state: 'running',
+            } as SubagentProgress);
+          }
        : undefined;

      const result = await createBrowserAgentDefinition(
@@ -107,22 +267,141 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
      const { definition } = result;
      browserManager = result.browserManager;

-      if (updateOutput) {
-        updateOutput(
-          `🌐 Browser connected. Tools: ${definition.toolConfig?.tools.length ?? 0}\n`,
-        );
-      }
-
      // Create activity callback for streaming output
      const onActivity = (activity: SubagentActivityEvent): void => {
        if (!updateOutput) return;

-        if (
-          activity.type === 'THOUGHT_CHUNK' &&
-          // eslint-disable-next-line no-restricted-syntax
-          typeof activity.data['text'] === 'string'
-        ) {
-          updateOutput(`🌐💭 ${activity.data['text']}`);
+        // Translates executor activity events into SubagentProgress
+        // snapshots: each case updates `recentActivity`, and a snapshot is
+        // emitted only when something actually changed.
+        let updated = false;
+
+        switch (activity.type) {
+          case 'THOUGHT_CHUNK': {
+            const rawText = activity.data['text'];
+            // Ignore chunks without string text, as the previous
+            // implementation did; String(undefined) would otherwise surface
+            // a literal "undefined" thought.
+            if (typeof rawText !== 'string') {
+              break;
+            }
+            const lastItem = recentActivity[recentActivity.length - 1];
+            if (
+              lastItem &&
+              lastItem.type === 'thought' &&
+              lastItem.status === 'running'
+            ) {
+              // Extend the in-progress thought. The combined text is
+              // sanitized again so a key/value pair split across chunks can
+              // still be matched.
+              lastItem.content = sanitizeThoughtContent(
+                lastItem.content + rawText,
+              );
+            } else {
+              recentActivity.push({
+                id: randomUUID(),
+                type: 'thought',
+                content: sanitizeThoughtContent(rawText),
+                status: 'running',
+              });
+            }
+            updated = true;
+            break;
+          }
+          case 'TOOL_CALL_START': {
+            const name = String(activity.data['name']);
+            const displayName = activity.data['displayName']
+              ? sanitizeErrorMessage(String(activity.data['displayName']))
+              : undefined;
+            const description = activity.data['description']
+              ? sanitizeErrorMessage(String(activity.data['description']))
+              : undefined;
+            // Arguments are redacted before being serialized for display.
+            const args = JSON.stringify(
+              sanitizeToolArgs(activity.data['args']),
+            );
+            // Use the executor's call id when present so later END/ERROR
+            // events can be matched to this item.
+            const callId = activity.data['callId']
+              ? String(activity.data['callId'])
+              : randomUUID();
+            recentActivity.push({
+              id: callId,
+              type: 'tool_call',
+              content: name,
+              displayName,
+              description,
+              args,
+              status: 'running',
+            });
+            updated = true;
+            break;
+          }
+          case 'TOOL_CALL_END': {
+            const callId = activity.data['id']
+              ? String(activity.data['id'])
+              : undefined;
+            // Find the running tool call by ID, newest first. Without an id
+            // nothing is updated.
+            for (let i = recentActivity.length - 1; i >= 0; i--) {
+              if (
+                recentActivity[i].type === 'tool_call' &&
+                callId != null &&
+                recentActivity[i].id === callId &&
+                recentActivity[i].status === 'running'
+              ) {
+                recentActivity[i].status = 'completed';
+                updated = true;
+                break;
+              }
+            }
+            break;
+          }
+          case 'ERROR': {
+            const error = String(activity.data['error']);
+            // This exact message is treated as a cancellation, not a failure.
+            const isCancellation = error === 'Request cancelled.';
+            const callId = activity.data['callId']
+              ? String(activity.data['callId'])
+              : undefined;
+            const newStatus = isCancellation ? 'cancelled' : 'error';
+
+            if (callId) {
+              // Mark the specific tool as error/cancelled
+              for (let i = recentActivity.length - 1; i >= 0; i--) {
+                if (
+                  recentActivity[i].type === 'tool_call' &&
+                  recentActivity[i].id === callId &&
+                  recentActivity[i].status === 'running'
+                ) {
+                  recentActivity[i].status = newStatus;
+                  updated = true;
+                  break;
+                }
+              }
+            } else {
+              // No specific tool — mark ALL running tool_call items
+              for (const item of recentActivity) {
+                if (item.type === 'tool_call' && item.status === 'running') {
+                  item.status = newStatus;
+                  updated = true;
+                }
+              }
+            }
+
+            // Sanitize the error message before emitting
+            const sanitizedError = sanitizeErrorMessage(error);
+            recentActivity.push({
+              id: randomUUID(),
+              type: 'thought',
+              content: isCancellation
+                ? sanitizedError
+                : `Error: ${sanitizedError}`,
+              status: newStatus,
+            });
+            updated = true;
+            break;
+          }
+          default:
+            break;
+        }
+
+        if (updated) {
+          // Keep only the most recent items so the payload stays bounded.
+          if (recentActivity.length > MAX_RECENT_ACTIVITY) {
+            recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY);
+          }
+
+          const progress: SubagentProgress = {
+            isSubagentProgress: true,
+            agentName: this['_toolName'] ?? 'browser_agent',
+            recentActivity: [...recentActivity],
+            state: 'running',
+          };
+          updateOutput(progress);
        }
      };

@@ -149,17 +428,52 @@ Result:
 ${output.result}
 `;

+      if (updateOutput) {
+        updateOutput({
+          isSubagentProgress: true,
+          agentName: this['_toolName'] ?? 'browser_agent',
+          recentActivity: [...recentActivity],
+          state: 'completed',
+        } as SubagentProgress);
+      }
+
      return {
        llmContent: [{ text: resultContent }],
        returnDisplay: displayContent,
      };
    } catch (error) {
-      const errorMessage =
+      const rawErrorMessage =
        error instanceof Error ? error.message : String(error);
+      const isAbort =
+        (error instanceof Error && error.name === 'AbortError') ||
+        rawErrorMessage.includes('Aborted');
+      const errorMessage = sanitizeErrorMessage(rawErrorMessage);
+
+      // Mark any running items as error/cancelled
+      for (const item of recentActivity) {
+        if (item.status === 'running') {
+          item.status = isAbort ? 'cancelled' : 'error';
+        }
+      }
+
+      const progress: SubagentProgress = {
+        isSubagentProgress: true,
+        agentName: this['_toolName'] ?? 'browser_agent',
+        recentActivity: [...recentActivity],
+        state: isAbort ? 'cancelled' : 'error',
+      };
+
+      if (updateOutput) {
+        updateOutput(progress);
+      }
+
+      const llmContent = isAbort
+        ? 'Browser agent execution was aborted.'
+        : `Browser agent failed. Error: ${errorMessage}`;

      return {
-        llmContent: `Browser agent failed. Error: ${errorMessage}`,
-        returnDisplay: `Browser Agent Failed\nError: ${errorMessage}`,
+        llmContent: [{ text: llmContent }],
+        returnDisplay: progress,
        error: {
          message: errorMessage,
          type: ToolErrorType.EXECUTION_FAILED,
@@ -927,11 +927,11 @@ describe('LocalAgentExecutor', () => {
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'ERROR',
-          data: {
+          data: expect.objectContaining({
            context: 'tool_call',
            name: TASK_COMPLETE_TOOL_NAME,
            error: expectedError,
-          },
+          }),
        }),
      );

@@ -1213,11 +1213,11 @@ describe('LocalAgentExecutor', () => {
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'ERROR',
-          data: {
+          data: expect.objectContaining({
            context: 'tool_call',
            name: TASK_COMPLETE_TOOL_NAME,
            error: expect.stringContaining('Output validation failed'),
-          },
+          }),
        }),
      );

@@ -1338,11 +1338,11 @@ describe('LocalAgentExecutor', () => {
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'ERROR',
-          data: {
+          data: expect.objectContaining({
            context: 'tool_call',
            name: LS_TOOL_NAME,
            error: toolErrorMessage,
-          },
+          }),
        }),
      );

@@ -1699,15 +1699,17 @@ describe('LocalAgentExecutor', () => {
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'THOUGHT_CHUNK',
-          data: {
+          data: expect.objectContaining({
            text: 'Execution limit reached (MAX_TURNS). Attempting one final recovery turn with a grace period.',
-          },
+          }),
        }),
      );
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'THOUGHT_CHUNK',
-          data: { text: 'Graceful recovery succeeded.' },
+          data: expect.objectContaining({
+            text: 'Graceful recovery succeeded.',
+          }),
        }),
      );
    });
@@ -1784,9 +1786,9 @@ describe('LocalAgentExecutor', () => {
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'THOUGHT_CHUNK',
-          data: {
+          data: expect.objectContaining({
            text: 'Execution limit reached (ERROR_NO_COMPLETE_TASK_CALL). Attempting one final recovery turn with a grace period.',
-          },
+          }),
        }),
      );
    });
@@ -1882,9 +1884,9 @@ describe('LocalAgentExecutor', () => {
      expect(activities).toContainEqual(
        expect.objectContaining({
          type: 'THOUGHT_CHUNK',
-          data: {
+          data: expect.objectContaining({
            text: 'Execution limit reached (TIMEOUT). Attempting one final recovery turn with a grace period.',
-          },
+          }),
        }),
      );
    });
@@ -902,6 +902,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
        displayName,
        description,
        args,
+        callId,
      });

      if (toolName === TASK_COMPLETE_TOOL_NAME) {
@@ -969,6 +970,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
            });
            this.emitActivity('TOOL_CALL_END', {
              name: toolName,
+              id: callId,
              output: 'Output submitted and task completed.',
            });
          } else {
@@ -985,6 +987,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
            this.emitActivity('ERROR', {
              context: 'tool_call',
              name: toolName,
+              callId,
              error,
            });
          }
@@ -1009,6 +1012,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
            });
            this.emitActivity('TOOL_CALL_END', {
              name: toolName,
+              id: callId,
              output: 'Result submitted and task completed.',
            });
          } else {
@@ -1026,6 +1030,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
            this.emitActivity('ERROR', {
              context: 'tool_call',
              name: toolName,
+              callId,
              error,
            });
          }
@@ -1086,18 +1091,21 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
        if (call.status === 'success') {
          this.emitActivity('TOOL_CALL_END', {
            name: toolName,
+            id: call.request.callId,
            output: call.response.resultDisplay,
          });
        } else if (call.status === 'error') {
          this.emitActivity('ERROR', {
            context: 'tool_call',
            name: toolName,
+            callId: call.request.callId,
            error: call.response.error?.message || 'Unknown error',
          });
        } else if (call.status === 'cancelled') {
          this.emitActivity('ERROR', {
            context: 'tool_call',
            name: toolName,
+            callId: call.request.callId,
            error: 'Request cancelled.',
          });
          aborted = true;