diff --git a/packages/core/src/agents/browser/browserAgentInvocation.test.ts b/packages/core/src/agents/browser/browserAgentInvocation.test.ts index b58a9c409e..daf5309479 100644 --- a/packages/core/src/agents/browser/browserAgentInvocation.test.ts +++ b/packages/core/src/agents/browser/browserAgentInvocation.test.ts @@ -9,7 +9,11 @@ import { BrowserAgentInvocation } from './browserAgentInvocation.js'; import { makeFakeConfig } from '../../test-utils/config.js'; import type { Config } from '../../config/config.js'; import type { MessageBus } from '../../confirmation-bus/message-bus.js'; -import type { AgentInputs } from '../types.js'; +import { + type AgentInputs, + type SubagentProgress, + type SubagentActivityEvent, +} from '../types.js'; // Mock dependencies before imports vi.mock('../../utils/debugLogger.js', () => ({ @@ -19,6 +23,24 @@ vi.mock('../../utils/debugLogger.js', () => ({ }, })); +vi.mock('./browserAgentFactory.js', () => ({ + createBrowserAgentDefinition: vi.fn(), + cleanupBrowserAgent: vi.fn(), +})); + +vi.mock('../local-executor.js', () => ({ + LocalAgentExecutor: { + create: vi.fn(), + }, +})); + +import { + createBrowserAgentDefinition, + cleanupBrowserAgent, +} from './browserAgentFactory.js'; +import { LocalAgentExecutor } from '../local-executor.js'; +import type { ToolLiveOutput } from '../../tools/tools.js'; + describe('BrowserAgentInvocation', () => { let mockConfig: Config; let mockMessageBus: MessageBus; @@ -136,4 +158,473 @@ describe('BrowserAgentInvocation', () => { expect(locations).toEqual([]); }); }); + + describe('execute', () => { + let mockExecutor: { run: ReturnType }; + + beforeEach(() => { + vi.mocked(createBrowserAgentDefinition).mockResolvedValue({ + definition: { + name: 'browser_agent', + description: 'mock definition', + kind: 'local', + inputConfig: {} as never, + outputConfig: {} as never, + processOutput: () => '', + modelConfig: { model: 'test' }, + runConfig: {}, + promptConfig: { query: '', systemPrompt: '' }, + toolConfig: { tools: ['analyze_screenshot', 'click'] }, + }, + browserManager: {} as never, + }); + + mockExecutor = { + run: vi.fn().mockResolvedValue({ + result: JSON.stringify({ success: true }), + terminate_reason: 'GOAL', + }), + }; + + vi.mocked(LocalAgentExecutor.create).mockResolvedValue( + mockExecutor as never, + ); + vi.mocked(cleanupBrowserAgent).mockClear(); + }); + + it('should return result text and call cleanup on success', async () => { + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const controller = new AbortController(); + const updateOutput: (output: ToolLiveOutput) => void = vi.fn(); + + const result = await invocation.execute(controller.signal, updateOutput); + + expect(Array.isArray(result.llmContent)).toBe(true); + expect((result.llmContent as Array<{ text: string }>)[0].text).toContain( + 'Browser agent finished', + ); + expect(cleanupBrowserAgent).toHaveBeenCalled(); + }); + + it('should work without updateOutput (fire-and-forget)', async () => { + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const controller = new AbortController(); + // Should not throw even with no updateOutput + await expect( + invocation.execute(controller.signal), + ).resolves.toBeDefined(); + }); + + it('should return error result when executor throws', async () => { + mockExecutor.run.mockRejectedValue(new Error('Unexpected crash')); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const controller = new AbortController(); + const result = await invocation.execute(controller.signal); + + expect(result.error).toBeDefined(); + expect(cleanupBrowserAgent).toHaveBeenCalled(); + }); + + // ─── Structured SubagentProgress emission tests ─────────────────────── + + /** + * Helper: sets up LocalAgentExecutor.create to capture the onActivity + * callback so tests can fire synthetic activity events. + */ + function setupActivityCapture(): { + capturedOnActivity: () => SubagentActivityEvent | undefined; + fireActivity: (event: SubagentActivityEvent) => void; + } { + let onActivityFn: ((e: SubagentActivityEvent) => void) | undefined; + + vi.mocked(LocalAgentExecutor.create).mockImplementation( + async (_def, _config, onActivity) => { + onActivityFn = onActivity; + return mockExecutor as never; + }, + ); + + return { + capturedOnActivity: () => undefined, + fireActivity: (event: SubagentActivityEvent) => { + onActivityFn?.(event); + }, + }; + } + + it('should emit initial SubagentProgress with running state', async () => { + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + await invocation.execute(new AbortController().signal, updateOutput); + + const firstCall = updateOutput.mock.calls[0]?.[0] as SubagentProgress; + expect(firstCall.isSubagentProgress).toBe(true); + expect(firstCall.state).toBe('running'); + expect(firstCall.recentActivity).toEqual([]); + }); + + it('should emit completed SubagentProgress on success', async () => { + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + await invocation.execute(new AbortController().signal, updateOutput); + + const lastCall = updateOutput.mock.calls[ + updateOutput.mock.calls.length - 1 + ]?.[0] as SubagentProgress; + expect(lastCall.isSubagentProgress).toBe(true); + expect(lastCall.state).toBe('completed'); + }); + + it('should handle THOUGHT_CHUNK and emit structured progress', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + // Allow createBrowserAgentDefinition to resolve and onActivity to be registered + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'THOUGHT_CHUNK', + data: { text: 'Navigating to the page...' }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + const thoughtProgress = progressCalls.find((p) => + p.recentActivity.some( + (a) => + a.type === 'thought' && + a.content.includes('Navigating to the page...'), + ), + ); + expect(thoughtProgress).toBeDefined(); + }); + + it('should handle TOOL_CALL_START and TOOL_CALL_END with callId tracking', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'TOOL_CALL_START', + data: { + name: 'navigate_browser', + callId: 'call-1', + args: { url: 'https://example.com' }, + }, + }); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'TOOL_CALL_END', + data: { name: 'navigate_browser', id: 'call-1' }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + // After TOOL_CALL_END, the tool should be completed + const finalProgress = progressCalls[progressCalls.length - 1]; + const toolItem = finalProgress?.recentActivity.find( + (a) => a.type === 'tool_call' && a.content === 'navigate_browser', + ); + expect(toolItem).toBeDefined(); + expect(toolItem?.status).toBe('completed'); + }); + + it('should sanitize sensitive data in tool call args', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'TOOL_CALL_START', + data: { + name: 'fill_form', + callId: 'call-2', + args: { password: 'supersecret123', url: 'https://example.com' }, + }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + const toolItem = progressCalls + .flatMap((p) => p.recentActivity) + .find((a) => a.type === 'tool_call' && a.content === 'fill_form'); + + expect(toolItem).toBeDefined(); + expect(toolItem?.args).not.toContain('supersecret123'); + expect(toolItem?.args).toContain('[REDACTED]'); + }); + + it('should handle ERROR event with callId and mark tool as errored', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'TOOL_CALL_START', + data: { name: 'click_element', callId: 'call-3', args: {} }, + }); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'ERROR', + data: { error: 'Element not found', callId: 'call-3' }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + const allItems = progressCalls.flatMap((p) => p.recentActivity); + const toolItem = allItems.find( + (a) => a.type === 'tool_call' && a.content === 'click_element', + ); + expect(toolItem?.status).toBe('error'); + }); + + it('should sanitize sensitive data in ERROR event messages', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'ERROR', + data: { error: 'Auth failed: api_key=sk-secret-abc1234567890' }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + const errorItem = progressCalls + .flatMap((p) => p.recentActivity) + .find((a) => a.type === 'thought' && a.status === 'error'); + + expect(errorItem).toBeDefined(); + expect(errorItem?.content).not.toContain('sk-secret-abc1234567890'); + expect(errorItem?.content).toContain('[REDACTED]'); + }); + + it('should sanitize inline PEM content in error messages', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'ERROR', + data: { + error: + 'Failed to authenticate:\n-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA12345...\n-----END RSA PRIVATE KEY-----\nPlease check credentials.', + }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + const errorItem = progressCalls + .flatMap((p) => p.recentActivity) + .find((a) => a.type === 'thought' && a.status === 'error'); + + expect(errorItem).toBeDefined(); + expect(errorItem?.content).toContain('[REDACTED_PEM]'); + expect(errorItem?.content).not.toContain('-----BEGIN'); + }); + + it('should mark all running tools as errored when ERROR has no callId', async () => { + const { fireActivity } = setupActivityCapture(); + const updateOutput = vi.fn(); + + const invocation = new BrowserAgentInvocation( + mockConfig, + mockParams, + mockMessageBus, + ); + + const executePromise = invocation.execute( + new AbortController().signal, + updateOutput, + ); + + await Promise.resolve(); + await Promise.resolve(); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'TOOL_CALL_START', + data: { name: 'tool_a', callId: 'c1', args: {} }, + }); + + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'TOOL_CALL_START', + data: { name: 'tool_b', callId: 'c2', args: {} }, + }); + + // ERROR with no callId should mark ALL running tools as error + fireActivity({ + isSubagentActivityEvent: true, + agentName: 'browser_agent', + type: 'ERROR', + data: { error: 'Agent crashed' }, + }); + + await executePromise; + + const progressCalls = updateOutput.mock.calls + .map((c) => c[0] as SubagentProgress) + .filter((p) => p.isSubagentProgress); + + const allItems = progressCalls.flatMap((p) => p.recentActivity); + const toolA = allItems.find( + (a) => a.type === 'tool_call' && a.content === 'tool_a', + ); + const toolB = allItems.find( + (a) => a.type === 'tool_call' && a.content === 'tool_b', + ); + + // Both should be error since no callId was specified + expect(toolA?.status).toBe('error'); + expect(toolB?.status).toBe('error'); + }); + }); }); diff --git a/packages/core/src/agents/browser/browserAgentInvocation.ts b/packages/core/src/agents/browser/browserAgentInvocation.ts index b503cc1214..3bdb4fa2d5 100644 --- a/packages/core/src/agents/browser/browserAgentInvocation.ts +++ b/packages/core/src/agents/browser/browserAgentInvocation.ts @@ -14,6 +14,7 @@ * The MCP tools are only available in the browser agent's isolated registry. */ +import { randomUUID } from 'node:crypto'; import type { Config } from '../../config/config.js'; import { LocalAgentExecutor } from '../local-executor.js'; import { @@ -22,7 +23,12 @@ import { type ToolLiveOutput, } from '../../tools/tools.js'; import { ToolErrorType } from '../../tools/tool-error.js'; -import type { AgentInputs, SubagentActivityEvent } from '../types.js'; +import { + type AgentInputs, + type SubagentActivityEvent, + type SubagentProgress, + type SubagentActivityItem, +} from '../types.js'; import type { MessageBus } from '../../confirmation-bus/message-bus.js'; import { createBrowserAgentDefinition, @@ -31,6 +37,134 @@ import { const INPUT_PREVIEW_MAX_LENGTH = 50; const DESCRIPTION_MAX_LENGTH = 200; +const MAX_RECENT_ACTIVITY = 20; + +/** + * Sensitive key patterns used for redaction. + */ +const SENSITIVE_KEY_PATTERNS = [ + 'password', + 'pwd', + 'apikey', + 'api_key', + 'api-key', + 'token', + 'secret', + 'credential', + 'auth', + 'authorization', + 'access_token', + 'access_key', + 'refresh_token', + 'session_id', + 'cookie', + 'passphrase', + 'privatekey', + 'private_key', + 'private-key', + 'secret_key', + 'client_secret', + 'client_id', +]; + +/** + * Sanitizes tool arguments by recursively redacting sensitive fields. + * Supports nested objects and arrays. + */ +function sanitizeToolArgs(args: unknown): unknown { + if (typeof args === 'string') { + return sanitizeErrorMessage(args); + } + if (typeof args !== 'object' || args === null) { + return args; + } + + if (Array.isArray(args)) { + return args.map(sanitizeToolArgs); + } + + const sanitized: Record = {}; + + for (const [key, value] of Object.entries(args)) { + // Decode key to handle URL-encoded sensitive keys (e.g., api%5fkey) + let decodedKey = key; + try { + decodedKey = decodeURIComponent(key); + } catch { + // Ignore decoding errors + } + const keyNormalized = decodedKey.toLowerCase().replace(/[-_]/g, ''); + const isSensitive = SENSITIVE_KEY_PATTERNS.some((pattern) => + keyNormalized.includes(pattern.replace(/[-_]/g, '')), + ); + if (isSensitive) { + sanitized[key] = '[REDACTED]'; + } else { + sanitized[key] = sanitizeToolArgs(value); + } + } + + return sanitized; +} + +/** + * Sanitizes error messages by redacting potential sensitive data patterns. + * Uses [^\s'"]+ to catch JWTs, tokens with dots/slashes, and other complex values. + */ +function sanitizeErrorMessage(message: string): string { + if (!message) return message; + + let sanitized = message; + + // 1. Redact inline PEM content + sanitized = sanitized.replace( + /-----BEGIN\s+[\w\s]+-----[\s\S]*?-----END\s+[\w\s]+-----/g, + '[REDACTED_PEM]', + ); + + const unquotedValue = `[^\\s]+(?:\\s+(?![a-zA-Z0-9_.-]+(?:=|:))[^\\s=:<>]+)*`; + const valuePattern = `(?:"[^"]*"|'[^']*'|${unquotedValue})`; + + // 2. Handle key-value pairs with delimiters (=, :, space, CLI-style --flag) + const urlSafeKeyPatternStr = SENSITIVE_KEY_PATTERNS.map((p) => + p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'), + ).join('|'); + + const keyWithDelimiter = new RegExp( + `((?:--)?("|')?(${urlSafeKeyPatternStr})\\2\\s*(?:[:=]|%3A|%3D)\\s*)${valuePattern}`, + 'gi', + ); + sanitized = sanitized.replace(keyWithDelimiter, '$1[REDACTED]'); + + // 3. Handle space-separated sensitive keywords (e.g. "password mypass", "--api-key secret") + const tokenValuePattern = `[A-Za-z0-9._\\-/+=]{8,}`; + const spaceKeywords = [ + ...SENSITIVE_KEY_PATTERNS.map((p) => + p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'), + ), + 'bearer', + ]; + const spaceSeparated = new RegExp( + `\\b((?:--)?(?:${spaceKeywords.join('|')})(?:\\s*:\\s*bearer)?\\s+)(${tokenValuePattern})`, + 'gi', + ); + sanitized = sanitized.replace(spaceSeparated, '$1[REDACTED]'); + + // 4. Handle file path redaction + sanitized = sanitized.replace( + /((?:[/\\][a-zA-Z0-9_-]+)*[/\\][a-zA-Z0-9_-]*\.(?:key|pem|p12|pfx))/gi, + '/path/to/[REDACTED].key', + ); + + return sanitized; +} + +/** + * Sanitizes LLM thought content by redacting sensitive data patterns. + */ +function sanitizeThoughtContent(text: string): string { + return sanitizeErrorMessage(text); +} /** * Browser agent invocation with async tool setup. @@ -88,15 +222,41 @@ export class BrowserAgentInvocation extends BaseToolInvocation< updateOutput?: (output: ToolLiveOutput) => void, ): Promise { let browserManager; + let recentActivity: SubagentActivityItem[] = []; try { if (updateOutput) { - updateOutput('🌐 Starting browser agent...\n'); + // Send initial state + const initialProgress: SubagentProgress = { + isSubagentProgress: true, + agentName: this['_toolName'] ?? 'browser_agent', + recentActivity: [], + state: 'running', + }; + updateOutput(initialProgress); } // Create definition with MCP tools + // Note: printOutput is used for low-level connection logs before agent starts const printOutput = updateOutput - ? (msg: string) => updateOutput(`🌐 ${msg}\n`) + ? (msg: string) => { + const sanitizedMsg = sanitizeThoughtContent(msg); + recentActivity.push({ + id: randomUUID(), + type: 'thought', + content: sanitizedMsg, + status: 'completed', + }); + if (recentActivity.length > MAX_RECENT_ACTIVITY) { + recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY); + } + updateOutput({ + isSubagentProgress: true, + agentName: this['_toolName'] ?? 'browser_agent', + recentActivity: [...recentActivity], + state: 'running', + } as SubagentProgress); + } : undefined; const result = await createBrowserAgentDefinition( @@ -107,22 +267,141 @@ export class BrowserAgentInvocation extends BaseToolInvocation< const { definition } = result; browserManager = result.browserManager; - if (updateOutput) { - updateOutput( - `🌐 Browser connected. Tools: ${definition.toolConfig?.tools.length ?? 0}\n`, - ); - } - // Create activity callback for streaming output const onActivity = (activity: SubagentActivityEvent): void => { if (!updateOutput) return; - if ( - activity.type === 'THOUGHT_CHUNK' && - // eslint-disable-next-line no-restricted-syntax - typeof activity.data['text'] === 'string' - ) { - updateOutput(`🌐💭 ${activity.data['text']}`); + let updated = false; + + switch (activity.type) { + case 'THOUGHT_CHUNK': { + const text = String(activity.data['text']); + const lastItem = recentActivity[recentActivity.length - 1]; + if ( + lastItem && + lastItem.type === 'thought' && + lastItem.status === 'running' + ) { + lastItem.content = sanitizeThoughtContent( + lastItem.content + text, + ); + } else { + recentActivity.push({ + id: randomUUID(), + type: 'thought', + content: sanitizeThoughtContent(text), + status: 'running', + }); + } + updated = true; + break; + } + case 'TOOL_CALL_START': { + const name = String(activity.data['name']); + const displayName = activity.data['displayName'] + ? sanitizeErrorMessage(String(activity.data['displayName'])) + : undefined; + const description = activity.data['description'] + ? sanitizeErrorMessage(String(activity.data['description'])) + : undefined; + const args = JSON.stringify( + sanitizeToolArgs(activity.data['args']), + ); + const callId = activity.data['callId'] + ? String(activity.data['callId']) + : randomUUID(); + recentActivity.push({ + id: callId, + type: 'tool_call', + content: name, + displayName, + description, + args, + status: 'running', + }); + updated = true; + break; + } + case 'TOOL_CALL_END': { + const callId = activity.data['id'] + ? String(activity.data['id']) + : undefined; + // Find the tool call by ID + // Find the tool call by ID + for (let i = recentActivity.length - 1; i >= 0; i--) { + if ( + recentActivity[i].type === 'tool_call' && + callId != null && + recentActivity[i].id === callId && + recentActivity[i].status === 'running' + ) { + recentActivity[i].status = 'completed'; + updated = true; + break; + } + } + break; + } + case 'ERROR': { + const error = String(activity.data['error']); + const isCancellation = error === 'Request cancelled.'; + const callId = activity.data['callId'] + ? String(activity.data['callId']) + : undefined; + const newStatus = isCancellation ? 'cancelled' : 'error'; + + if (callId) { + // Mark the specific tool as error/cancelled + for (let i = recentActivity.length - 1; i >= 0; i--) { + if ( + recentActivity[i].type === 'tool_call' && + recentActivity[i].id === callId && + recentActivity[i].status === 'running' + ) { + recentActivity[i].status = newStatus; + updated = true; + break; + } + } + } else { + // No specific tool — mark ALL running tool_call items + for (const item of recentActivity) { + if (item.type === 'tool_call' && item.status === 'running') { + item.status = newStatus; + updated = true; + } + } + } + + // Sanitize the error message before emitting + const sanitizedError = sanitizeErrorMessage(error); + recentActivity.push({ + id: randomUUID(), + type: 'thought', + content: isCancellation + ? sanitizedError + : `Error: ${sanitizedError}`, + status: newStatus, + }); + updated = true; + break; + } + default: + break; + } + + if (updated) { + if (recentActivity.length > MAX_RECENT_ACTIVITY) { + recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY); + } + + const progress: SubagentProgress = { + isSubagentProgress: true, + agentName: this['_toolName'] ?? 'browser_agent', + recentActivity: [...recentActivity], + state: 'running', + }; + updateOutput(progress); } }; @@ -149,17 +428,52 @@ Result: ${output.result} `; + if (updateOutput) { + updateOutput({ + isSubagentProgress: true, + agentName: this['_toolName'] ?? 'browser_agent', + recentActivity: [...recentActivity], + state: 'completed', + } as SubagentProgress); + } + return { llmContent: [{ text: resultContent }], returnDisplay: displayContent, }; } catch (error) { - const errorMessage = + const rawErrorMessage = error instanceof Error ? error.message : String(error); + const isAbort = + (error instanceof Error && error.name === 'AbortError') || + rawErrorMessage.includes('Aborted'); + const errorMessage = sanitizeErrorMessage(rawErrorMessage); + + // Mark any running items as error/cancelled + for (const item of recentActivity) { + if (item.status === 'running') { + item.status = isAbort ? 'cancelled' : 'error'; + } + } + + const progress: SubagentProgress = { + isSubagentProgress: true, + agentName: this['_toolName'] ?? 'browser_agent', + recentActivity: [...recentActivity], + state: isAbort ? 'cancelled' : 'error', + }; + + if (updateOutput) { + updateOutput(progress); + } + + const llmContent = isAbort + ? 'Browser agent execution was aborted.' + : `Browser agent failed. Error: ${errorMessage}`; return { - llmContent: `Browser agent failed. Error: ${errorMessage}`, - returnDisplay: `Browser Agent Failed\nError: ${errorMessage}`, + llmContent: [{ text: llmContent }], + returnDisplay: progress, error: { message: errorMessage, type: ToolErrorType.EXECUTION_FAILED, diff --git a/packages/core/src/agents/local-executor.test.ts b/packages/core/src/agents/local-executor.test.ts index f056c73a68..f9a518ae56 100644 --- a/packages/core/src/agents/local-executor.test.ts +++ b/packages/core/src/agents/local-executor.test.ts @@ -927,11 +927,11 @@ describe('LocalAgentExecutor', () => { expect(activities).toContainEqual( expect.objectContaining({ type: 'ERROR', - data: { + data: expect.objectContaining({ context: 'tool_call', name: TASK_COMPLETE_TOOL_NAME, error: expectedError, - }, + }), }), ); @@ -1213,11 +1213,11 @@ describe('LocalAgentExecutor', () => { expect(activities).toContainEqual( expect.objectContaining({ type: 'ERROR', - data: { + data: expect.objectContaining({ context: 'tool_call', name: TASK_COMPLETE_TOOL_NAME, error: expect.stringContaining('Output validation failed'), - }, + }), }), ); @@ -1338,11 +1338,11 @@ describe('LocalAgentExecutor', () => { expect(activities).toContainEqual( expect.objectContaining({ type: 'ERROR', - data: { + data: expect.objectContaining({ context: 'tool_call', name: LS_TOOL_NAME, error: toolErrorMessage, - }, + }), }), ); @@ -1699,15 +1699,17 @@ describe('LocalAgentExecutor', () => { expect(activities).toContainEqual( expect.objectContaining({ type: 'THOUGHT_CHUNK', - data: { + data: expect.objectContaining({ text: 'Execution limit reached (MAX_TURNS). Attempting one final recovery turn with a grace period.', - }, + }), }), ); expect(activities).toContainEqual( expect.objectContaining({ type: 'THOUGHT_CHUNK', - data: { text: 'Graceful recovery succeeded.' }, + data: expect.objectContaining({ + text: 'Graceful recovery succeeded.', + }), }), ); }); @@ -1784,9 +1786,9 @@ describe('LocalAgentExecutor', () => { expect(activities).toContainEqual( expect.objectContaining({ type: 'THOUGHT_CHUNK', - data: { + data: expect.objectContaining({ text: 'Execution limit reached (ERROR_NO_COMPLETE_TASK_CALL). Attempting one final recovery turn with a grace period.', - }, + }), }), ); }); @@ -1882,9 +1884,9 @@ describe('LocalAgentExecutor', () => { expect(activities).toContainEqual( expect.objectContaining({ type: 'THOUGHT_CHUNK', - data: { + data: expect.objectContaining({ text: 'Execution limit reached (TIMEOUT). Attempting one final recovery turn with a grace period.', - }, + }), }), ); }); diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index 7bbecdac7c..fd450c5efa 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -902,6 +902,7 @@ export class LocalAgentExecutor { displayName, description, args, + callId, }); if (toolName === TASK_COMPLETE_TOOL_NAME) { @@ -969,6 +970,7 @@ export class LocalAgentExecutor { }); this.emitActivity('TOOL_CALL_END', { name: toolName, + id: callId, output: 'Output submitted and task completed.', }); } else { @@ -985,6 +987,7 @@ export class LocalAgentExecutor { this.emitActivity('ERROR', { context: 'tool_call', name: toolName, + callId, error, }); } @@ -1009,6 +1012,7 @@ export class LocalAgentExecutor { }); this.emitActivity('TOOL_CALL_END', { name: toolName, + id: callId, output: 'Result submitted and task completed.', }); } else { @@ -1026,6 +1030,7 @@ export class LocalAgentExecutor { this.emitActivity('ERROR', { context: 'tool_call', name: toolName, + callId, error, }); } @@ -1086,18 +1091,21 @@ export class LocalAgentExecutor { if (call.status === 'success') { this.emitActivity('TOOL_CALL_END', { name: toolName, + id: call.request.callId, output: call.response.resultDisplay, }); } else if (call.status === 'error') { this.emitActivity('ERROR', { context: 'tool_call', name: toolName, + callId: call.request.callId, error: call.response.error?.message || 'Unknown error', }); } else if (call.status === 'cancelled') { this.emitActivity('ERROR', { context: 'tool_call', name: toolName, + callId: call.request.callId, error: 'Request cancelled.', }); aborted = true;