diff --git a/packages/core/src/agents/browser/browserAgentDefinition.ts b/packages/core/src/agents/browser/browserAgentDefinition.ts index 0c91d81a79..84cb4fc5a9 100644 --- a/packages/core/src/agents/browser/browserAgentDefinition.ts +++ b/packages/core/src/agents/browser/browserAgentDefinition.ts @@ -74,17 +74,14 @@ When you need to identify elements by visual attributes not in the AX tree (e.g. COMPLEX WEB APPS (spreadsheets, rich editors, canvas apps): Many web apps (Google Sheets/Docs, Notion, Figma, etc.) use custom rendering rather than standard HTML inputs. - fill does NOT work on these apps. Instead, click the target element, then use type_text to enter the value. -- Navigate cells/fields using keyboard shortcuts (Tab, Enter, ArrowDown) — more reliable than clicking cell UIDs. -- For spreadsheets: click a cell → type_text("value") → press_key("Enter") to confirm and move to the next cell. +- type_text supports a submitKey parameter to press a key after typing (e.g., submitKey="Enter" to submit, submitKey="Tab" to move to the next field). This is much faster than separate press_key calls. +- Navigate cells/fields using keyboard shortcuts (Tab, Enter, ArrowDown) — more reliable than clicking UIDs. - Use the Name Box (cell reference input, usually showing "A1") to jump to specific cells. TERMINAL FAILURES — STOP IMMEDIATELY: -Some errors are unrecoverable and retrying will never help. When you see ANY of these, call complete_task immediately with success=false and include the specific remediation steps in your summary: -- "Could not connect to Chrome" — Include ALL of these instructions in your summary: - 1. Open Chrome (version 144+) - 2. Go to chrome://inspect/#remote-debugging and enable remote debugging - 3. Or change sessionMode to "persistent" in settings.json to let the agent launch its own browser -- "Browser closed" or "Target closed" or "Session closed" — The browser process has terminated. Tell the user to restart and try again. +Some errors are unrecoverable and retrying will never help. When you see ANY of these, call complete_task immediately with success=false and include the EXACT error message (including any remediation steps it contains) in your summary: +- "Could not connect to Chrome" or "Failed to connect to Chrome" or "Timed out connecting to Chrome" — Include the full error message with its remediation steps in your summary verbatim. Do NOT paraphrase or omit instructions. +- "Browser closed" or "Target closed" or "Session closed" — The browser process has terminated. Include the error and tell the user to try again. - "net::ERR_" network errors on the SAME URL after 2 retries — the site is unreachable. Report the URL and error. - Any error that appears IDENTICALLY 3+ times in a row — it will not resolve by retrying. Do NOT keep retrying terminal errors. Report them with actionable remediation steps and exit immediately. diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts index 284d240ad3..d7aedebae6 100644 --- a/packages/core/src/agents/browser/browserManager.test.ts +++ b/packages/core/src/agents/browser/browserManager.test.ts @@ -287,6 +287,77 @@ describe('BrowserManager', () => { /chrome:\/\/inspect\/#remote-debugging/, ); }); + + it('should throw profile-lock remediation when persistent mode hits "already running"', async () => { + vi.mocked(Client).mockImplementation( + () => + ({ + connect: vi + .fn() + .mockRejectedValue( + new Error( + 'Could not connect to Chrome. The browser is already running for the current profile.', + ), + ), + close: vi.fn().mockResolvedValue(undefined), + listTools: vi.fn(), + callTool: vi.fn(), + }) as unknown as InstanceType, + ); + + // Default config = persistent mode + const manager = new BrowserManager(mockConfig); + + await expect(manager.ensureConnection()).rejects.toThrow( + /Close all Chrome windows using this profile/, + ); + const manager2 = new BrowserManager(mockConfig); + await expect(manager2.ensureConnection()).rejects.toThrow( + /Set sessionMode to "isolated"/, + ); + }); + + it('should throw timeout-specific remediation for persistent mode', async () => { + vi.mocked(Client).mockImplementation( + () => + ({ + connect: vi + .fn() + .mockRejectedValue( + new Error('Timed out connecting to chrome-devtools-mcp'), + ), + close: vi.fn().mockResolvedValue(undefined), + listTools: vi.fn(), + callTool: vi.fn(), + }) as unknown as InstanceType, + ); + + const manager = new BrowserManager(mockConfig); + + await expect(manager.ensureConnection()).rejects.toThrow( + /Chrome is not installed/, + ); + }); + + it('should include sessionMode in generic fallback error', async () => { + vi.mocked(Client).mockImplementation( + () => + ({ + connect: vi + .fn() + .mockRejectedValue(new Error('Some unexpected error')), + close: vi.fn().mockResolvedValue(undefined), + listTools: vi.fn(), + callTool: vi.fn(), + }) as unknown as InstanceType, + ); + + const manager = new BrowserManager(mockConfig); + + await expect(manager.ensureConnection()).rejects.toThrow( + /sessionMode: persistent/, + ); + }); }); describe('MCP isolation', () => { diff --git a/packages/core/src/agents/browser/browserManager.ts b/packages/core/src/agents/browser/browserManager.ts index 61203e6588..1f44fb8ea2 100644 --- a/packages/core/src/agents/browser/browserManager.ts +++ b/packages/core/src/agents/browser/browserManager.ts @@ -315,19 +315,11 @@ export class BrowserManager { }), ]); } catch (error) { - // Provide actionable error for 'existing' mode failures - if (sessionMode === 'existing') { - const message = error instanceof Error ? error.message : String(error); - throw new Error( - `Failed to connect to existing Chrome instance: ${message}\n\n` + - `To use sessionMode "existing", you must:\n` + - ` 1. Open Chrome (version 144+)\n` + - ` 2. Navigate to chrome://inspect/#remote-debugging\n` + - ` 3. Enable remote debugging\n\n` + - `Alternatively, use sessionMode "persistent" (default) to launch a dedicated browser.`, - ); - } - throw error; + // Provide error-specific, session-mode-aware remediation + throw this.createConnectionError( + error instanceof Error ? error.message : String(error), + sessionMode, + ); } finally { if (timeoutId !== undefined) { clearTimeout(timeoutId); @@ -335,6 +327,72 @@ export class BrowserManager { } } + /** + * Creates an Error with context-specific remediation based on the actual + * error message and the current sessionMode. + */ + private createConnectionError(message: string, sessionMode: string): Error { + const lowerMessage = message.toLowerCase(); + + // "already running for the current profile" — persistent mode profile lock + if (lowerMessage.includes('already running')) { + if (sessionMode === 'persistent' || sessionMode === 'isolated') { + return new Error( + `Could not connect to Chrome: ${message}\n\n` + + `The Chrome profile is locked by another running instance.\n` + + `To fix this:\n` + + ` 1. Close all Chrome windows using this profile, OR\n` + + ` 2. Set sessionMode to "isolated" in settings.json to use a temporary profile, OR\n` + + ` 3. Set chromeProfilePath in settings.json to use a different profile directory`, + ); + } + // existing mode — shouldn't normally hit this, but handle gracefully + return new Error( + `Could not connect to Chrome: ${message}\n\n` + + `The Chrome profile is locked.\n` + + `Close other Chrome instances and try again.`, + ); + } + + // Timeout errors + if (lowerMessage.includes('timed out')) { + if (sessionMode === 'existing') { + return new Error( + `Timed out connecting to Chrome: ${message}\n\n` + + `To use sessionMode "existing", you must:\n` + + ` 1. Open Chrome (version 144+)\n` + + ` 2. Navigate to chrome://inspect/#remote-debugging\n` + + ` 3. Enable remote debugging\n\n` + + `Alternatively, set sessionMode to "persistent" (default) in settings.json to launch a dedicated browser.`, + ); + } + return new Error( + `Timed out connecting to Chrome: ${message}\n\n` + + `Possible causes:\n` + + ` 1. Chrome is not installed or not in PATH\n` + + ` 2. npx cannot download chrome-devtools-mcp (check network/proxy)\n` + + ` 3. Chrome failed to start (try setting headless: true in settings.json)`, + ); + } + + // Generic "existing" mode failures (connection refused, etc.) + if (sessionMode === 'existing') { + return new Error( + `Failed to connect to existing Chrome instance: ${message}\n\n` + + `To use sessionMode "existing", you must:\n` + + ` 1. Open Chrome (version 144+)\n` + + ` 2. Navigate to chrome://inspect/#remote-debugging\n` + + ` 3. Enable remote debugging\n\n` + + `Alternatively, set sessionMode to "persistent" (default) in settings.json to launch a dedicated browser.`, + ); + } + + // Generic fallback — include sessionMode for debugging context + return new Error( + `Failed to connect to Chrome (sessionMode: ${sessionMode}): ${message}`, + ); + } + /** * Discovers tools from the connected MCP server. */ diff --git a/packages/core/src/agents/browser/mcpToolWrapper.ts b/packages/core/src/agents/browser/mcpToolWrapper.ts index 7db274c58e..163f42a3ff 100644 --- a/packages/core/src/agents/browser/mcpToolWrapper.ts +++ b/packages/core/src/agents/browser/mcpToolWrapper.ts @@ -156,13 +156,17 @@ class TypeTextInvocation extends BaseToolInvocation< constructor( private readonly browserManager: BrowserManager, private readonly text: string, + private readonly submitKey: string | undefined, messageBus: MessageBus, ) { - super({ text }, messageBus, 'type_text', 'type_text'); + super({ text, submitKey }, messageBus, 'type_text', 'type_text'); } getDescription(): string { - return `type_text: "${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`; + const preview = `"${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`; + return this.submitKey + ? `type_text: ${preview} + ${this.submitKey}` + : `type_text: ${preview}`; } protected override async getConfirmationDetails( @@ -194,48 +198,32 @@ class TypeTextInvocation extends BaseToolInvocation< override async execute(signal: AbortSignal): Promise { try { - const chars = [...this.text]; // Handle Unicode correctly - let successCount = 0; - let lastError: string | undefined; + if (signal.aborted) { + return { + llmContent: 'Error: Operation cancelled before typing started.', + returnDisplay: 'Operation cancelled before typing started.', + error: { message: 'Operation cancelled' }, + }; + } - for (const char of chars) { - if (signal.aborted) { - return { - llmContent: `Error: Operation cancelled after typing ${successCount}/${chars.length} characters.`, - returnDisplay: `Operation cancelled after typing ${successCount}/${chars.length} characters.`, - error: { message: 'Operation cancelled' }, - }; - } + await this.typeCharByChar(signal); - // Map special characters to key names - const key = char === ' ' ? 'Space' : char; - const result: McpToolCallResult = await this.browserManager.callTool( - 'press_key', - { key }, - ); - - if (result.isError) { - const errorText = result.content - ?.filter( - (c: { type: string; text?: string }) => - c.type === 'text' && c.text, - ) - .map((c: { type: string; text?: string }) => c.text) - .join('\n'); - lastError = errorText || 'Unknown error'; - // Continue typing remaining characters on soft errors + // Optionally press a submit key (Enter, Tab, etc.) after typing + if (this.submitKey && !signal.aborted) { + const keyResult = await this.browserManager.callTool('press_key', { + key: this.submitKey, + }); + if (keyResult.isError) { + const errText = this.extractErrorText(keyResult); debugLogger.warn( - `type_text: press_key("${key}") failed: ${lastError}`, + `type_text: submitKey("${this.submitKey}") failed: ${errText}`, ); - } else { - successCount++; } } - const summary = - successCount === chars.length - ? `Successfully typed ${chars.length} characters: "${this.text}"` - : `Typed ${successCount}/${chars.length} characters. Last error: ${lastError}`; + const summary = this.submitKey + ? `Successfully typed "${this.text}" and pressed ${this.submitKey}` + : `Successfully typed "${this.text}"`; return { llmContent: summary, @@ -257,6 +245,36 @@ class TypeTextInvocation extends BaseToolInvocation< }; } } + + /** Types each character via individual press_key MCP calls. */ + private async typeCharByChar(signal: AbortSignal): Promise { + const chars = [...this.text]; // Handle Unicode correctly + for (const char of chars) { + if (signal.aborted) return; + + // Map special characters to key names + const key = char === ' ' ? 'Space' : char; + const result = await this.browserManager.callTool('press_key', { key }); + + if (result.isError) { + debugLogger.warn( + `type_text: press_key("${key}") failed: ${this.extractErrorText(result)}`, + ); + } + } + } + + /** Extract error text from an MCP tool result. */ + private extractErrorText(result: McpToolCallResult): string { + return ( + result.content + ?.filter( + (c: { type: string; text?: string }) => c.type === 'text' && c.text, + ) + .map((c: { type: string; text?: string }) => c.text) + .join('\n') || 'Unknown error' + ); + } } /** @@ -311,19 +329,24 @@ class TypeTextDeclarativeTool extends DeclarativeTool< super( 'type_text', 'type_text', - 'Types a full text string into the currently focused element by pressing each key in sequence. ' + + 'Types a full text string into the currently focused element. ' + 'Much faster than calling press_key for each character individually. ' + - 'Use this to enter text into form fields, spreadsheet cells, or any focused input. ' + + 'Use this to enter text into form fields, search boxes, spreadsheet cells, or any focused input. ' + 'The element must already be focused (e.g., after a click). ' + - 'Does NOT press Enter at the end — call press_key("Enter") separately if needed.', + 'Use submitKey to press a key after typing (e.g., submitKey="Enter" to submit a form or confirm a value, submitKey="Tab" to move to the next field).', Kind.Other, { type: 'object', properties: { text: { + type: 'string', + description: 'The text to type into the focused element.', + }, + submitKey: { type: 'string', description: - 'The text to type. Each character will be pressed in sequence.', + 'Optional key to press after typing (e.g., "Enter", "Tab", "Escape"). ' + + 'Useful for submitting form fields or moving to the next cell in a spreadsheet.', }, }, required: ['text'], @@ -337,9 +360,14 @@ class TypeTextDeclarativeTool extends DeclarativeTool< build( params: Record, ): ToolInvocation, ToolResult> { + const submitKey = + typeof params['submitKey'] === 'string' && params['submitKey'] + ? params['submitKey'] + : undefined; return new TypeTextInvocation( this.browserManager, String(params['text'] ?? ''), + submitKey, this.messageBus, ); } @@ -448,7 +476,7 @@ function augmentToolDescription(toolName: string, description: string): string { new_page: ' Opens a new page/tab with the specified URL. Call take_snapshot after to see the new page.', press_key: - ' Press a SINGLE keyboard key (e.g., "Enter", "Tab", "Escape", "ArrowDown", "a", "8"). ONLY accepts one key name — do NOT pass multi-character strings like "Hello" or "A1\\nEnter". To type text, call press_key once per character.', + ' Press a SINGLE keyboard key (e.g., "Enter", "Tab", "Escape", "ArrowDown", "a", "8"). ONLY accepts one key name — do NOT pass multi-character strings like "Hello" or "A1\\nEnter". To type text, use type_text instead of calling press_key for each character.', }; // Check for partial matches — order matters! More-specific keys first.