feat(browser): add submitKey param to type_text and improve connection errors

- Add submitKey parameter to type_text tool for pressing Enter/Tab/etc
  after typing, eliminating a separate model round-trip per value entry
- Update system prompt and tool hints to guide model toward type_text
  with submitKey instead of per-character press_key calls
- Refactor connection error handling into createConnectionError() with
  session-mode-aware remediation messages for profile locks, timeouts,
  and generic failures
- Update terminal failure prompts to pass through error remediation
  verbatim instead of hardcoding instructions
- Add tests for profile-lock, timeout, and generic connection errors
This commit is contained in:
Gaurav Ghosh
2026-02-17 09:44:41 -08:00
parent c7ec983d31
commit 4e2856c4dd
4 changed files with 217 additions and 63 deletions
@@ -74,17 +74,14 @@ When you need to identify elements by visual attributes not in the AX tree (e.g.
COMPLEX WEB APPS (spreadsheets, rich editors, canvas apps):
Many web apps (Google Sheets/Docs, Notion, Figma, etc.) use custom rendering rather than standard HTML inputs.
- fill does NOT work on these apps. Instead, click the target element, then use type_text to enter the value.
- Navigate cells/fields using keyboard shortcuts (Tab, Enter, ArrowDown) — more reliable than clicking cell UIDs.
- For spreadsheets: click a cell → type_text("value") → press_key("Enter") to confirm and move to the next cell.
- type_text supports a submitKey parameter to press a key after typing (e.g., submitKey="Enter" to submit, submitKey="Tab" to move to the next field). This is much faster than separate press_key calls.
- Navigate cells/fields using keyboard shortcuts (Tab, Enter, ArrowDown) — more reliable than clicking UIDs.
- Use the Name Box (cell reference input, usually showing "A1") to jump to specific cells.
TERMINAL FAILURES — STOP IMMEDIATELY:
Some errors are unrecoverable and retrying will never help. When you see ANY of these, call complete_task immediately with success=false and include the specific remediation steps in your summary:
- "Could not connect to Chrome" — Include ALL of these instructions in your summary:
1. Open Chrome (version 144+)
2. Go to chrome://inspect/#remote-debugging and enable remote debugging
3. Or change sessionMode to "persistent" in settings.json to let the agent launch its own browser
- "Browser closed" or "Target closed" or "Session closed" — The browser process has terminated. Tell the user to restart and try again.
Some errors are unrecoverable and retrying will never help. When you see ANY of these, call complete_task immediately with success=false and include the EXACT error message (including any remediation steps it contains) in your summary:
- "Could not connect to Chrome" or "Failed to connect to Chrome" or "Timed out connecting to Chrome" — Include the full error message with its remediation steps in your summary verbatim. Do NOT paraphrase or omit instructions.
- "Browser closed" or "Target closed" or "Session closed" — The browser process has terminated. Include the error and tell the user to try again.
- "net::ERR_" network errors on the SAME URL after 2 retries — the site is unreachable. Report the URL and error.
- Any error that appears IDENTICALLY 3+ times in a row — it will not resolve by retrying.
Do NOT keep retrying terminal errors. Report them with actionable remediation steps and exit immediately.
@@ -287,6 +287,77 @@ describe('BrowserManager', () => {
/chrome:\/\/inspect\/#remote-debugging/,
);
});
it('should throw profile-lock remediation when persistent mode hits "already running"', async () => {
vi.mocked(Client).mockImplementation(
() =>
({
connect: vi
.fn()
.mockRejectedValue(
new Error(
'Could not connect to Chrome. The browser is already running for the current profile.',
),
),
close: vi.fn().mockResolvedValue(undefined),
listTools: vi.fn(),
callTool: vi.fn(),
}) as unknown as InstanceType<typeof Client>,
);
// Default config = persistent mode
const manager = new BrowserManager(mockConfig);
await expect(manager.ensureConnection()).rejects.toThrow(
/Close all Chrome windows using this profile/,
);
const manager2 = new BrowserManager(mockConfig);
await expect(manager2.ensureConnection()).rejects.toThrow(
/Set sessionMode to "isolated"/,
);
});
it('should throw timeout-specific remediation for persistent mode', async () => {
vi.mocked(Client).mockImplementation(
() =>
({
connect: vi
.fn()
.mockRejectedValue(
new Error('Timed out connecting to chrome-devtools-mcp'),
),
close: vi.fn().mockResolvedValue(undefined),
listTools: vi.fn(),
callTool: vi.fn(),
}) as unknown as InstanceType<typeof Client>,
);
const manager = new BrowserManager(mockConfig);
await expect(manager.ensureConnection()).rejects.toThrow(
/Chrome is not installed/,
);
});
it('should include sessionMode in generic fallback error', async () => {
vi.mocked(Client).mockImplementation(
() =>
({
connect: vi
.fn()
.mockRejectedValue(new Error('Some unexpected error')),
close: vi.fn().mockResolvedValue(undefined),
listTools: vi.fn(),
callTool: vi.fn(),
}) as unknown as InstanceType<typeof Client>,
);
const manager = new BrowserManager(mockConfig);
await expect(manager.ensureConnection()).rejects.toThrow(
/sessionMode: persistent/,
);
});
});
describe('MCP isolation', () => {
@@ -315,19 +315,11 @@ export class BrowserManager {
}),
]);
} catch (error) {
// Provide actionable error for 'existing' mode failures
if (sessionMode === 'existing') {
const message = error instanceof Error ? error.message : String(error);
throw new Error(
`Failed to connect to existing Chrome instance: ${message}\n\n` +
`To use sessionMode "existing", you must:\n` +
` 1. Open Chrome (version 144+)\n` +
` 2. Navigate to chrome://inspect/#remote-debugging\n` +
` 3. Enable remote debugging\n\n` +
`Alternatively, use sessionMode "persistent" (default) to launch a dedicated browser.`,
);
}
throw error;
// Provide error-specific, session-mode-aware remediation
throw this.createConnectionError(
error instanceof Error ? error.message : String(error),
sessionMode,
);
} finally {
if (timeoutId !== undefined) {
clearTimeout(timeoutId);
@@ -335,6 +327,72 @@ export class BrowserManager {
}
}
/**
* Creates an Error with context-specific remediation based on the actual
* error message and the current sessionMode.
*/
private createConnectionError(message: string, sessionMode: string): Error {
const lowerMessage = message.toLowerCase();
// "already running for the current profile" — persistent mode profile lock
if (lowerMessage.includes('already running')) {
if (sessionMode === 'persistent' || sessionMode === 'isolated') {
return new Error(
`Could not connect to Chrome: ${message}\n\n` +
`The Chrome profile is locked by another running instance.\n` +
`To fix this:\n` +
` 1. Close all Chrome windows using this profile, OR\n` +
` 2. Set sessionMode to "isolated" in settings.json to use a temporary profile, OR\n` +
` 3. Set chromeProfilePath in settings.json to use a different profile directory`,
);
}
// existing mode — shouldn't normally hit this, but handle gracefully
return new Error(
`Could not connect to Chrome: ${message}\n\n` +
`The Chrome profile is locked.\n` +
`Close other Chrome instances and try again.`,
);
}
// Timeout errors
if (lowerMessage.includes('timed out')) {
if (sessionMode === 'existing') {
return new Error(
`Timed out connecting to Chrome: ${message}\n\n` +
`To use sessionMode "existing", you must:\n` +
` 1. Open Chrome (version 144+)\n` +
` 2. Navigate to chrome://inspect/#remote-debugging\n` +
` 3. Enable remote debugging\n\n` +
`Alternatively, set sessionMode to "persistent" (default) in settings.json to launch a dedicated browser.`,
);
}
return new Error(
`Timed out connecting to Chrome: ${message}\n\n` +
`Possible causes:\n` +
` 1. Chrome is not installed or not in PATH\n` +
` 2. npx cannot download chrome-devtools-mcp (check network/proxy)\n` +
` 3. Chrome failed to start (try setting headless: true in settings.json)`,
);
}
// Generic "existing" mode failures (connection refused, etc.)
if (sessionMode === 'existing') {
return new Error(
`Failed to connect to existing Chrome instance: ${message}\n\n` +
`To use sessionMode "existing", you must:\n` +
` 1. Open Chrome (version 144+)\n` +
` 2. Navigate to chrome://inspect/#remote-debugging\n` +
` 3. Enable remote debugging\n\n` +
`Alternatively, set sessionMode to "persistent" (default) in settings.json to launch a dedicated browser.`,
);
}
// Generic fallback — include sessionMode for debugging context
return new Error(
`Failed to connect to Chrome (sessionMode: ${sessionMode}): ${message}`,
);
}
/**
* Discovers tools from the connected MCP server.
*/
@@ -156,13 +156,17 @@ class TypeTextInvocation extends BaseToolInvocation<
constructor(
private readonly browserManager: BrowserManager,
private readonly text: string,
private readonly submitKey: string | undefined,
messageBus: MessageBus,
) {
super({ text }, messageBus, 'type_text', 'type_text');
super({ text, submitKey }, messageBus, 'type_text', 'type_text');
}
getDescription(): string {
return `type_text: "${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`;
const preview = `"${this.text.substring(0, 50)}${this.text.length > 50 ? '...' : ''}"`;
return this.submitKey
? `type_text: ${preview} + ${this.submitKey}`
: `type_text: ${preview}`;
}
protected override async getConfirmationDetails(
@@ -194,48 +198,32 @@ class TypeTextInvocation extends BaseToolInvocation<
override async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const chars = [...this.text]; // Handle Unicode correctly
let successCount = 0;
let lastError: string | undefined;
if (signal.aborted) {
return {
llmContent: 'Error: Operation cancelled before typing started.',
returnDisplay: 'Operation cancelled before typing started.',
error: { message: 'Operation cancelled' },
};
}
for (const char of chars) {
if (signal.aborted) {
return {
llmContent: `Error: Operation cancelled after typing ${successCount}/${chars.length} characters.`,
returnDisplay: `Operation cancelled after typing ${successCount}/${chars.length} characters.`,
error: { message: 'Operation cancelled' },
};
}
await this.typeCharByChar(signal);
// Map special characters to key names
const key = char === ' ' ? 'Space' : char;
const result: McpToolCallResult = await this.browserManager.callTool(
'press_key',
{ key },
);
if (result.isError) {
const errorText = result.content
?.filter(
(c: { type: string; text?: string }) =>
c.type === 'text' && c.text,
)
.map((c: { type: string; text?: string }) => c.text)
.join('\n');
lastError = errorText || 'Unknown error';
// Continue typing remaining characters on soft errors
// Optionally press a submit key (Enter, Tab, etc.) after typing
if (this.submitKey && !signal.aborted) {
const keyResult = await this.browserManager.callTool('press_key', {
key: this.submitKey,
});
if (keyResult.isError) {
const errText = this.extractErrorText(keyResult);
debugLogger.warn(
`type_text: press_key("${key}") failed: ${lastError}`,
`type_text: submitKey("${this.submitKey}") failed: ${errText}`,
);
} else {
successCount++;
}
}
const summary =
successCount === chars.length
? `Successfully typed ${chars.length} characters: "${this.text}"`
: `Typed ${successCount}/${chars.length} characters. Last error: ${lastError}`;
const summary = this.submitKey
? `Successfully typed "${this.text}" and pressed ${this.submitKey}`
: `Successfully typed "${this.text}"`;
return {
llmContent: summary,
@@ -257,6 +245,36 @@ class TypeTextInvocation extends BaseToolInvocation<
};
}
}
/** Types each character via individual press_key MCP calls. */
private async typeCharByChar(signal: AbortSignal): Promise<void> {
const chars = [...this.text]; // Handle Unicode correctly
for (const char of chars) {
if (signal.aborted) return;
// Map special characters to key names
const key = char === ' ' ? 'Space' : char;
const result = await this.browserManager.callTool('press_key', { key });
if (result.isError) {
debugLogger.warn(
`type_text: press_key("${key}") failed: ${this.extractErrorText(result)}`,
);
}
}
}
/** Extract error text from an MCP tool result. */
private extractErrorText(result: McpToolCallResult): string {
return (
result.content
?.filter(
(c: { type: string; text?: string }) => c.type === 'text' && c.text,
)
.map((c: { type: string; text?: string }) => c.text)
.join('\n') || 'Unknown error'
);
}
}
/**
@@ -311,19 +329,24 @@ class TypeTextDeclarativeTool extends DeclarativeTool<
super(
'type_text',
'type_text',
'Types a full text string into the currently focused element by pressing each key in sequence. ' +
'Types a full text string into the currently focused element. ' +
'Much faster than calling press_key for each character individually. ' +
'Use this to enter text into form fields, spreadsheet cells, or any focused input. ' +
'Use this to enter text into form fields, search boxes, spreadsheet cells, or any focused input. ' +
'The element must already be focused (e.g., after a click). ' +
'Does NOT press Enter at the end — call press_key("Enter") separately if needed.',
'Use submitKey to press a key after typing (e.g., submitKey="Enter" to submit a form or confirm a value, submitKey="Tab" to move to the next field).',
Kind.Other,
{
type: 'object',
properties: {
text: {
type: 'string',
description: 'The text to type into the focused element.',
},
submitKey: {
type: 'string',
description:
'The text to type. Each character will be pressed in sequence.',
'Optional key to press after typing (e.g., "Enter", "Tab", "Escape"). ' +
'Useful for submitting form fields or moving to the next cell in a spreadsheet.',
},
},
required: ['text'],
@@ -337,9 +360,14 @@ class TypeTextDeclarativeTool extends DeclarativeTool<
build(
params: Record<string, unknown>,
): ToolInvocation<Record<string, unknown>, ToolResult> {
const submitKey =
typeof params['submitKey'] === 'string' && params['submitKey']
? params['submitKey']
: undefined;
return new TypeTextInvocation(
this.browserManager,
String(params['text'] ?? ''),
submitKey,
this.messageBus,
);
}
@@ -448,7 +476,7 @@ function augmentToolDescription(toolName: string, description: string): string {
new_page:
' Opens a new page/tab with the specified URL. Call take_snapshot after to see the new page.',
press_key:
' Press a SINGLE keyboard key (e.g., "Enter", "Tab", "Escape", "ArrowDown", "a", "8"). ONLY accepts one key name — do NOT pass multi-character strings like "Hello" or "A1\\nEnter". To type text, call press_key once per character.',
' Press a SINGLE keyboard key (e.g., "Enter", "Tab", "Escape", "ArrowDown", "a", "8"). ONLY accepts one key name — do NOT pass multi-character strings like "Hello" or "A1\\nEnter". To type text, use type_text instead of calling press_key for each character.',
};
// Check for partial matches — order matters! More-specific keys first.