feat: add pulsating blue border automation overlay to browser agent (#21173)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Gaurav <39389231+gsquared94@users.noreply.github.com>
This commit is contained in:
Aditya Bijalwan
2026-03-11 01:45:03 +05:30
committed by GitHub
parent 9eae91a489
commit 5caa192cfc
7 changed files with 331 additions and 22 deletions

View File

@@ -0,0 +1,133 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview Automation overlay utilities for visual indication during browser automation.
*
* Provides functions to inject and remove a pulsating blue border overlay
* that indicates when the browser is under AI agent control.
*
* Uses the Web Animations API instead of injected <style> tags so the
* animation works on sites with strict Content Security Policies (e.g. google.com).
*
* The script strings are passed to chrome-devtools-mcp's evaluate_script tool
* which expects a plain function expression (NOT an IIFE).
*/
import type { BrowserManager } from './browserManager.js';
import { debugLogger } from '../../utils/debugLogger.js';
const OVERLAY_ELEMENT_ID = '__gemini_automation_overlay';

/**
 * Builds the JavaScript function string that injects the automation overlay.
 *
 * Returns a plain arrow-function expression (no trailing invocation) because
 * chrome-devtools-mcp's evaluate_script tool invokes it internally.
 *
 * When run in the page, the generated function:
 *  - removes any element that already carries the overlay id, so repeated
 *    injections never stack overlays;
 *  - appends a fixed, full-viewport, click-through (`pointerEvents: 'none'`)
 *    `<div>` with a solid blue border to `document.documentElement`;
 *  - starts an endlessly looping border/box-shadow pulse through
 *    `Element.animate`, on a best-effort basis.
 *
 * Styles are assigned through `overlay.style` (no `<style>` tag, no cssText),
 * and the element id is embedded with `JSON.stringify` so it is always a
 * correctly escaped string literal inside the generated source.
 *
 * @returns Source text of an arrow function that returns the string
 *   `'overlay-injected'` when executed in the page.
 */
function buildInjectionScript(): string {
  // JSON.stringify yields a quoted, escaped JS string literal for the id.
  const idLiteral = JSON.stringify(OVERLAY_ELEMENT_ID);
  return `() => {
  const id = ${idLiteral};
  const existing = document.getElementById(id);
  if (existing) existing.remove();
  const overlay = document.createElement('div');
  overlay.id = id;
  overlay.setAttribute('aria-hidden', 'true');
  overlay.setAttribute('role', 'presentation');
  Object.assign(overlay.style, {
    position: 'fixed',
    top: '0',
    left: '0',
    right: '0',
    bottom: '0',
    zIndex: '2147483647',
    pointerEvents: 'none',
    border: '6px solid rgba(66, 133, 244, 1.0)',
  });
  document.documentElement.appendChild(overlay);
  try {
    overlay.animate([
      { borderColor: 'rgba(66,133,244,0.3)', boxShadow: 'inset 0 0 8px rgba(66,133,244,0.15)' },
      { borderColor: 'rgba(66,133,244,1.0)', boxShadow: 'inset 0 0 16px rgba(66,133,244,0.5)' },
      { borderColor: 'rgba(66,133,244,0.3)', boxShadow: 'inset 0 0 8px rgba(66,133,244,0.15)' }
    ], { duration: 2000, iterations: Infinity, easing: 'ease-in-out' });
  } catch (e) {
    // Best-effort: if the pulse cannot be started (Element.animate missing or
    // throwing), keep the static border. The border itself is the most
    // important visual indicator.
  }
  return 'overlay-injected';
}`;
}
/**
 * Builds the JavaScript function string that removes the automation overlay.
 *
 * Like the injection script, this is a plain arrow-function expression with
 * no trailing invocation. The overlay id is embedded with `JSON.stringify`
 * so it is always a correctly escaped string literal in the generated source.
 *
 * @returns Source text of an arrow function that removes the overlay element
 *   if it is present and returns the string `'overlay-removed'` either way.
 */
function buildRemovalScript(): string {
  const idLiteral = JSON.stringify(OVERLAY_ELEMENT_ID);
  return `() => {
  const el = document.getElementById(${idLiteral});
  if (el) el.remove();
  return 'overlay-removed';
}`;
}
/**
 * Injects the automation overlay into the current page.
 *
 * Runs the injection script through the browser manager's `evaluate_script`
 * tool. This is best-effort: an error result or a thrown error is logged as
 * a warning and never propagated to the caller.
 *
 * @param browserManager - Manager used to dispatch the `evaluate_script` call.
 * @param signal - Optional abort signal forwarded to the tool call.
 */
export async function injectAutomationOverlay(
  browserManager: BrowserManager,
  signal?: AbortSignal,
): Promise<void> {
  try {
    debugLogger.log('Injecting automation overlay...');

    const toolArgs = { function: buildInjectionScript() };
    const outcome = await browserManager.callTool(
      'evaluate_script',
      toolArgs,
      signal,
    );

    if (!outcome.isError) {
      debugLogger.log('Automation overlay injected successfully');
      return;
    }
    debugLogger.warn('Failed to inject automation overlay:', outcome);
  } catch (err) {
    debugLogger.warn('Error injecting automation overlay:', err);
  }
}
/**
 * Removes the automation overlay from the current page.
 *
 * Runs the removal script through the browser manager's `evaluate_script`
 * tool. This is best-effort: an error result or a thrown error is logged as
 * a warning and never propagated to the caller.
 *
 * @param browserManager - Manager used to dispatch the `evaluate_script` call.
 * @param signal - Optional abort signal forwarded to the tool call.
 */
export async function removeAutomationOverlay(
  browserManager: BrowserManager,
  signal?: AbortSignal,
): Promise<void> {
  try {
    debugLogger.log('Removing automation overlay...');

    const toolArgs = { function: buildRemovalScript() };
    const outcome = await browserManager.callTool(
      'evaluate_script',
      toolArgs,
      signal,
    );

    if (!outcome.isError) {
      debugLogger.log('Automation overlay removed successfully');
      return;
    }
    debugLogger.warn('Failed to remove automation overlay:', outcome);
  } catch (err) {
    debugLogger.warn('Error removing automation overlay:', err);
  }
}

View File

@@ -9,6 +9,7 @@ import {
createBrowserAgentDefinition,
cleanupBrowserAgent,
} from './browserAgentFactory.js';
import { injectAutomationOverlay } from './automationOverlay.js';
import { makeFakeConfig } from '../../test-utils/config.js';
import type { Config } from '../../config/config.js';
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
@@ -35,6 +36,10 @@ vi.mock('./browserManager.js', () => ({
BrowserManager: vi.fn(() => mockBrowserManager),
}));
vi.mock('./automationOverlay.js', () => ({
injectAutomationOverlay: vi.fn().mockResolvedValue(undefined),
}));
vi.mock('../../utils/debugLogger.js', () => ({
debugLogger: {
log: vi.fn(),
@@ -55,6 +60,8 @@ describe('browserAgentFactory', () => {
beforeEach(() => {
vi.clearAllMocks();
vi.mocked(injectAutomationOverlay).mockClear();
// Reset mock implementations
mockBrowserManager.ensureConnection.mockResolvedValue(undefined);
mockBrowserManager.getDiscoveredTools.mockResolvedValue([
@@ -99,6 +106,28 @@ describe('browserAgentFactory', () => {
expect(mockBrowserManager.ensureConnection).toHaveBeenCalled();
});
it('should inject automation overlay when not in headless mode', async () => {
  await createBrowserAgentDefinition(mockConfig, mockMessageBus);

  // The overlay helper is handed the browser manager and nothing else.
  expect(injectAutomationOverlay).toHaveBeenCalledWith(mockBrowserManager);
});

it('should not inject automation overlay when in headless mode', async () => {
  // Browser agent enabled, with the headless flag switched on.
  const configWithHeadless = makeFakeConfig({
    agents: {
      overrides: {
        browser_agent: {
          enabled: true,
        },
      },
      browser: {
        headless: true,
      },
    },
  });

  await createBrowserAgentDefinition(configWithHeadless, mockMessageBus);

  // No visible window in headless mode, so the overlay must be skipped.
  expect(injectAutomationOverlay).not.toHaveBeenCalled();
});
it('should return agent definition with discovered tools', async () => {
const { definition } = await createBrowserAgentDefinition(
mockConfig,

View File

@@ -27,6 +27,7 @@ import {
} from './browserAgentDefinition.js';
import { createMcpDeclarativeTools } from './mcpToolWrapper.js';
import { createAnalyzeScreenshotTool } from './analyzeScreenshot.js';
import { injectAutomationOverlay } from './automationOverlay.js';
import { debugLogger } from '../../utils/debugLogger.js';
/**
@@ -61,6 +62,15 @@ export async function createBrowserAgentDefinition(
printOutput('Browser connected with isolated MCP client.');
}
// Inject automation overlay if not in headless mode
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig?.customConfig?.headless) {
if (printOutput) {
printOutput('Injecting automation overlay...');
}
await injectAutomationOverlay(browserManager);
}
// Create declarative tools from dynamically discovered MCP tools
// These tools dispatch to browserManager's isolated client
const mcpTools = await createMcpDeclarativeTools(browserManager, messageBus);

View File

@@ -8,6 +8,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { BrowserManager } from './browserManager.js';
import { makeFakeConfig } from '../../test-utils/config.js';
import type { Config } from '../../config/config.js';
import { injectAutomationOverlay } from './automationOverlay.js';
// Mock the MCP SDK
vi.mock('@modelcontextprotocol/sdk/client/index.js', () => ({
@@ -42,6 +43,10 @@ vi.mock('../../utils/debugLogger.js', () => ({
},
}));
vi.mock('./automationOverlay.js', () => ({
injectAutomationOverlay: vi.fn().mockResolvedValue(undefined),
}));
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
@@ -50,6 +55,7 @@ describe('BrowserManager', () => {
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(injectAutomationOverlay).mockClear();
// Setup mock config
mockConfig = makeFakeConfig({
@@ -411,4 +417,81 @@ describe('BrowserManager', () => {
expect(client.close).toHaveBeenCalled();
});
});
describe('overlay re-injection in callTool', () => {
  it('should re-inject overlay after click in non-headless mode', async () => {
    const manager = new BrowserManager(mockConfig);
    await manager.callTool('click', { uid: '1_2' });

    // No signal was passed to callTool, so none is forwarded to the overlay.
    expect(injectAutomationOverlay).toHaveBeenCalledWith(manager, undefined);
  });

  it('should re-inject overlay after navigate_page in non-headless mode', async () => {
    const manager = new BrowserManager(mockConfig);
    await manager.callTool('navigate_page', { url: 'https://example.com' });

    expect(injectAutomationOverlay).toHaveBeenCalledWith(manager, undefined);
  });

  // One case per tool so a failure names the offending tool. Together with
  // the click / navigate_page cases above this covers every tool that
  // BrowserManager treats as potentially navigating, including select_page.
  it.each(['click_at', 'new_page', 'select_page', 'press_key', 'handle_dialog'])(
    'should re-inject overlay after %s',
    async (tool: string) => {
      const manager = new BrowserManager(mockConfig);
      await manager.callTool(tool, {});

      expect(injectAutomationOverlay).toHaveBeenCalledTimes(1);
    },
  );

  // These tools are not treated as potentially navigating. Note that `fill`
  // is included even though it is not read-only.
  it.each(['take_snapshot', 'take_screenshot', 'get_console_message', 'fill'])(
    'should NOT re-inject overlay after non-navigating tool %s',
    async (tool: string) => {
      const manager = new BrowserManager(mockConfig);
      await manager.callTool(tool, {});

      expect(injectAutomationOverlay).not.toHaveBeenCalled();
    },
  );

  it('should NOT re-inject overlay when headless is true', async () => {
    const headlessConfig = makeFakeConfig({
      agents: {
        overrides: { browser_agent: { enabled: true } },
        browser: { headless: true },
      },
    });
    const manager = new BrowserManager(headlessConfig);
    await manager.callTool('click', { uid: '1_2' });

    expect(injectAutomationOverlay).not.toHaveBeenCalled();
  });

  it('should NOT re-inject overlay when tool returns an error result', async () => {
    // Replace the MCP client with one whose callTool resolves to an error result.
    vi.mocked(Client).mockImplementation(
      () =>
        ({
          connect: vi.fn().mockResolvedValue(undefined),
          close: vi.fn().mockResolvedValue(undefined),
          listTools: vi.fn().mockResolvedValue({ tools: [] }),
          callTool: vi.fn().mockResolvedValue({
            content: [{ type: 'text', text: 'Element not found' }],
            isError: true,
          }),
        }) as unknown as InstanceType<typeof Client>,
    );
    const manager = new BrowserManager(mockConfig);
    await manager.callTool('click', { uid: 'bad' });

    expect(injectAutomationOverlay).not.toHaveBeenCalled();
  });
});
});

View File

@@ -24,6 +24,7 @@ import { debugLogger } from '../../utils/debugLogger.js';
import type { Config } from '../../config/config.js';
import { Storage } from '../../config/storage.js';
import * as path from 'node:path';
import { injectAutomationOverlay } from './automationOverlay.js';
// Pin chrome-devtools-mcp version for reproducibility.
const CHROME_DEVTOOLS_MCP_VERSION = '0.17.1';
@@ -34,6 +35,27 @@ const BROWSER_PROFILE_DIR = 'cli-browser-profile';
// Default timeout for MCP operations
const MCP_TIMEOUT_MS = 60_000;
/**
 * Names of the tools after which the automation overlay may be gone.
 *
 * Most entries can trigger a full-page navigation, explicitly or implicitly,
 * which replaces the page DOM and with it the injected overlay. `select_page`
 * is listed because switching pages can also lose the overlay. BrowserManager
 * re-injects the overlay after every successful call to one of these tools.
 *
 * Note: chrome-devtools-mcp is a pure request/response server and emits no
 * MCP notifications, so listening for page-load events via the protocol is
 * not possible. Intercepting at callTool() is the equivalent mechanism.
 */
const POTENTIALLY_NAVIGATING_TOOLS = new Set<string>([
  // Clicking a link navigates.
  'click',
  // A coordinate click can also follow a link.
  'click_at',
  'navigate_page',
  'new_page',
  // Switching pages can lose the overlay.
  'select_page',
  // Enter on a focused link/form triggers navigation.
  'press_key',
  // Confirming beforeunload can trigger navigation.
  'handle_dialog',
]);
/**
* Content item from an MCP tool call response.
* Can be text or image (for take_screenshot).
@@ -70,7 +92,16 @@ export class BrowserManager {
private mcpTransport: StdioClientTransport | undefined;
private discoveredTools: McpTool[] = [];
constructor(private config: Config) {}
/**
* Whether to inject the automation overlay.
* Always false in headless mode (no visible window to decorate).
*/
private readonly shouldInjectOverlay: boolean;
constructor(private config: Config) {
const browserConfig = config.getBrowserAgentConfig();
this.shouldInjectOverlay = !browserConfig?.customConfig?.headless;
}
/**
* Gets the raw MCP SDK Client for direct tool calls.
@@ -120,28 +151,49 @@ export class BrowserManager {
{ timeout: MCP_TIMEOUT_MS },
);
let result: McpToolCallResult;
// If no signal, just await directly
if (!signal) {
return this.toResult(await callPromise);
}
// Race the call against the abort signal
let onAbort: (() => void) | undefined;
try {
const result = await Promise.race([
callPromise,
new Promise<never>((_resolve, reject) => {
onAbort = () =>
reject(signal.reason ?? new Error('Operation cancelled'));
signal.addEventListener('abort', onAbort, { once: true });
}),
]);
return this.toResult(result);
} finally {
if (onAbort) {
signal.removeEventListener('abort', onAbort);
result = this.toResult(await callPromise);
} else {
// Race the call against the abort signal
let onAbort: (() => void) | undefined;
try {
const raw = await Promise.race([
callPromise,
new Promise<never>((_resolve, reject) => {
onAbort = () =>
reject(signal.reason ?? new Error('Operation cancelled'));
signal.addEventListener('abort', onAbort, { once: true });
}),
]);
result = this.toResult(raw);
} finally {
if (onAbort) {
signal.removeEventListener('abort', onAbort);
}
}
}
// Re-inject the automation overlay after any tool that can cause a
// full-page navigation (including implicit navigations from clicking links).
// chrome-devtools-mcp emits no MCP notifications, so callTool() is the
// only interception point we have — equivalent to a page-load listener.
if (
this.shouldInjectOverlay &&
!result.isError &&
POTENTIALLY_NAVIGATING_TOOLS.has(toolName) &&
!signal?.aborted
) {
try {
await injectAutomationOverlay(this, signal);
} catch {
// Never let overlay failures interrupt the tool result
}
}
return result;
}
/**

View File

@@ -39,8 +39,8 @@ class McpToolInvocation extends BaseToolInvocation<
ToolResult
> {
constructor(
private readonly browserManager: BrowserManager,
private readonly toolName: string,
protected readonly browserManager: BrowserManager,
protected readonly toolName: string,
params: Record<string, unknown>,
messageBus: MessageBus,
) {
@@ -280,7 +280,7 @@ class McpDeclarativeTool extends DeclarativeTool<
ToolResult
> {
constructor(
private readonly browserManager: BrowserManager,
protected readonly browserManager: BrowserManager,
name: string,
description: string,
parameterSchema: unknown,

View File

@@ -14,6 +14,7 @@ import {
type ToolCallConfirmationDetails,
type PolicyUpdateOptions,
} from '../../tools/tools.js';
import { makeFakeConfig } from '../../test-utils/config.js';
interface TestableConfirmation {
getConfirmationDetails(
@@ -29,6 +30,7 @@ describe('mcpToolWrapper Confirmation', () => {
let mockMessageBus: MessageBus;
beforeEach(() => {
makeFakeConfig(); // ensure config module is loaded
mockBrowserManager = {
getDiscoveredTools: vi
.fn()