mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
feat: add pulsating blue border automation overlay to browser agent (#21173)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Gaurav <39389231+gsquared94@users.noreply.github.com>
This commit is contained in:
133
packages/core/src/agents/browser/automationOverlay.ts
Normal file
133
packages/core/src/agents/browser/automationOverlay.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fileoverview Automation overlay utilities for visual indication during browser automation.
|
||||
*
|
||||
* Provides functions to inject and remove a pulsating blue border overlay
|
||||
* that indicates when the browser is under AI agent control.
|
||||
*
|
||||
* Uses the Web Animations API instead of injected <style> tags so the
|
||||
* animation works on sites with strict Content Security Policies (e.g. google.com).
|
||||
*
|
||||
* The script strings are passed to chrome-devtools-mcp's evaluate_script tool
|
||||
* which expects a plain function expression (NOT an IIFE).
|
||||
*/
|
||||
|
||||
import type { BrowserManager } from './browserManager.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
|
||||
const OVERLAY_ELEMENT_ID = '__gemini_automation_overlay';
|
||||
|
||||
/**
 * Builds the JavaScript function string that injects the automation overlay.
 *
 * The result is a plain arrow-function expression with no trailing
 * invocation, because chrome-devtools-mcp's evaluate_script tool invokes the
 * function itself.
 *
 * When evaluated in the page, the generated function:
 *  1. removes any element that already carries the overlay id, so repeated
 *     injections never stack overlays;
 *  2. appends a fixed, full-viewport `<div>` with a solid blue border and
 *     `pointer-events: none` to `document.documentElement`;
 *  3. tries to start an infinite pulsating animation via `Element.animate`,
 *     keeping the static border if that call throws.
 *
 * Nested template literals are deliberately avoided inside the script text.
 *
 * @returns Source text of the injection function; the function itself returns
 *   the string 'overlay-injected'.
 */
function buildInjectionScript(): string {
  return `() => {
    const id = '${OVERLAY_ELEMENT_ID}';
    const existing = document.getElementById(id);
    if (existing) existing.remove();

    const overlay = document.createElement('div');
    overlay.id = id;
    overlay.setAttribute('aria-hidden', 'true');
    overlay.setAttribute('role', 'presentation');

    Object.assign(overlay.style, {
      position: 'fixed',
      top: '0',
      left: '0',
      right: '0',
      bottom: '0',
      zIndex: '2147483647',
      pointerEvents: 'none',
      border: '6px solid rgba(66, 133, 244, 1.0)',
    });

    document.documentElement.appendChild(overlay);

    try {
      overlay.animate([
        { borderColor: 'rgba(66,133,244,0.3)', boxShadow: 'inset 0 0 8px rgba(66,133,244,0.15)' },
        { borderColor: 'rgba(66,133,244,1.0)', boxShadow: 'inset 0 0 16px rgba(66,133,244,0.5)' },
        { borderColor: 'rgba(66,133,244,0.3)', boxShadow: 'inset 0 0 8px rgba(66,133,244,0.15)' }
      ], { duration: 2000, iterations: Infinity, easing: 'ease-in-out' });
    } catch {
      // Best effort: if the animation cannot be started for any reason, keep
      // the static border, which is the most important visual indicator.
    }

    return 'overlay-injected';
  }`;
}
|
||||
|
||||
/**
 * Produces the source text of the function that tears the overlay down.
 *
 * Like the injection script, this is a plain arrow-function expression with
 * no trailing invocation. When evaluated in the page it looks up the element
 * carrying the overlay id, removes it if present, and returns the string
 * 'overlay-removed' either way.
 *
 * @returns Source text of the removal function.
 */
function buildRemovalScript(): string {
  const removalSource = `() => {
    const el = document.getElementById('${OVERLAY_ELEMENT_ID}');
    if (el) el.remove();
    return 'overlay-removed';
  }`;
  return removalSource;
}
|
||||
|
||||
/**
 * Best-effort injection of the automation overlay into the current page.
 *
 * Runs the injection script through the `evaluate_script` tool. An error
 * result from the tool, or an exception raised while calling it, is logged as
 * a warning instead of being propagated to the caller.
 *
 * @param browserManager Manager whose `callTool` is used to run the script.
 * @param signal Optional abort signal, forwarded unchanged to `callTool`.
 */
export async function injectAutomationOverlay(
  browserManager: BrowserManager,
  signal?: AbortSignal,
): Promise<void> {
  try {
    debugLogger.log('Injecting automation overlay...');

    const script = buildInjectionScript();
    const outcome = await browserManager.callTool(
      'evaluate_script',
      { function: script },
      signal,
    );

    // Happy path first; anything else is reported as a warning below.
    if (!outcome.isError) {
      debugLogger.log('Automation overlay injected successfully');
      return;
    }

    debugLogger.warn('Failed to inject automation overlay:', outcome);
  } catch (error) {
    debugLogger.warn('Error injecting automation overlay:', error);
  }
}
|
||||
|
||||
/**
 * Best-effort removal of the automation overlay from the current page.
 *
 * Runs the removal script through the `evaluate_script` tool. An error result
 * from the tool, or an exception raised while calling it, is logged as a
 * warning instead of being propagated to the caller.
 *
 * @param browserManager Manager whose `callTool` is used to run the script.
 * @param signal Optional abort signal, forwarded unchanged to `callTool`.
 */
export async function removeAutomationOverlay(
  browserManager: BrowserManager,
  signal?: AbortSignal,
): Promise<void> {
  try {
    debugLogger.log('Removing automation overlay...');

    const script = buildRemovalScript();
    const outcome = await browserManager.callTool(
      'evaluate_script',
      { function: script },
      signal,
    );

    // Happy path first; anything else is reported as a warning below.
    if (!outcome.isError) {
      debugLogger.log('Automation overlay removed successfully');
      return;
    }

    debugLogger.warn('Failed to remove automation overlay:', outcome);
  } catch (error) {
    debugLogger.warn('Error removing automation overlay:', error);
  }
}
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
createBrowserAgentDefinition,
|
||||
cleanupBrowserAgent,
|
||||
} from './browserAgentFactory.js';
|
||||
import { injectAutomationOverlay } from './automationOverlay.js';
|
||||
import { makeFakeConfig } from '../../test-utils/config.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
||||
@@ -35,6 +36,10 @@ vi.mock('./browserManager.js', () => ({
|
||||
BrowserManager: vi.fn(() => mockBrowserManager),
|
||||
}));
|
||||
|
||||
vi.mock('./automationOverlay.js', () => ({
|
||||
injectAutomationOverlay: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
vi.mock('../../utils/debugLogger.js', () => ({
|
||||
debugLogger: {
|
||||
log: vi.fn(),
|
||||
@@ -55,6 +60,8 @@ describe('browserAgentFactory', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
|
||||
vi.mocked(injectAutomationOverlay).mockClear();
|
||||
|
||||
// Reset mock implementations
|
||||
mockBrowserManager.ensureConnection.mockResolvedValue(undefined);
|
||||
mockBrowserManager.getDiscoveredTools.mockResolvedValue([
|
||||
@@ -99,6 +106,28 @@ describe('browserAgentFactory', () => {
|
||||
expect(mockBrowserManager.ensureConnection).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should inject automation overlay when not in headless mode', async () => {
|
||||
await createBrowserAgentDefinition(mockConfig, mockMessageBus);
|
||||
expect(injectAutomationOverlay).toHaveBeenCalledWith(mockBrowserManager);
|
||||
});
|
||||
|
||||
it('should not inject automation overlay when in headless mode', async () => {
|
||||
const headlessConfig = makeFakeConfig({
|
||||
agents: {
|
||||
overrides: {
|
||||
browser_agent: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
browser: {
|
||||
headless: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
await createBrowserAgentDefinition(headlessConfig, mockMessageBus);
|
||||
expect(injectAutomationOverlay).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return agent definition with discovered tools', async () => {
|
||||
const { definition } = await createBrowserAgentDefinition(
|
||||
mockConfig,
|
||||
|
||||
@@ -27,6 +27,7 @@ import {
|
||||
} from './browserAgentDefinition.js';
|
||||
import { createMcpDeclarativeTools } from './mcpToolWrapper.js';
|
||||
import { createAnalyzeScreenshotTool } from './analyzeScreenshot.js';
|
||||
import { injectAutomationOverlay } from './automationOverlay.js';
|
||||
import { debugLogger } from '../../utils/debugLogger.js';
|
||||
|
||||
/**
|
||||
@@ -61,6 +62,15 @@ export async function createBrowserAgentDefinition(
|
||||
printOutput('Browser connected with isolated MCP client.');
|
||||
}
|
||||
|
||||
// Inject automation overlay if not in headless mode
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig?.customConfig?.headless) {
|
||||
if (printOutput) {
|
||||
printOutput('Injecting automation overlay...');
|
||||
}
|
||||
await injectAutomationOverlay(browserManager);
|
||||
}
|
||||
|
||||
// Create declarative tools from dynamically discovered MCP tools
|
||||
// These tools dispatch to browserManager's isolated client
|
||||
const mcpTools = await createMcpDeclarativeTools(browserManager, messageBus);
|
||||
|
||||
@@ -8,6 +8,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { BrowserManager } from './browserManager.js';
|
||||
import { makeFakeConfig } from '../../test-utils/config.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { injectAutomationOverlay } from './automationOverlay.js';
|
||||
|
||||
// Mock the MCP SDK
|
||||
vi.mock('@modelcontextprotocol/sdk/client/index.js', () => ({
|
||||
@@ -42,6 +43,10 @@ vi.mock('../../utils/debugLogger.js', () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock('./automationOverlay.js', () => ({
|
||||
injectAutomationOverlay: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
|
||||
@@ -50,6 +55,7 @@ describe('BrowserManager', () => {
|
||||
|
||||
beforeEach(() => {
|
||||
vi.resetAllMocks();
|
||||
vi.mocked(injectAutomationOverlay).mockClear();
|
||||
|
||||
// Setup mock config
|
||||
mockConfig = makeFakeConfig({
|
||||
@@ -411,4 +417,81 @@ describe('BrowserManager', () => {
|
||||
expect(client.close).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('overlay re-injection in callTool', () => {
  it('should re-inject overlay after click in non-headless mode', async () => {
    const manager = new BrowserManager(mockConfig);
    await manager.callTool('click', { uid: '1_2' });

    expect(injectAutomationOverlay).toHaveBeenCalledWith(manager, undefined);
  });

  it('should re-inject overlay after navigate_page in non-headless mode', async () => {
    const manager = new BrowserManager(mockConfig);
    await manager.callTool('navigate_page', { url: 'https://example.com' });

    expect(injectAutomationOverlay).toHaveBeenCalledWith(manager, undefined);
  });

  // Covers the remaining potentially-navigating tools, including select_page,
  // each of which must trigger exactly one re-injection per call.
  it('should re-inject overlay after click_at, new_page, select_page, press_key, handle_dialog', async () => {
    const manager = new BrowserManager(mockConfig);
    for (const tool of [
      'click_at',
      'new_page',
      'select_page',
      'press_key',
      'handle_dialog',
    ]) {
      vi.mocked(injectAutomationOverlay).mockClear();
      await manager.callTool(tool, {});
      expect(injectAutomationOverlay).toHaveBeenCalledTimes(1);
    }
  });

  // Not all of these are read-only (fill mutates the page); what they share
  // is that none of them is treated as potentially navigating.
  it('should NOT re-inject overlay after tools outside the navigating set', async () => {
    const manager = new BrowserManager(mockConfig);
    for (const tool of [
      'take_snapshot',
      'take_screenshot',
      'get_console_message',
      'fill',
    ]) {
      vi.mocked(injectAutomationOverlay).mockClear();
      await manager.callTool(tool, {});
      expect(injectAutomationOverlay).not.toHaveBeenCalled();
    }
  });

  it('should NOT re-inject overlay when headless is true', async () => {
    const headlessConfig = makeFakeConfig({
      agents: {
        overrides: { browser_agent: { enabled: true } },
        browser: { headless: true },
      },
    });
    const manager = new BrowserManager(headlessConfig);
    await manager.callTool('click', { uid: '1_2' });

    expect(injectAutomationOverlay).not.toHaveBeenCalled();
  });

  it('should NOT re-inject overlay when tool returns an error result', async () => {
    vi.mocked(Client).mockImplementation(
      () =>
        ({
          connect: vi.fn().mockResolvedValue(undefined),
          close: vi.fn().mockResolvedValue(undefined),
          listTools: vi.fn().mockResolvedValue({ tools: [] }),
          callTool: vi.fn().mockResolvedValue({
            content: [{ type: 'text', text: 'Element not found' }],
            isError: true,
          }),
        }) as unknown as InstanceType<typeof Client>,
    );

    const manager = new BrowserManager(mockConfig);
    await manager.callTool('click', { uid: 'bad' });

    expect(injectAutomationOverlay).not.toHaveBeenCalled();
  });
});
|
||||
});
|
||||
|
||||
@@ -24,6 +24,7 @@ import { debugLogger } from '../../utils/debugLogger.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { Storage } from '../../config/storage.js';
|
||||
import * as path from 'node:path';
|
||||
import { injectAutomationOverlay } from './automationOverlay.js';
|
||||
|
||||
// Pin chrome-devtools-mcp version for reproducibility.
|
||||
const CHROME_DEVTOOLS_MCP_VERSION = '0.17.1';
|
||||
@@ -34,6 +35,27 @@ const BROWSER_PROFILE_DIR = 'cli-browser-profile';
|
||||
// Default timeout for MCP operations
|
||||
const MCP_TIMEOUT_MS = 60_000;
|
||||
|
||||
/**
 * Names of tools that may trigger a full-page navigation, either explicitly
 * or as a side effect.
 *
 * A navigation replaces the page DOM and with it the injected automation
 * overlay, so BrowserManager re-injects the overlay after each successful
 * call to one of these tools.
 *
 * Note: chrome-devtools-mcp is a pure request/response server and emits no
 * MCP notifications, so page-load events cannot be observed through the
 * protocol. Intercepting at callTool() is the equivalent mechanism.
 */
const POTENTIALLY_NAVIGATING_TOOLS = new Set([
  // Clicking a link navigates.
  'click',
  // A coordinate click can also follow a link.
  'click_at',
  'navigate_page',
  'new_page',
  // Switching pages can lose the overlay.
  'select_page',
  // Enter on a focused link/form triggers navigation.
  'press_key',
  // Confirming beforeunload can trigger navigation.
  'handle_dialog',
]);
|
||||
|
||||
/**
|
||||
* Content item from an MCP tool call response.
|
||||
* Can be text or image (for take_screenshot).
|
||||
@@ -70,7 +92,16 @@ export class BrowserManager {
|
||||
private mcpTransport: StdioClientTransport | undefined;
|
||||
private discoveredTools: McpTool[] = [];
|
||||
|
||||
constructor(private config: Config) {}
|
||||
/**
|
||||
* Whether to inject the automation overlay.
|
||||
* Always false in headless mode (no visible window to decorate).
|
||||
*/
|
||||
private readonly shouldInjectOverlay: boolean;
|
||||
|
||||
constructor(private config: Config) {
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
this.shouldInjectOverlay = !browserConfig?.customConfig?.headless;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the raw MCP SDK Client for direct tool calls.
|
||||
@@ -120,28 +151,49 @@ export class BrowserManager {
|
||||
{ timeout: MCP_TIMEOUT_MS },
|
||||
);
|
||||
|
||||
let result: McpToolCallResult;
|
||||
|
||||
// If no signal, just await directly
|
||||
if (!signal) {
|
||||
return this.toResult(await callPromise);
|
||||
}
|
||||
|
||||
// Race the call against the abort signal
|
||||
let onAbort: (() => void) | undefined;
|
||||
try {
|
||||
const result = await Promise.race([
|
||||
callPromise,
|
||||
new Promise<never>((_resolve, reject) => {
|
||||
onAbort = () =>
|
||||
reject(signal.reason ?? new Error('Operation cancelled'));
|
||||
signal.addEventListener('abort', onAbort, { once: true });
|
||||
}),
|
||||
]);
|
||||
return this.toResult(result);
|
||||
} finally {
|
||||
if (onAbort) {
|
||||
signal.removeEventListener('abort', onAbort);
|
||||
result = this.toResult(await callPromise);
|
||||
} else {
|
||||
// Race the call against the abort signal
|
||||
let onAbort: (() => void) | undefined;
|
||||
try {
|
||||
const raw = await Promise.race([
|
||||
callPromise,
|
||||
new Promise<never>((_resolve, reject) => {
|
||||
onAbort = () =>
|
||||
reject(signal.reason ?? new Error('Operation cancelled'));
|
||||
signal.addEventListener('abort', onAbort, { once: true });
|
||||
}),
|
||||
]);
|
||||
result = this.toResult(raw);
|
||||
} finally {
|
||||
if (onAbort) {
|
||||
signal.removeEventListener('abort', onAbort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-inject the automation overlay after any tool that can cause a
|
||||
// full-page navigation (including implicit navigations from clicking links).
|
||||
// chrome-devtools-mcp emits no MCP notifications, so callTool() is the
|
||||
// only interception point we have — equivalent to a page-load listener.
|
||||
if (
|
||||
this.shouldInjectOverlay &&
|
||||
!result.isError &&
|
||||
POTENTIALLY_NAVIGATING_TOOLS.has(toolName) &&
|
||||
!signal?.aborted
|
||||
) {
|
||||
try {
|
||||
await injectAutomationOverlay(this, signal);
|
||||
} catch {
|
||||
// Never let overlay failures interrupt the tool result
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -39,8 +39,8 @@ class McpToolInvocation extends BaseToolInvocation<
|
||||
ToolResult
|
||||
> {
|
||||
constructor(
|
||||
private readonly browserManager: BrowserManager,
|
||||
private readonly toolName: string,
|
||||
protected readonly browserManager: BrowserManager,
|
||||
protected readonly toolName: string,
|
||||
params: Record<string, unknown>,
|
||||
messageBus: MessageBus,
|
||||
) {
|
||||
@@ -280,7 +280,7 @@ class McpDeclarativeTool extends DeclarativeTool<
|
||||
ToolResult
|
||||
> {
|
||||
constructor(
|
||||
private readonly browserManager: BrowserManager,
|
||||
protected readonly browserManager: BrowserManager,
|
||||
name: string,
|
||||
description: string,
|
||||
parameterSchema: unknown,
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
type ToolCallConfirmationDetails,
|
||||
type PolicyUpdateOptions,
|
||||
} from '../../tools/tools.js';
|
||||
import { makeFakeConfig } from '../../test-utils/config.js';
|
||||
|
||||
interface TestableConfirmation {
|
||||
getConfirmationDetails(
|
||||
@@ -29,6 +30,7 @@ describe('mcpToolWrapper Confirmation', () => {
|
||||
let mockMessageBus: MessageBus;
|
||||
|
||||
beforeEach(() => {
|
||||
makeFakeConfig(); // ensure config module is loaded
|
||||
mockBrowserManager = {
|
||||
getDiscoveredTools: vi
|
||||
.fn()
|
||||
|
||||
Reference in New Issue
Block a user