From fb1b2891cc8431dae38a6575dcc44c61f441eb68 Mon Sep 17 00:00:00 2001 From: Gaurav Ghosh Date: Tue, 17 Feb 2026 06:42:30 -0800 Subject: [PATCH] feat(browser): gate vision on visualModel setting Vision (screenshot analysis + coordinate-based interactions) is now disabled by default. Set visualModel in browser_agent customConfig to enable it, e.g. visualModel: 'gemini-2.5-computer-use-preview-10-2025'. --- .../src/agents/browser/browserAgentFactory.ts | 65 ++++++++----------- .../src/agents/browser/browserManager.test.ts | 13 ++-- 2 files changed, 33 insertions(+), 45 deletions(-) diff --git a/packages/core/src/agents/browser/browserAgentFactory.ts b/packages/core/src/agents/browser/browserAgentFactory.ts index 93d1caf90e..cacc15760b 100644 --- a/packages/core/src/agents/browser/browserAgentFactory.ts +++ b/packages/core/src/agents/browser/browserAgentFactory.ts @@ -89,52 +89,39 @@ export async function createBrowserAgentDefinition( (t) => !availableToolNames.includes(t), ); - // Check if visual agent model is available for current auth type. - // The visual agent model (computer-use) is only available via Gemini API key - // or Vertex AI, not via GCA/OAuth or Cloud Shell. - const isVisualModelAvailable = (() => { + // Check whether vision can be enabled; returns undefined if all gates pass. + function getVisionDisabledReason(): string | undefined { + const browserConfig = config.getBrowserAgentConfig(); + if (!browserConfig.customConfig.visualModel) { + return 'No visualModel configured.'; + } + if (missingVisualTools.length > 0) { + return ( + `Visual tools missing (${missingVisualTools.join(', ')}). ` + + `Ensure chrome-devtools-mcp is started with --experimental-vision.` + ); + } const authType = config.getContentGeneratorConfig()?.authType; - if ( - authType === AuthType.LOGIN_WITH_GOOGLE || - authType === AuthType.LEGACY_CLOUD_SHELL || - authType === AuthType.COMPUTE_ADC - ) { - return false; + const blockedAuthTypes = new Set([ + AuthType.LOGIN_WITH_GOOGLE, + AuthType.LEGACY_CLOUD_SHELL, + AuthType.COMPUTE_ADC, + ]); + if (authType && blockedAuthTypes.has(authType)) { + return 'Visual agent model not available for current auth type.'; } - return true; - })(); + return undefined; + } - // Create all tools - visual delegation only if visual tools are available const allTools: AnyDeclarativeTool[] = [...mcpTools]; + const visionDisabledReason = getVisionDisabledReason(); - if (missingVisualTools.length > 0) { - debugLogger.log( - `Visual tools missing (${missingVisualTools.join(', ')}). ` + - `Visual agent delegation disabled. Ensure chrome-devtools-mcp is started with --experimental-vision.`, - ); - if (printOutput) { - printOutput( - `⚠️ Visual tools unavailable - coordinate-based actions disabled.`, - ); - } - } else if (!isVisualModelAvailable) { - debugLogger.log( - `Visual agent model not available for current auth type. ` + - `Visual agent delegation disabled.`, - ); - if (printOutput) { - printOutput( - `⚠️ Visual agent unavailable for current auth type - coordinate-based actions disabled.`, - ); - } + if (visionDisabledReason) { + debugLogger.log(`Vision disabled: ${visionDisabledReason}`); } else { - // Create visual analysis tool only if visual tools are available - const visualDelegationTool = createAnalyzeScreenshotTool( - browserManager, - config, - messageBus, + allTools.push( + createAnalyzeScreenshotTool(browserManager, config, messageBus), ); - allTools.push(visualDelegationTool); } debugLogger.log( diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts index 389e7da70d..a8445c9bc9 100644 --- a/packages/core/src/agents/browser/browserManager.test.ts +++ b/packages/core/src/agents/browser/browserManager.test.ts @@ -152,7 +152,8 @@ describe('BrowserManager', () => { ]), }); // Persistent mode should NOT include --isolated or --autoConnect - const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[]; + const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0] + ?.args as string[]; expect(args).not.toContain('--isolated'); expect(args).not.toContain('--autoConnect'); }); @@ -220,7 +221,8 @@ describe('BrowserManager', () => { const manager = new BrowserManager(isolatedConfig); await manager.ensureConnection(); - const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[]; + const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0] + ?.args as string[]; expect(args).toContain('--isolated'); expect(args).not.toContain('--autoConnect'); }); @@ -242,7 +244,8 @@ describe('BrowserManager', () => { const manager = new BrowserManager(existingConfig); await manager.ensureConnection(); - const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[]; + const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0] + ?.args as string[]; expect(args).toContain('--autoConnect'); expect(args).not.toContain('--isolated'); }); @@ -252,9 +255,7 @@ describe('BrowserManager', () => { vi.mocked(Client).mockImplementation( () => ({ - connect: vi - .fn() - .mockRejectedValue(new Error('Connection refused')), + connect: vi.fn().mockRejectedValue(new Error('Connection refused')), close: vi.fn().mockResolvedValue(undefined), listTools: vi.fn(), callTool: vi.fn(),