feat(browser): gate vision on visualModel setting

Vision (screenshot analysis + coordinate-based interactions) is now
disabled by default. Set visualModel in browser_agent customConfig
to enable it, e.g. visualModel: 'gemini-2.5-computer-use-preview-10-2025'.
This commit is contained in:
Gaurav Ghosh
2026-02-17 06:42:30 -08:00
parent 2bc2945d14
commit fb1b2891cc
2 changed files with 33 additions and 45 deletions
@@ -89,52 +89,39 @@ export async function createBrowserAgentDefinition(
(t) => !availableToolNames.includes(t),
);
// Check if visual agent model is available for current auth type.
// The visual agent model (computer-use) is only available via Gemini API key
// or Vertex AI, not via GCA/OAuth or Cloud Shell.
const isVisualModelAvailable = (() => {
// Check whether vision can be enabled; returns undefined if all gates pass.
function getVisionDisabledReason(): string | undefined {
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig.customConfig.visualModel) {
return 'No visualModel configured.';
}
if (missingVisualTools.length > 0) {
return (
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
`Ensure chrome-devtools-mcp is started with --experimental-vision.`
);
}
const authType = config.getContentGeneratorConfig()?.authType;
if (
authType === AuthType.LOGIN_WITH_GOOGLE ||
authType === AuthType.LEGACY_CLOUD_SHELL ||
authType === AuthType.COMPUTE_ADC
) {
return false;
const blockedAuthTypes = new Set([
AuthType.LOGIN_WITH_GOOGLE,
AuthType.LEGACY_CLOUD_SHELL,
AuthType.COMPUTE_ADC,
]);
if (authType && blockedAuthTypes.has(authType)) {
return 'Visual agent model not available for current auth type.';
}
return true;
})();
return undefined;
}
// Create all tools - visual delegation only when vision is enabled (visualModel configured, visual tools present, auth type supported)
const allTools: AnyDeclarativeTool[] = [...mcpTools];
const visionDisabledReason = getVisionDisabledReason();
if (missingVisualTools.length > 0) {
debugLogger.log(
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
`Visual agent delegation disabled. Ensure chrome-devtools-mcp is started with --experimental-vision.`,
);
if (printOutput) {
printOutput(
`⚠️ Visual tools unavailable - coordinate-based actions disabled.`,
);
}
} else if (!isVisualModelAvailable) {
debugLogger.log(
`Visual agent model not available for current auth type. ` +
`Visual agent delegation disabled.`,
);
if (printOutput) {
printOutput(
`⚠️ Visual agent unavailable for current auth type - coordinate-based actions disabled.`,
);
}
if (visionDisabledReason) {
debugLogger.log(`Vision disabled: ${visionDisabledReason}`);
} else {
// Create visual analysis tool only if visual tools are available
const visualDelegationTool = createAnalyzeScreenshotTool(
browserManager,
config,
messageBus,
allTools.push(
createAnalyzeScreenshotTool(browserManager, config, messageBus),
);
allTools.push(visualDelegationTool);
}
debugLogger.log(
@@ -152,7 +152,8 @@ describe('BrowserManager', () => {
]),
});
// Persistent mode should NOT include --isolated or --autoConnect
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[];
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]
?.args as string[];
expect(args).not.toContain('--isolated');
expect(args).not.toContain('--autoConnect');
});
@@ -220,7 +221,8 @@ describe('BrowserManager', () => {
const manager = new BrowserManager(isolatedConfig);
await manager.ensureConnection();
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[];
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]
?.args as string[];
expect(args).toContain('--isolated');
expect(args).not.toContain('--autoConnect');
});
@@ -242,7 +244,8 @@ describe('BrowserManager', () => {
const manager = new BrowserManager(existingConfig);
await manager.ensureConnection();
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[];
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]
?.args as string[];
expect(args).toContain('--autoConnect');
expect(args).not.toContain('--isolated');
});
@@ -252,9 +255,7 @@ describe('BrowserManager', () => {
vi.mocked(Client).mockImplementation(
() =>
({
connect: vi
.fn()
.mockRejectedValue(new Error('Connection refused')),
connect: vi.fn().mockRejectedValue(new Error('Connection refused')),
close: vi.fn().mockResolvedValue(undefined),
listTools: vi.fn(),
callTool: vi.fn(),