mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-06-09 02:22:43 -07:00
feat(browser): gate vision on visualModel setting
Vision (screenshot analysis + coordinate-based interactions) is now disabled by default. Set visualModel in browser_agent customConfig to enable it, e.g. visualModel: 'gemini-2.5-computer-use-preview-10-2025'.
This commit is contained in:
@@ -89,52 +89,39 @@ export async function createBrowserAgentDefinition(
|
||||
(t) => !availableToolNames.includes(t),
|
||||
);
|
||||
|
||||
// Check if visual agent model is available for current auth type.
|
||||
// The visual agent model (computer-use) is only available via Gemini API key
|
||||
// or Vertex AI, not via GCA/OAuth or Cloud Shell.
|
||||
const isVisualModelAvailable = (() => {
|
||||
// Check whether vision can be enabled; returns undefined if all gates pass.
|
||||
function getVisionDisabledReason(): string | undefined {
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig.customConfig.visualModel) {
|
||||
return 'No visualModel configured.';
|
||||
}
|
||||
if (missingVisualTools.length > 0) {
|
||||
return (
|
||||
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
|
||||
`Ensure chrome-devtools-mcp is started with --experimental-vision.`
|
||||
);
|
||||
}
|
||||
const authType = config.getContentGeneratorConfig()?.authType;
|
||||
if (
|
||||
authType === AuthType.LOGIN_WITH_GOOGLE ||
|
||||
authType === AuthType.LEGACY_CLOUD_SHELL ||
|
||||
authType === AuthType.COMPUTE_ADC
|
||||
) {
|
||||
return false;
|
||||
const blockedAuthTypes = new Set([
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
AuthType.LEGACY_CLOUD_SHELL,
|
||||
AuthType.COMPUTE_ADC,
|
||||
]);
|
||||
if (authType && blockedAuthTypes.has(authType)) {
|
||||
return 'Visual agent model not available for current auth type.';
|
||||
}
|
||||
return true;
|
||||
})();
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Create all tools - visual delegation only if visual tools are available
|
||||
const allTools: AnyDeclarativeTool[] = [...mcpTools];
|
||||
const visionDisabledReason = getVisionDisabledReason();
|
||||
|
||||
if (missingVisualTools.length > 0) {
|
||||
debugLogger.log(
|
||||
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
|
||||
`Visual agent delegation disabled. Ensure chrome-devtools-mcp is started with --experimental-vision.`,
|
||||
);
|
||||
if (printOutput) {
|
||||
printOutput(
|
||||
`⚠️ Visual tools unavailable - coordinate-based actions disabled.`,
|
||||
);
|
||||
}
|
||||
} else if (!isVisualModelAvailable) {
|
||||
debugLogger.log(
|
||||
`Visual agent model not available for current auth type. ` +
|
||||
`Visual agent delegation disabled.`,
|
||||
);
|
||||
if (printOutput) {
|
||||
printOutput(
|
||||
`⚠️ Visual agent unavailable for current auth type - coordinate-based actions disabled.`,
|
||||
);
|
||||
}
|
||||
if (visionDisabledReason) {
|
||||
debugLogger.log(`Vision disabled: ${visionDisabledReason}`);
|
||||
} else {
|
||||
// Create visual analysis tool only if visual tools are available
|
||||
const visualDelegationTool = createAnalyzeScreenshotTool(
|
||||
browserManager,
|
||||
config,
|
||||
messageBus,
|
||||
allTools.push(
|
||||
createAnalyzeScreenshotTool(browserManager, config, messageBus),
|
||||
);
|
||||
allTools.push(visualDelegationTool);
|
||||
}
|
||||
|
||||
debugLogger.log(
|
||||
|
||||
@@ -152,7 +152,8 @@ describe('BrowserManager', () => {
|
||||
]),
|
||||
});
|
||||
// Persistent mode should NOT include --isolated or --autoConnect
|
||||
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[];
|
||||
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]
|
||||
?.args as string[];
|
||||
expect(args).not.toContain('--isolated');
|
||||
expect(args).not.toContain('--autoConnect');
|
||||
});
|
||||
@@ -220,7 +221,8 @@ describe('BrowserManager', () => {
|
||||
const manager = new BrowserManager(isolatedConfig);
|
||||
await manager.ensureConnection();
|
||||
|
||||
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[];
|
||||
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]
|
||||
?.args as string[];
|
||||
expect(args).toContain('--isolated');
|
||||
expect(args).not.toContain('--autoConnect');
|
||||
});
|
||||
@@ -242,7 +244,8 @@ describe('BrowserManager', () => {
|
||||
const manager = new BrowserManager(existingConfig);
|
||||
await manager.ensureConnection();
|
||||
|
||||
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]?.args as string[];
|
||||
const args = vi.mocked(StdioClientTransport).mock.calls[0]?.[0]
|
||||
?.args as string[];
|
||||
expect(args).toContain('--autoConnect');
|
||||
expect(args).not.toContain('--isolated');
|
||||
});
|
||||
@@ -252,9 +255,7 @@ describe('BrowserManager', () => {
|
||||
vi.mocked(Client).mockImplementation(
|
||||
() =>
|
||||
({
|
||||
connect: vi
|
||||
.fn()
|
||||
.mockRejectedValue(new Error('Connection refused')),
|
||||
connect: vi.fn().mockRejectedValue(new Error('Connection refused')),
|
||||
close: vi.fn().mockResolvedValue(undefined),
|
||||
listTools: vi.fn(),
|
||||
callTool: vi.fn(),
|
||||
|
||||
Reference in New Issue
Block a user