Merge branch 'main' into memory_usage3

This commit is contained in:
Spencer
2026-04-08 12:57:16 -04:00
committed by GitHub
64 changed files with 1158 additions and 620 deletions
@@ -17,6 +17,9 @@ import type {
ToolCallRequestInfo,
} from '../scheduler/types.js';
import { CoreToolCallStatus } from '../scheduler/types.js';
import type { GeminiClient } from '../core/client.js';
import type { Scheduler } from '../scheduler/scheduler.js';
import type { Config } from '../config/config.js';
// ---------------------------------------------------------------------------
// Mock helpers
@@ -24,7 +27,7 @@ import { CoreToolCallStatus } from '../scheduler/types.js';
function createMockDeps(
overrides?: Partial<LegacyAgentSessionDeps>,
): LegacyAgentSessionDeps {
): Required<LegacyAgentSessionDeps> {
const mockClient = {
sendMessageStream: vi.fn(),
getChat: vi.fn().mockReturnValue({
@@ -40,18 +43,22 @@ function createMockDeps(
const mockConfig = {
getMaxSessionTurns: vi.fn().mockReturnValue(-1),
getModel: vi.fn().mockReturnValue('gemini-2.5-pro'),
getGeminiClient: vi.fn().mockReturnValue(mockClient),
getMessageBus: vi.fn().mockImplementation(() => ({
subscribe: vi.fn(),
unsubscribe: vi.fn(),
})),
};
return {
client: mockClient as unknown as LegacyAgentSessionDeps['client'],
scheduler: mockScheduler as unknown as LegacyAgentSessionDeps['scheduler'],
config: mockConfig as unknown as LegacyAgentSessionDeps['config'],
client: mockClient as unknown as GeminiClient,
scheduler: mockScheduler as unknown as Scheduler,
config: mockConfig as unknown as Config,
promptId: 'test-prompt',
streamId: 'test-stream',
getPreferredEditor: vi.fn().mockReturnValue(undefined),
...overrides,
};
} as Required<LegacyAgentSessionDeps>;
}
async function* makeStream(
@@ -129,7 +136,7 @@ async function collectEvents(
// ---------------------------------------------------------------------------
describe('LegacyAgentSession', () => {
let deps: LegacyAgentSessionDeps;
let deps: Required<LegacyAgentSessionDeps>;
beforeEach(() => {
deps = createMockDeps();
@@ -14,10 +14,11 @@ import type { Part } from '@google/genai';
import type { GeminiClient } from '../core/client.js';
import type { Config } from '../config/config.js';
import type { ToolCallRequestInfo } from '../scheduler/types.js';
import type { Scheduler } from '../scheduler/scheduler.js';
import { Scheduler } from '../scheduler/scheduler.js';
import { recordToolCallInteractions } from '../code_assist/telemetry.js';
import { ToolErrorType, isFatalToolError } from '../tools/tool-error.js';
import { debugLogger } from '../utils/debugLogger.js';
import type { EditorType } from '../utils/editor.js';
import {
buildToolResponseData,
contentPartsToGeminiParts,
@@ -45,14 +46,17 @@ function isAbortLikeError(err: unknown): boolean {
}
export interface LegacyAgentSessionDeps {
client: GeminiClient;
scheduler: Scheduler;
config: Config;
promptId: string;
client?: GeminiClient;
scheduler?: Scheduler;
promptId?: string;
streamId?: string;
getPreferredEditor?: () => EditorType | undefined;
}
class LegacyAgentProtocol implements AgentProtocol {
const schedulerMap = new WeakMap<Config, Scheduler>();
export class LegacyAgentProtocol implements AgentProtocol {
private _events: AgentEvent[] = [];
private _subscribers = new Set<(event: AgentEvent) => void>();
private _translationState: TranslationState;
@@ -69,10 +73,26 @@ class LegacyAgentProtocol implements AgentProtocol {
constructor(deps: LegacyAgentSessionDeps) {
this._translationState = createTranslationState(deps.streamId);
this._nextStreamIdOverride = deps.streamId;
this._client = deps.client;
this._scheduler = deps.scheduler;
this._config = deps.config;
this._promptId = deps.promptId;
this._client = deps.client ?? deps.config.getGeminiClient();
this._promptId = deps.promptId ?? deps.config.promptId ?? '';
if (deps.scheduler) {
this._scheduler = deps.scheduler;
} else {
let scheduler = schedulerMap.get(deps.config);
if (!scheduler) {
const sessionId = deps.config.getSessionId();
const schedulerId = `legacy-agent-scheduler-${sessionId}`;
scheduler = new Scheduler({
context: deps.config,
schedulerId,
getPreferredEditor: deps.getPreferredEditor ?? (() => undefined),
});
schedulerMap.set(deps.config, scheduler);
}
this._scheduler = scheduler;
}
}
get events(): readonly AgentEvent[] {
@@ -15,6 +15,7 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js';
vi.mock('../scheduler/scheduler.js', () => ({
Scheduler: vi.fn().mockImplementation(() => ({
schedule: vi.fn().mockResolvedValue([{ status: 'success' }]),
dispose: vi.fn(),
})),
}));
@@ -125,6 +126,57 @@ describe('agent-scheduler', () => {
expect(schedulerConfig.toolRegistry).not.toBe(mainRegistry);
});
it('should dispose the scheduler after schedule completes', async () => {
const mockConfig = {
getPromptRegistry: vi.fn(),
getResourceRegistry: vi.fn(),
messageBus: mockMessageBus,
toolRegistry: mockToolRegistry,
} as unknown as Mocked<Config>;
const options = {
schedulerId: 'subagent-1',
toolRegistry: mockToolRegistry as unknown as ToolRegistry,
signal: new AbortController().signal,
};
await scheduleAgentTools(mockConfig as unknown as Config, [], options);
const schedulerInstance = vi.mocked(Scheduler).mock.results[0].value;
expect(schedulerInstance.dispose).toHaveBeenCalledOnce();
});
it('should dispose the scheduler even when schedule throws', async () => {
const scheduleError = new Error('schedule failed');
vi.mocked(Scheduler).mockImplementationOnce(
() =>
({
schedule: vi.fn().mockRejectedValue(scheduleError),
dispose: vi.fn(),
}) as unknown as Scheduler,
);
const mockConfig = {
getPromptRegistry: vi.fn(),
getResourceRegistry: vi.fn(),
messageBus: mockMessageBus,
toolRegistry: mockToolRegistry,
} as unknown as Mocked<Config>;
const options = {
schedulerId: 'subagent-1',
toolRegistry: mockToolRegistry as unknown as ToolRegistry,
signal: new AbortController().signal,
};
await expect(
scheduleAgentTools(mockConfig as unknown as Config, [], options),
).rejects.toThrow('schedule failed');
const schedulerInstance = vi.mocked(Scheduler).mock.results[0].value;
expect(schedulerInstance.dispose).toHaveBeenCalledOnce();
});
it('should create an AgentLoopContext that has a defined .config property', async () => {
const mockConfig = {
getPromptRegistry: vi.fn(),
+5 -1
View File
@@ -85,5 +85,9 @@ export async function scheduleAgentTools(
onWaitingForConfirmation,
});
return scheduler.schedule(requests, signal);
try {
return await scheduler.schedule(requests, signal);
} finally {
scheduler.dispose();
}
}
@@ -38,6 +38,8 @@ const mockBrowserManager = {
]),
callTool: vi.fn().mockResolvedValue({ content: [] }),
close: vi.fn().mockResolvedValue(undefined),
acquire: vi.fn(),
release: vi.fn(),
};
// Mock dependencies
@@ -81,207 +81,218 @@ export async function createBrowserAgentDefinition(
// Get or create browser manager singleton for this session mode/profile
const browserManager = BrowserManager.getInstance(config);
await browserManager.ensureConnection();
browserManager.acquire();
debugLogger.log('Browser connected with isolated MCP client.');
try {
await browserManager.ensureConnection();
// Determine if input blocker should be active (non-headless + enabled)
const shouldDisableInput = config.shouldDisableBrowserUserInput();
// Inject automation overlay and input blocker if not in headless mode
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig?.customConfig?.headless) {
debugLogger.log('Injecting automation overlay...');
await injectAutomationOverlay(browserManager);
if (shouldDisableInput) {
debugLogger.log('Injecting input blocker...');
await injectInputBlocker(browserManager);
}
}
debugLogger.log('Browser connected with isolated MCP client.');
// Create declarative tools from dynamically discovered MCP tools
// These tools dispatch to browserManager's isolated client
const mcpTools = await createMcpDeclarativeTools(
browserManager,
messageBus,
shouldDisableInput,
browserConfig.customConfig.blockFileUploads,
);
const availableToolNames = mcpTools.map((t) => t.name);
// Register high-priority policy rules for sensitive actions which is not
// able to be overwrite by YOLO mode.
const policyEngine = config.getPolicyEngine();
if (policyEngine) {
const existingRules = policyEngine.getRules();
const restrictedTools = ['fill', 'fill_form'];
// ASK_USER for upload_file and evaluate_script when sensitive action
// need confirmation.
if (browserConfig.customConfig.confirmSensitiveActions) {
restrictedTools.push('upload_file', 'evaluate_script');
}
for (const toolName of restrictedTools) {
const rule = generateAskUserRules(toolName);
if (!existingRules.some((r) => isRuleEqual(r, rule))) {
policyEngine.addRule(rule);
// Determine if input blocker should be active (non-headless + enabled)
const shouldDisableInput = config.shouldDisableBrowserUserInput();
// Inject automation overlay and input blocker if not in headless mode
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig?.customConfig?.headless) {
debugLogger.log('Injecting automation overlay...');
await injectAutomationOverlay(browserManager);
if (shouldDisableInput) {
debugLogger.log('Injecting input blocker...');
await injectInputBlocker(browserManager);
}
}
// Reduce noise for read-only tools in default mode
const readOnlyTools = (await browserManager.getDiscoveredTools())
.filter((t) => !!t.annotations?.readOnlyHint)
.map((t) => t.name);
const allowlistedReadonlyTools = ['take_snapshot', 'take_screenshot'];
// Create declarative tools from dynamically discovered MCP tools
// These tools dispatch to browserManager's isolated client
const mcpTools = await createMcpDeclarativeTools(
browserManager,
messageBus,
shouldDisableInput,
browserConfig.customConfig.blockFileUploads,
);
const availableToolNames = mcpTools.map((t) => t.name);
for (const toolName of [...readOnlyTools, ...allowlistedReadonlyTools]) {
if (availableToolNames.includes(toolName)) {
const rule = generateAllowRules(toolName);
// Register high-priority policy rules for sensitive actions which is not
// able to be overwrite by YOLO mode.
const policyEngine = config.getPolicyEngine();
if (policyEngine) {
const existingRules = policyEngine.getRules();
const restrictedTools = ['fill', 'fill_form'];
// ASK_USER for upload_file and evaluate_script when sensitive action
// need confirmation.
if (browserConfig.customConfig.confirmSensitiveActions) {
restrictedTools.push('upload_file', 'evaluate_script');
}
for (const toolName of restrictedTools) {
const rule = generateAskUserRules(toolName);
if (!existingRules.some((r) => isRuleEqual(r, rule))) {
policyEngine.addRule(rule);
}
}
}
}
function generateAskUserRules(toolName: string): PolicyRule {
return {
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
decision: PolicyDecision.ASK_USER,
priority: 999,
source: 'BrowserAgent (Sensitive Actions)',
mcpName: BROWSER_AGENT_NAME,
// Reduce noise for read-only tools in default mode
const readOnlyTools = (await browserManager.getDiscoveredTools())
.filter((t) => !!t.annotations?.readOnlyHint)
.map((t) => t.name);
const allowlistedReadonlyTools = ['take_snapshot', 'take_screenshot'];
for (const toolName of [...readOnlyTools, ...allowlistedReadonlyTools]) {
if (availableToolNames.includes(toolName)) {
const rule = generateAllowRules(toolName);
if (!existingRules.some((r) => isRuleEqual(r, rule))) {
policyEngine.addRule(rule);
}
}
}
}
function generateAskUserRules(toolName: string): PolicyRule {
return {
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
decision: PolicyDecision.ASK_USER,
priority: 999,
source: 'BrowserAgent (Sensitive Actions)',
mcpName: BROWSER_AGENT_NAME,
};
}
function generateAllowRules(toolName: string): PolicyRule {
return {
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
decision: PolicyDecision.ALLOW,
priority: PRIORITY_SUBAGENT_TOOL,
source: 'BrowserAgent (Read-Only)',
mcpName: BROWSER_AGENT_NAME,
};
}
// Check if policy rule the same in all the attributes that we care about
function isRuleEqual(rule1: PolicyRule, rule2: PolicyRule) {
return (
rule1.toolName === rule2.toolName &&
rule1.decision === rule2.decision &&
rule1.priority === rule2.priority &&
rule1.mcpName === rule2.mcpName
);
}
// Validate required semantic tools are available
const requiredSemanticTools = [
'click',
'fill',
'navigate_page',
'take_snapshot',
];
const missingSemanticTools = requiredSemanticTools.filter(
(t) => !availableToolNames.includes(t),
);
const rawSessionMode = browserConfig?.customConfig?.sessionMode;
const sessionMode =
rawSessionMode === 'isolated' || rawSessionMode === 'existing'
? rawSessionMode
: 'persistent';
recordBrowserAgentToolDiscovery(
config,
mcpTools.length,
missingSemanticTools,
sessionMode,
);
if (missingSemanticTools.length > 0) {
debugLogger.warn(
`Semantic tools missing (${missingSemanticTools.join(', ')}). ` +
'Some browser interactions may not work correctly.',
);
}
// Only click_at is strictly required — text input can use press_key or fill.
const requiredVisualTools = ['click_at'];
const missingVisualTools = requiredVisualTools.filter(
(t) => !availableToolNames.includes(t),
);
// Check whether vision can be enabled; returns structured type with code and message.
function getVisionDisabledReason(): VisionDisabledReason {
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig.customConfig.visualModel) {
return {
code: 'no_visual_model',
message: 'No visualModel configured.',
};
}
if (missingVisualTools.length > 0) {
return {
code: 'missing_visual_tools',
message:
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
`The installed chrome-devtools-mcp version may be too old.`,
};
}
const authType = config.getContentGeneratorConfig()?.authType;
const blockedAuthTypes = new Set([
AuthType.LOGIN_WITH_GOOGLE,
AuthType.LEGACY_CLOUD_SHELL,
AuthType.COMPUTE_ADC,
]);
if (authType && blockedAuthTypes.has(authType)) {
return {
code: 'blocked_auth_type',
message: 'Visual agent model not available for current auth type.',
};
}
return undefined;
}
const allTools: AnyDeclarativeTool[] = [...mcpTools];
const visionDisabledReason = getVisionDisabledReason();
logBrowserAgentVisionStatus(config, {
enabled: !visionDisabledReason,
disabled_reason: visionDisabledReason?.code,
});
if (visionDisabledReason) {
debugLogger.log(`Vision disabled: ${visionDisabledReason.message}`);
} else {
allTools.push(
createAnalyzeScreenshotTool(browserManager, config, messageBus),
);
}
debugLogger.log(
`Created ${allTools.length} tools for browser agent: ` +
allTools.map((t) => t.name).join(', '),
);
// Create configured definition with tools
// BrowserAgentDefinition is a factory function - call it with config
const baseDefinition = BrowserAgentDefinition(
config,
!visionDisabledReason,
);
const definition: LocalAgentDefinition<typeof BrowserTaskResultSchema> = {
...baseDefinition,
toolConfig: {
tools: allTools,
},
};
}
function generateAllowRules(toolName: string): PolicyRule {
return {
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
decision: PolicyDecision.ALLOW,
priority: PRIORITY_SUBAGENT_TOOL,
source: 'BrowserAgent (Read-Only)',
mcpName: BROWSER_AGENT_NAME,
definition,
browserManager,
visionEnabled: !visionDisabledReason,
sessionMode,
};
} catch (error) {
// Release the browser manager if setup fails, so concurrent tasks can try again.
browserManager.release();
throw error;
}
// Check if policy rule the same in all the attributes that we care about
function isRuleEqual(rule1: PolicyRule, rule2: PolicyRule) {
return (
rule1.toolName === rule2.toolName &&
rule1.decision === rule2.decision &&
rule1.priority === rule2.priority &&
rule1.mcpName === rule2.mcpName
);
}
// Validate required semantic tools are available
const requiredSemanticTools = [
'click',
'fill',
'navigate_page',
'take_snapshot',
];
const missingSemanticTools = requiredSemanticTools.filter(
(t) => !availableToolNames.includes(t),
);
const rawSessionMode = browserConfig?.customConfig?.sessionMode;
const sessionMode =
rawSessionMode === 'isolated' || rawSessionMode === 'existing'
? rawSessionMode
: 'persistent';
recordBrowserAgentToolDiscovery(
config,
mcpTools.length,
missingSemanticTools,
sessionMode,
);
if (missingSemanticTools.length > 0) {
debugLogger.warn(
`Semantic tools missing (${missingSemanticTools.join(', ')}). ` +
'Some browser interactions may not work correctly.',
);
}
// Only click_at is strictly required — text input can use press_key or fill.
const requiredVisualTools = ['click_at'];
const missingVisualTools = requiredVisualTools.filter(
(t) => !availableToolNames.includes(t),
);
// Check whether vision can be enabled; returns structured type with code and message.
function getVisionDisabledReason(): VisionDisabledReason {
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig.customConfig.visualModel) {
return {
code: 'no_visual_model',
message: 'No visualModel configured.',
};
}
if (missingVisualTools.length > 0) {
return {
code: 'missing_visual_tools',
message:
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
`The installed chrome-devtools-mcp version may be too old.`,
};
}
const authType = config.getContentGeneratorConfig()?.authType;
const blockedAuthTypes = new Set([
AuthType.LOGIN_WITH_GOOGLE,
AuthType.LEGACY_CLOUD_SHELL,
AuthType.COMPUTE_ADC,
]);
if (authType && blockedAuthTypes.has(authType)) {
return {
code: 'blocked_auth_type',
message: 'Visual agent model not available for current auth type.',
};
}
return undefined;
}
const allTools: AnyDeclarativeTool[] = [...mcpTools];
const visionDisabledReason = getVisionDisabledReason();
logBrowserAgentVisionStatus(config, {
enabled: !visionDisabledReason,
disabled_reason: visionDisabledReason?.code,
});
if (visionDisabledReason) {
debugLogger.log(`Vision disabled: ${visionDisabledReason.message}`);
} else {
allTools.push(
createAnalyzeScreenshotTool(browserManager, config, messageBus),
);
}
debugLogger.log(
`Created ${allTools.length} tools for browser agent: ` +
allTools.map((t) => t.name).join(', '),
);
// Create configured definition with tools
// BrowserAgentDefinition is a factory function - call it with config
const baseDefinition = BrowserAgentDefinition(config, !visionDisabledReason);
const definition: LocalAgentDefinition<typeof BrowserTaskResultSchema> = {
...baseDefinition,
toolConfig: {
tools: allTools,
},
};
return {
definition,
browserManager,
visionEnabled: !visionDisabledReason,
sessionMode,
};
}
/**
@@ -192,7 +192,10 @@ describe('BrowserAgentInvocation', () => {
promptConfig: { query: '', systemPrompt: '' },
toolConfig: { tools: ['analyze_screenshot', 'click'] },
},
browserManager: {} as never,
browserManager: {
release: vi.fn(),
callTool: vi.fn().mockResolvedValue({ content: [] }),
} as never,
visionEnabled: true,
sessionMode: 'persistent',
});
@@ -766,6 +769,7 @@ describe('BrowserAgentInvocation', () => {
}
return { isError: false };
}),
release: vi.fn(),
};
vi.mocked(createBrowserAgentDefinition).mockResolvedValue({
@@ -440,6 +440,8 @@ ${output.result}`;
}
} catch {
// Ignore errors for removing the overlays.
} finally {
browserManager.release();
}
}
}
@@ -873,6 +873,122 @@ describe('BrowserManager', () => {
expect(instance1).not.toBe(instance2);
});
it('should throw when acquired instance is requested in persistent mode', () => {
// mockConfig defaults to persistent mode
const instance1 = BrowserManager.getInstance(mockConfig);
instance1.acquire();
expect(() => BrowserManager.getInstance(mockConfig)).toThrow(
/Cannot launch a concurrent browser agent in "persistent" session mode/,
);
});
it('should throw when acquired instance is requested in existing mode', () => {
const existingConfig = makeFakeConfig({
agents: {
overrides: { browser_agent: { enabled: true } },
browser: { sessionMode: 'existing' },
},
});
const instance1 = BrowserManager.getInstance(existingConfig);
instance1.acquire();
expect(() => BrowserManager.getInstance(existingConfig)).toThrow(
/Cannot launch a concurrent browser agent in "existing" session mode/,
);
});
it('should return a different instance when the primary is acquired in isolated mode', () => {
const isolatedConfig = makeFakeConfig({
agents: {
overrides: { browser_agent: { enabled: true } },
browser: { sessionMode: 'isolated' },
},
});
const instance1 = BrowserManager.getInstance(isolatedConfig);
instance1.acquire();
const instance2 = BrowserManager.getInstance(isolatedConfig);
expect(instance2).not.toBe(instance1);
expect(instance1.isAcquired()).toBe(true);
expect(instance2.isAcquired()).toBe(false);
});
it('should reuse the primary when it has been released', () => {
const instance1 = BrowserManager.getInstance(mockConfig);
instance1.acquire();
instance1.release();
const instance2 = BrowserManager.getInstance(mockConfig);
expect(instance2).toBe(instance1);
expect(instance1.isAcquired()).toBe(false);
});
it('should reuse a released parallel instance in isolated mode', () => {
const isolatedConfig = makeFakeConfig({
agents: {
overrides: { browser_agent: { enabled: true } },
browser: { sessionMode: 'isolated' },
},
});
const instance1 = BrowserManager.getInstance(isolatedConfig);
instance1.acquire();
const instance2 = BrowserManager.getInstance(isolatedConfig);
instance2.acquire();
instance2.release();
// Primary is still acquired, parallel is released — should reuse parallel
const instance3 = BrowserManager.getInstance(isolatedConfig);
expect(instance3).toBe(instance2);
});
it('should create multiple parallel instances in isolated mode', () => {
const isolatedConfig = makeFakeConfig({
agents: {
overrides: { browser_agent: { enabled: true } },
browser: { sessionMode: 'isolated' },
},
});
const instance1 = BrowserManager.getInstance(isolatedConfig);
instance1.acquire();
const instance2 = BrowserManager.getInstance(isolatedConfig);
instance2.acquire();
const instance3 = BrowserManager.getInstance(isolatedConfig);
expect(instance1).not.toBe(instance2);
expect(instance2).not.toBe(instance3);
expect(instance1).not.toBe(instance3);
});
it('should throw when MAX_PARALLEL_INSTANCES is reached in isolated mode', () => {
const isolatedConfig = makeFakeConfig({
agents: {
overrides: { browser_agent: { enabled: true } },
browser: { sessionMode: 'isolated' },
},
});
// Acquire MAX_PARALLEL_INSTANCES instances
for (let i = 0; i < BrowserManager.MAX_PARALLEL_INSTANCES; i++) {
const instance = BrowserManager.getInstance(isolatedConfig);
instance.acquire();
}
// Next call should throw
expect(() => BrowserManager.getInstance(isolatedConfig)).toThrow(
/Maximum number of parallel browser instances/,
);
});
});
describe('resetAll', () => {
@@ -114,6 +114,12 @@ export class BrowserManager {
// --- Static singleton management ---
private static instances = new Map<string, BrowserManager>();
/**
* Maximum number of parallel browser instances allowed in isolated mode.
* Prevents unbounded resource consumption from concurrent browser_agent calls.
*/
static readonly MAX_PARALLEL_INSTANCES = 5;
/**
* Returns the cache key for a given config.
* Uses `sessionMode:profilePath` so different profiles get separate instances.
@@ -128,14 +134,64 @@ export class BrowserManager {
/**
* Returns an existing BrowserManager for the current config's session mode
* and profile, or creates a new one.
*
* Concurrency rules:
* - **persistent / existing mode**: Only one instance is allowed at a time.
* If the instance is already in-use, an error is thrown instructing the
* caller to run browser tasks sequentially.
* - **isolated mode**: Parallel instances are allowed up to
* MAX_PARALLEL_INSTANCES. Each isolated instance gets its own temp profile.
*/
static getInstance(config: Config): BrowserManager {
const key = BrowserManager.getInstanceKey(config);
const sessionMode =
config.getBrowserAgentConfig().customConfig.sessionMode ?? 'persistent';
let instance = BrowserManager.instances.get(key);
if (!instance) {
instance = new BrowserManager(config);
BrowserManager.instances.set(key, instance);
debugLogger.log(`Created new BrowserManager singleton (key: ${key})`);
} else if (instance.inUse) {
// Persistent and existing modes share a browser profile directory.
// Chrome prevents multiple instances from using the same profile, so
// concurrent usage would cause "profile locked" errors.
if (sessionMode === 'persistent' || sessionMode === 'existing') {
throw new Error(
`Cannot launch a concurrent browser agent in "${sessionMode}" session mode. ` +
`The browser instance is already in use by another task. ` +
`Please run browser tasks sequentially, or switch to "isolated" session mode for concurrent browser usage.`,
);
}
// Isolated mode: allow parallel instances up to the limit.
let inUseCount = 1; // primary is already in-use
let suffix = 1;
let parallelKey = `${key}:${suffix}`;
let parallel = BrowserManager.instances.get(parallelKey);
while (parallel?.inUse) {
inUseCount++;
if (inUseCount >= BrowserManager.MAX_PARALLEL_INSTANCES) {
throw new Error(
`Maximum number of parallel browser instances (${BrowserManager.MAX_PARALLEL_INSTANCES}) reached. ` +
`Please wait for an existing browser task to complete before starting a new one.`,
);
}
suffix++;
parallelKey = `${key}:${suffix}`;
parallel = BrowserManager.instances.get(parallelKey);
}
if (!parallel) {
parallel = new BrowserManager(config);
BrowserManager.instances.set(parallelKey, parallel);
debugLogger.log(
`Created parallel BrowserManager (key: ${parallelKey})`,
);
} else {
debugLogger.log(
`Reusing released parallel BrowserManager (key: ${parallelKey})`,
);
}
instance = parallel;
} else {
debugLogger.log(
`Reusing existing BrowserManager singleton (key: ${key})`,
@@ -180,6 +236,36 @@ export class BrowserManager {
private isClosing = false;
private connectionPromise: Promise<void> | undefined;
/**
* Whether this instance is currently acquired by an active invocation.
* Used by getInstance() to avoid handing the same browser to concurrent
* browser_agent calls.
*/
private inUse = false;
/**
* Marks this instance as in-use. Call this when starting a browser agent
* invocation so concurrent calls get a separate instance.
*/
acquire(): void {
this.inUse = true;
}
/**
* Marks this instance as available for reuse. Call this in the finally
* block of a browser agent invocation.
*/
release(): void {
this.inUse = false;
}
/**
* Returns whether this instance is currently acquired by an active invocation.
*/
isAcquired(): boolean {
return this.inUse;
}
/** State for action rate limiting */
private actionCounter = 0;
private readonly maxActionsPerTask: number;
+2 -2
View File
@@ -1075,7 +1075,7 @@ describe('AgentRegistry', () => {
expect.objectContaining({
toolName: 'PolicyTestAgent',
decision: PolicyDecision.ALLOW,
priority: 1.05,
priority: 1.03,
}),
);
});
@@ -1102,7 +1102,7 @@ describe('AgentRegistry', () => {
expect.objectContaining({
toolName: 'RemotePolicyAgent',
decision: PolicyDecision.ASK_USER,
priority: 1.05,
priority: 1.03,
}),
);
});
+1 -1
View File
@@ -1224,7 +1224,7 @@ export class Config implements McpContext, AgentLoopContext {
this.useRipgrep = params.useRipgrep ?? true;
this.useBackgroundColor = params.useBackgroundColor ?? true;
this.useAlternateBuffer = params.useAlternateBuffer ?? false;
this.useTerminalBuffer = params.useTerminalBuffer ?? true;
this.useTerminalBuffer = params.useTerminalBuffer ?? false;
this.useRenderProcess = params.useRenderProcess ?? true;
this.enableInteractiveShell = params.enableInteractiveShell ?? false;
+3 -2
View File
@@ -398,9 +398,10 @@ export async function createPolicyEngineConfig(
// TOML policy priorities (before transformation):
// 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
// 15: Auto-edit tool override (becomes 1.015 in default tier)
// 30: Unknown subagents (blocked by Plan Mode's 40)
// 40: Plan mode catch-all DENY override (becomes 1.040 in default tier)
// 50: Read-only tools (becomes 1.050 in default tier)
// 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
// 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
// 70: Mode transition overrides (becomes 1.070 in default tier)
// 999: YOLO mode allow-all (becomes 1.999 in default tier)
// MCP servers that are explicitly excluded in settings.mcp.excluded
+15 -31
View File
@@ -23,8 +23,10 @@
#
# TOML policy priorities (before transformation):
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
# 30: Unknown subagents (blocked by Plan Mode's 40)
# 40: Plan mode catch-all DENY override (becomes 1.040 in default tier)
# 50: Read-only tools / Plan mode explicit ALLOW (becomes 1.050 in default tier)
# 70: Mode transition overrides (into/out of Plan Mode)
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
# Mode Transitions (into/out of Plan Mode)
@@ -59,6 +61,7 @@ interactive = true
toolName = "exit_plan_mode"
decision = "allow"
priority = 70
modes = ["plan"]
interactive = false
[[rule]]
@@ -73,18 +76,23 @@ denyMessage = "You are not currently in Plan Mode. Use enter_plan_mode first to
[[rule]]
toolName = "*"
decision = "deny"
priority = 60
priority = 40
modes = ["plan"]
denyMessage = "You are in Plan Mode with access to read-only tools. Execution of scripts (including those from skills) is blocked."
# Explicitly Allow Read-Only Tools in Plan mode.
[[rule]]
toolName = ["activate_skill"]
decision = "allow"
priority = 50
modes = ["plan"]
[[rule]]
toolName = "*"
mcpName = "*"
toolAnnotations = { readOnlyHint = true }
decision = "ask_user"
priority = 70
priority = 50
modes = ["plan"]
interactive = true
@@ -93,45 +101,21 @@ toolName = "*"
mcpName = "*"
toolAnnotations = { readOnlyHint = true }
decision = "deny"
priority = 70
priority = 50
modes = ["plan"]
interactive = false
[[rule]]
toolName = [
"glob",
"grep_search",
"list_directory",
"read_file",
"google_web_search",
"activate_skill",
"codebase_investigator",
"cli_help",
"get_internal_docs",
"complete_task"
]
decision = "allow"
priority = 70
modes = ["plan"]
# Topic grouping tool is innocuous and used for UI organization.
[[rule]]
toolName = "update_topic"
decision = "allow"
priority = 70
modes = ["plan"]
[[rule]]
toolName = ["ask_user", "save_memory", "web_fetch"]
decision = "ask_user"
priority = 70
priority = 50
modes = ["plan"]
interactive = true
[[rule]]
toolName = ["ask_user", "save_memory", "web_fetch"]
decision = "deny"
priority = 70
priority = 50
modes = ["plan"]
interactive = false
@@ -28,43 +28,26 @@
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
[[rule]]
toolName = "glob"
toolName = [
"glob",
"grep_search",
"list_directory",
"read_file",
"google_web_search",
"codebase_investigator",
"cli_help",
"get_internal_docs",
# Tracker tools for task management (safe as they only modify internal state)
"tracker_create_task",
"tracker_update_task",
"tracker_get_task",
"tracker_list_tasks",
"tracker_add_dependency",
"tracker_visualize",
# Topic grouping tool is innocuous and used for UI organization.
"update_topic",
# Core agent lifecycle tool
"complete_task"
]
decision = "allow"
priority = 50
[[rule]]
toolName = "grep_search"
decision = "allow"
priority = 50
[[rule]]
toolName = "list_directory"
decision = "allow"
priority = 50
[[rule]]
toolName = "read_file"
decision = "allow"
priority = 50
[[rule]]
toolName = "google_web_search"
decision = "allow"
priority = 50
[[rule]]
toolName = ["codebase_investigator", "cli_help", "get_internal_docs"]
decision = "allow"
priority = 50
# Topic grouping tool is innocuous and used for UI organization.
[[rule]]
toolName = "update_topic"
decision = "allow"
priority = 50
# Core agent lifecycle tool
[[rule]]
toolName = "complete_task"
decision = "allow"
priority = 50
@@ -1,34 +0,0 @@
# Priority system for policy rules:
# - Higher priority numbers win over lower priority numbers
# - When multiple rules match, the highest priority rule is applied
# - Rules are evaluated in order of priority (highest first)
#
# Priority bands (tiers):
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
#
# Settings-based and dynamic rules (all in user tier 4.x):
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
# 4.9: MCP servers excluded list (security: persistent server blocks)
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
# 4.2: MCP servers with trust=true (persistent trusted servers)
# 4.1: MCP servers allowed list (persistent general server allows)
# Allow tracker tools to execute without asking the user.
# These tools are only registered when the tracker feature is enabled,
# so this rule is a no-op when the feature is disabled.
[[rule]]
toolName = [
"tracker_create_task",
"tracker_update_task",
"tracker_get_task",
"tracker_list_tasks",
"tracker_add_dependency",
"tracker_visualize"
]
decision = "allow"
priority = 50
@@ -1715,13 +1715,13 @@ describe('PolicyEngine', () => {
describe('Plan Mode vs Subagent Priority (Regression)', () => {
it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => {
// Plan Mode Deny (1.06) > Subagent Allow (1.05)
// Plan Mode Deny (1.04) > Subagent Allow (1.03)
const fixedRules: PolicyRule[] = [
{
toolName: '*',
decision: PolicyDecision.DENY,
priority: 1.06,
priority: 1.04,
modes: [ApprovalMode.PLAN],
},
{
+20 -7
View File
@@ -890,8 +890,8 @@ priority = 100
readOnlyHint: true,
});
expect(annotationRule!.decision).toBe(PolicyDecision.ASK_USER);
// Priority 70 in tier 1 => 1.070
expect(annotationRule!.priority).toBe(1.07);
// Priority 50 in tier 1 => 1.050
expect(annotationRule!.priority).toBe(1.05);
// Verify deny rule was loaded correctly
const denyRule = result.rules.find(
@@ -904,8 +904,8 @@ priority = 100
denyRule,
'Should have loaded the catch-all deny rule',
).toBeDefined();
// Priority 60 in tier 1 => 1.060
expect(denyRule!.priority).toBe(1.06);
// Priority 40 in tier 1 => 1.040
expect(denyRule!.priority).toBe(1.04);
// 2. Initialize Policy Engine in Plan Mode
const engine = new PolicyEngine({
@@ -974,12 +974,23 @@ priority = 100
it('should override default subagent rules when in Plan Mode for unknown subagents', async () => {
const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml');
const fileContent = await fs.readFile(planTomlPath, 'utf-8');
const readOnlyTomlPath = path.resolve(
__dirname,
'policies',
'read-only.toml',
);
const planContent = await fs.readFile(planTomlPath, 'utf-8');
const readOnlyContent = await fs.readFile(readOnlyTomlPath, 'utf-8');
const tempPolicyDir = await fs.mkdtemp(
path.join(os.tmpdir(), 'plan-policy-test-'),
);
try {
await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent);
await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), planContent);
await fs.writeFile(
path.join(tempPolicyDir, 'read-only.toml'),
readOnlyContent,
);
const getPolicyTier = () => 1; // Default tier
// 1. Load the actual Plan Mode policies
@@ -1004,6 +1015,7 @@ priority = 100
// 4. Verify Behavior:
// The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow
// Plan Mode Deny (1.04) > Subagent Allow (1.03)
const checkResult = await engine.check(
{ name: 'unknown_subagent' },
undefined,
@@ -1015,7 +1027,7 @@ priority = 100
).toBe(PolicyDecision.DENY);
// 5. Verify Explicit Allows still work
// e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60)
// e.g. 'read_file' should be allowed because its priority in read-only.toml (50) is higher than the deny (40)
const readResult = await engine.check({ name: 'read_file' }, undefined);
expect(
readResult.decision,
@@ -1023,6 +1035,7 @@ priority = 100
).toBe(PolicyDecision.ALLOW);
// 6. Verify Built-in Research Subagents are ALLOWED
// codebase_investigator is priority 50 in read-only.toml
const codebaseResult = await engine.check(
{ name: 'codebase_investigator' },
undefined,
+4 -2
View File
@@ -354,9 +354,11 @@ export interface CheckResult {
/**
* Priority for subagent tools (registered dynamically).
* Effective priority matching Tier 1 (Default) read-only tools.
* Effective priority matching Tier 1 (Default) at priority 30.
* This ensures they are blocked by Plan Mode (priority 40) while
* remaining above directive write tools (priority 10).
*/
export const PRIORITY_SUBAGENT_TOOL = 1.05;
export const PRIORITY_SUBAGENT_TOOL = 1.03;
/**
* The fractional priority of "Always allow" rules (e.g., 950/1000).
+26
View File
@@ -804,6 +804,32 @@ describe('ShellTool', () => {
});
});
describe('invocation getDescription', () => {
it('should return the description if it is present and not empty whitespace', () => {
const invocation = shellTool.build({
command: 'echo hello',
description: 'prints hello',
});
expect(invocation.getDescription()).toBe('prints hello');
});
it('should return the raw command if description is an empty string', () => {
const invocation = shellTool.build({
command: 'echo hello',
description: '',
});
expect(invocation.getDescription()).toBe('echo hello');
});
it('should return the raw command if description is just whitespace', () => {
const invocation = shellTool.build({
command: 'echo hello',
description: ' ',
});
expect(invocation.getDescription()).toBe('echo hello');
});
});
describe('llmContent output format', () => {
const mockAbortSignal = new AbortController().signal;
+3 -1
View File
@@ -136,7 +136,9 @@ export class ShellToolInvocation extends BaseToolInvocation<
}
getDescription(): string {
return this.params.description || '';
return this.params.description?.trim()
? this.params.description
: this.params.command;
}
private simplifyPaths(paths: Set<string>): string[] {