mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-25 20:44:46 -07:00
Merge branch 'main' into memory_usage3
This commit is contained in:
@@ -17,6 +17,9 @@ import type {
|
||||
ToolCallRequestInfo,
|
||||
} from '../scheduler/types.js';
|
||||
import { CoreToolCallStatus } from '../scheduler/types.js';
|
||||
import type { GeminiClient } from '../core/client.js';
|
||||
import type { Scheduler } from '../scheduler/scheduler.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock helpers
|
||||
@@ -24,7 +27,7 @@ import { CoreToolCallStatus } from '../scheduler/types.js';
|
||||
|
||||
function createMockDeps(
|
||||
overrides?: Partial<LegacyAgentSessionDeps>,
|
||||
): LegacyAgentSessionDeps {
|
||||
): Required<LegacyAgentSessionDeps> {
|
||||
const mockClient = {
|
||||
sendMessageStream: vi.fn(),
|
||||
getChat: vi.fn().mockReturnValue({
|
||||
@@ -40,18 +43,22 @@ function createMockDeps(
|
||||
const mockConfig = {
|
||||
getMaxSessionTurns: vi.fn().mockReturnValue(-1),
|
||||
getModel: vi.fn().mockReturnValue('gemini-2.5-pro'),
|
||||
getGeminiClient: vi.fn().mockReturnValue(mockClient),
|
||||
getMessageBus: vi.fn().mockImplementation(() => ({
|
||||
subscribe: vi.fn(),
|
||||
unsubscribe: vi.fn(),
|
||||
})),
|
||||
};
|
||||
|
||||
return {
|
||||
client: mockClient as unknown as LegacyAgentSessionDeps['client'],
|
||||
|
||||
scheduler: mockScheduler as unknown as LegacyAgentSessionDeps['scheduler'],
|
||||
|
||||
config: mockConfig as unknown as LegacyAgentSessionDeps['config'],
|
||||
client: mockClient as unknown as GeminiClient,
|
||||
scheduler: mockScheduler as unknown as Scheduler,
|
||||
config: mockConfig as unknown as Config,
|
||||
promptId: 'test-prompt',
|
||||
streamId: 'test-stream',
|
||||
getPreferredEditor: vi.fn().mockReturnValue(undefined),
|
||||
...overrides,
|
||||
};
|
||||
} as Required<LegacyAgentSessionDeps>;
|
||||
}
|
||||
|
||||
async function* makeStream(
|
||||
@@ -129,7 +136,7 @@ async function collectEvents(
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('LegacyAgentSession', () => {
|
||||
let deps: LegacyAgentSessionDeps;
|
||||
let deps: Required<LegacyAgentSessionDeps>;
|
||||
|
||||
beforeEach(() => {
|
||||
deps = createMockDeps();
|
||||
|
||||
@@ -14,10 +14,11 @@ import type { Part } from '@google/genai';
|
||||
import type { GeminiClient } from '../core/client.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import type { ToolCallRequestInfo } from '../scheduler/types.js';
|
||||
import type { Scheduler } from '../scheduler/scheduler.js';
|
||||
import { Scheduler } from '../scheduler/scheduler.js';
|
||||
import { recordToolCallInteractions } from '../code_assist/telemetry.js';
|
||||
import { ToolErrorType, isFatalToolError } from '../tools/tool-error.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import type { EditorType } from '../utils/editor.js';
|
||||
import {
|
||||
buildToolResponseData,
|
||||
contentPartsToGeminiParts,
|
||||
@@ -45,14 +46,17 @@ function isAbortLikeError(err: unknown): boolean {
|
||||
}
|
||||
|
||||
export interface LegacyAgentSessionDeps {
|
||||
client: GeminiClient;
|
||||
scheduler: Scheduler;
|
||||
config: Config;
|
||||
promptId: string;
|
||||
client?: GeminiClient;
|
||||
scheduler?: Scheduler;
|
||||
promptId?: string;
|
||||
streamId?: string;
|
||||
getPreferredEditor?: () => EditorType | undefined;
|
||||
}
|
||||
|
||||
class LegacyAgentProtocol implements AgentProtocol {
|
||||
const schedulerMap = new WeakMap<Config, Scheduler>();
|
||||
|
||||
export class LegacyAgentProtocol implements AgentProtocol {
|
||||
private _events: AgentEvent[] = [];
|
||||
private _subscribers = new Set<(event: AgentEvent) => void>();
|
||||
private _translationState: TranslationState;
|
||||
@@ -69,10 +73,26 @@ class LegacyAgentProtocol implements AgentProtocol {
|
||||
constructor(deps: LegacyAgentSessionDeps) {
|
||||
this._translationState = createTranslationState(deps.streamId);
|
||||
this._nextStreamIdOverride = deps.streamId;
|
||||
this._client = deps.client;
|
||||
this._scheduler = deps.scheduler;
|
||||
this._config = deps.config;
|
||||
this._promptId = deps.promptId;
|
||||
this._client = deps.client ?? deps.config.getGeminiClient();
|
||||
this._promptId = deps.promptId ?? deps.config.promptId ?? '';
|
||||
|
||||
if (deps.scheduler) {
|
||||
this._scheduler = deps.scheduler;
|
||||
} else {
|
||||
let scheduler = schedulerMap.get(deps.config);
|
||||
if (!scheduler) {
|
||||
const sessionId = deps.config.getSessionId();
|
||||
const schedulerId = `legacy-agent-scheduler-${sessionId}`;
|
||||
scheduler = new Scheduler({
|
||||
context: deps.config,
|
||||
schedulerId,
|
||||
getPreferredEditor: deps.getPreferredEditor ?? (() => undefined),
|
||||
});
|
||||
schedulerMap.set(deps.config, scheduler);
|
||||
}
|
||||
this._scheduler = scheduler;
|
||||
}
|
||||
}
|
||||
|
||||
get events(): readonly AgentEvent[] {
|
||||
|
||||
@@ -15,6 +15,7 @@ import type { MessageBus } from '../confirmation-bus/message-bus.js';
|
||||
vi.mock('../scheduler/scheduler.js', () => ({
|
||||
Scheduler: vi.fn().mockImplementation(() => ({
|
||||
schedule: vi.fn().mockResolvedValue([{ status: 'success' }]),
|
||||
dispose: vi.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
@@ -125,6 +126,57 @@ describe('agent-scheduler', () => {
|
||||
expect(schedulerConfig.toolRegistry).not.toBe(mainRegistry);
|
||||
});
|
||||
|
||||
it('should dispose the scheduler after schedule completes', async () => {
|
||||
const mockConfig = {
|
||||
getPromptRegistry: vi.fn(),
|
||||
getResourceRegistry: vi.fn(),
|
||||
messageBus: mockMessageBus,
|
||||
toolRegistry: mockToolRegistry,
|
||||
} as unknown as Mocked<Config>;
|
||||
|
||||
const options = {
|
||||
schedulerId: 'subagent-1',
|
||||
toolRegistry: mockToolRegistry as unknown as ToolRegistry,
|
||||
signal: new AbortController().signal,
|
||||
};
|
||||
|
||||
await scheduleAgentTools(mockConfig as unknown as Config, [], options);
|
||||
|
||||
const schedulerInstance = vi.mocked(Scheduler).mock.results[0].value;
|
||||
expect(schedulerInstance.dispose).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it('should dispose the scheduler even when schedule throws', async () => {
|
||||
const scheduleError = new Error('schedule failed');
|
||||
vi.mocked(Scheduler).mockImplementationOnce(
|
||||
() =>
|
||||
({
|
||||
schedule: vi.fn().mockRejectedValue(scheduleError),
|
||||
dispose: vi.fn(),
|
||||
}) as unknown as Scheduler,
|
||||
);
|
||||
|
||||
const mockConfig = {
|
||||
getPromptRegistry: vi.fn(),
|
||||
getResourceRegistry: vi.fn(),
|
||||
messageBus: mockMessageBus,
|
||||
toolRegistry: mockToolRegistry,
|
||||
} as unknown as Mocked<Config>;
|
||||
|
||||
const options = {
|
||||
schedulerId: 'subagent-1',
|
||||
toolRegistry: mockToolRegistry as unknown as ToolRegistry,
|
||||
signal: new AbortController().signal,
|
||||
};
|
||||
|
||||
await expect(
|
||||
scheduleAgentTools(mockConfig as unknown as Config, [], options),
|
||||
).rejects.toThrow('schedule failed');
|
||||
|
||||
const schedulerInstance = vi.mocked(Scheduler).mock.results[0].value;
|
||||
expect(schedulerInstance.dispose).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it('should create an AgentLoopContext that has a defined .config property', async () => {
|
||||
const mockConfig = {
|
||||
getPromptRegistry: vi.fn(),
|
||||
|
||||
@@ -85,5 +85,9 @@ export async function scheduleAgentTools(
|
||||
onWaitingForConfirmation,
|
||||
});
|
||||
|
||||
return scheduler.schedule(requests, signal);
|
||||
try {
|
||||
return await scheduler.schedule(requests, signal);
|
||||
} finally {
|
||||
scheduler.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,6 +38,8 @@ const mockBrowserManager = {
|
||||
]),
|
||||
callTool: vi.fn().mockResolvedValue({ content: [] }),
|
||||
close: vi.fn().mockResolvedValue(undefined),
|
||||
acquire: vi.fn(),
|
||||
release: vi.fn(),
|
||||
};
|
||||
|
||||
// Mock dependencies
|
||||
|
||||
@@ -81,207 +81,218 @@ export async function createBrowserAgentDefinition(
|
||||
|
||||
// Get or create browser manager singleton for this session mode/profile
|
||||
const browserManager = BrowserManager.getInstance(config);
|
||||
await browserManager.ensureConnection();
|
||||
browserManager.acquire();
|
||||
|
||||
debugLogger.log('Browser connected with isolated MCP client.');
|
||||
try {
|
||||
await browserManager.ensureConnection();
|
||||
|
||||
// Determine if input blocker should be active (non-headless + enabled)
|
||||
const shouldDisableInput = config.shouldDisableBrowserUserInput();
|
||||
// Inject automation overlay and input blocker if not in headless mode
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig?.customConfig?.headless) {
|
||||
debugLogger.log('Injecting automation overlay...');
|
||||
await injectAutomationOverlay(browserManager);
|
||||
if (shouldDisableInput) {
|
||||
debugLogger.log('Injecting input blocker...');
|
||||
await injectInputBlocker(browserManager);
|
||||
}
|
||||
}
|
||||
debugLogger.log('Browser connected with isolated MCP client.');
|
||||
|
||||
// Create declarative tools from dynamically discovered MCP tools
|
||||
// These tools dispatch to browserManager's isolated client
|
||||
const mcpTools = await createMcpDeclarativeTools(
|
||||
browserManager,
|
||||
messageBus,
|
||||
shouldDisableInput,
|
||||
browserConfig.customConfig.blockFileUploads,
|
||||
);
|
||||
const availableToolNames = mcpTools.map((t) => t.name);
|
||||
|
||||
// Register high-priority policy rules for sensitive actions which is not
|
||||
// able to be overwrite by YOLO mode.
|
||||
const policyEngine = config.getPolicyEngine();
|
||||
|
||||
if (policyEngine) {
|
||||
const existingRules = policyEngine.getRules();
|
||||
|
||||
const restrictedTools = ['fill', 'fill_form'];
|
||||
|
||||
// ASK_USER for upload_file and evaluate_script when sensitive action
|
||||
// need confirmation.
|
||||
if (browserConfig.customConfig.confirmSensitiveActions) {
|
||||
restrictedTools.push('upload_file', 'evaluate_script');
|
||||
}
|
||||
|
||||
for (const toolName of restrictedTools) {
|
||||
const rule = generateAskUserRules(toolName);
|
||||
if (!existingRules.some((r) => isRuleEqual(r, rule))) {
|
||||
policyEngine.addRule(rule);
|
||||
// Determine if input blocker should be active (non-headless + enabled)
|
||||
const shouldDisableInput = config.shouldDisableBrowserUserInput();
|
||||
// Inject automation overlay and input blocker if not in headless mode
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig?.customConfig?.headless) {
|
||||
debugLogger.log('Injecting automation overlay...');
|
||||
await injectAutomationOverlay(browserManager);
|
||||
if (shouldDisableInput) {
|
||||
debugLogger.log('Injecting input blocker...');
|
||||
await injectInputBlocker(browserManager);
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce noise for read-only tools in default mode
|
||||
const readOnlyTools = (await browserManager.getDiscoveredTools())
|
||||
.filter((t) => !!t.annotations?.readOnlyHint)
|
||||
.map((t) => t.name);
|
||||
const allowlistedReadonlyTools = ['take_snapshot', 'take_screenshot'];
|
||||
// Create declarative tools from dynamically discovered MCP tools
|
||||
// These tools dispatch to browserManager's isolated client
|
||||
const mcpTools = await createMcpDeclarativeTools(
|
||||
browserManager,
|
||||
messageBus,
|
||||
shouldDisableInput,
|
||||
browserConfig.customConfig.blockFileUploads,
|
||||
);
|
||||
const availableToolNames = mcpTools.map((t) => t.name);
|
||||
|
||||
for (const toolName of [...readOnlyTools, ...allowlistedReadonlyTools]) {
|
||||
if (availableToolNames.includes(toolName)) {
|
||||
const rule = generateAllowRules(toolName);
|
||||
// Register high-priority policy rules for sensitive actions which is not
|
||||
// able to be overwrite by YOLO mode.
|
||||
const policyEngine = config.getPolicyEngine();
|
||||
|
||||
if (policyEngine) {
|
||||
const existingRules = policyEngine.getRules();
|
||||
|
||||
const restrictedTools = ['fill', 'fill_form'];
|
||||
|
||||
// ASK_USER for upload_file and evaluate_script when sensitive action
|
||||
// need confirmation.
|
||||
if (browserConfig.customConfig.confirmSensitiveActions) {
|
||||
restrictedTools.push('upload_file', 'evaluate_script');
|
||||
}
|
||||
|
||||
for (const toolName of restrictedTools) {
|
||||
const rule = generateAskUserRules(toolName);
|
||||
if (!existingRules.some((r) => isRuleEqual(r, rule))) {
|
||||
policyEngine.addRule(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function generateAskUserRules(toolName: string): PolicyRule {
|
||||
return {
|
||||
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 999,
|
||||
source: 'BrowserAgent (Sensitive Actions)',
|
||||
mcpName: BROWSER_AGENT_NAME,
|
||||
// Reduce noise for read-only tools in default mode
|
||||
const readOnlyTools = (await browserManager.getDiscoveredTools())
|
||||
.filter((t) => !!t.annotations?.readOnlyHint)
|
||||
.map((t) => t.name);
|
||||
const allowlistedReadonlyTools = ['take_snapshot', 'take_screenshot'];
|
||||
|
||||
for (const toolName of [...readOnlyTools, ...allowlistedReadonlyTools]) {
|
||||
if (availableToolNames.includes(toolName)) {
|
||||
const rule = generateAllowRules(toolName);
|
||||
if (!existingRules.some((r) => isRuleEqual(r, rule))) {
|
||||
policyEngine.addRule(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function generateAskUserRules(toolName: string): PolicyRule {
|
||||
return {
|
||||
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 999,
|
||||
source: 'BrowserAgent (Sensitive Actions)',
|
||||
mcpName: BROWSER_AGENT_NAME,
|
||||
};
|
||||
}
|
||||
|
||||
function generateAllowRules(toolName: string): PolicyRule {
|
||||
return {
|
||||
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: PRIORITY_SUBAGENT_TOOL,
|
||||
source: 'BrowserAgent (Read-Only)',
|
||||
mcpName: BROWSER_AGENT_NAME,
|
||||
};
|
||||
}
|
||||
|
||||
// Check if policy rule the same in all the attributes that we care about
|
||||
function isRuleEqual(rule1: PolicyRule, rule2: PolicyRule) {
|
||||
return (
|
||||
rule1.toolName === rule2.toolName &&
|
||||
rule1.decision === rule2.decision &&
|
||||
rule1.priority === rule2.priority &&
|
||||
rule1.mcpName === rule2.mcpName
|
||||
);
|
||||
}
|
||||
|
||||
// Validate required semantic tools are available
|
||||
const requiredSemanticTools = [
|
||||
'click',
|
||||
'fill',
|
||||
'navigate_page',
|
||||
'take_snapshot',
|
||||
];
|
||||
const missingSemanticTools = requiredSemanticTools.filter(
|
||||
(t) => !availableToolNames.includes(t),
|
||||
);
|
||||
|
||||
const rawSessionMode = browserConfig?.customConfig?.sessionMode;
|
||||
const sessionMode =
|
||||
rawSessionMode === 'isolated' || rawSessionMode === 'existing'
|
||||
? rawSessionMode
|
||||
: 'persistent';
|
||||
|
||||
recordBrowserAgentToolDiscovery(
|
||||
config,
|
||||
mcpTools.length,
|
||||
missingSemanticTools,
|
||||
sessionMode,
|
||||
);
|
||||
|
||||
if (missingSemanticTools.length > 0) {
|
||||
debugLogger.warn(
|
||||
`Semantic tools missing (${missingSemanticTools.join(', ')}). ` +
|
||||
'Some browser interactions may not work correctly.',
|
||||
);
|
||||
}
|
||||
|
||||
// Only click_at is strictly required — text input can use press_key or fill.
|
||||
const requiredVisualTools = ['click_at'];
|
||||
const missingVisualTools = requiredVisualTools.filter(
|
||||
(t) => !availableToolNames.includes(t),
|
||||
);
|
||||
|
||||
// Check whether vision can be enabled; returns structured type with code and message.
|
||||
function getVisionDisabledReason(): VisionDisabledReason {
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig.customConfig.visualModel) {
|
||||
return {
|
||||
code: 'no_visual_model',
|
||||
message: 'No visualModel configured.',
|
||||
};
|
||||
}
|
||||
if (missingVisualTools.length > 0) {
|
||||
return {
|
||||
code: 'missing_visual_tools',
|
||||
message:
|
||||
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
|
||||
`The installed chrome-devtools-mcp version may be too old.`,
|
||||
};
|
||||
}
|
||||
const authType = config.getContentGeneratorConfig()?.authType;
|
||||
const blockedAuthTypes = new Set([
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
AuthType.LEGACY_CLOUD_SHELL,
|
||||
AuthType.COMPUTE_ADC,
|
||||
]);
|
||||
if (authType && blockedAuthTypes.has(authType)) {
|
||||
return {
|
||||
code: 'blocked_auth_type',
|
||||
message: 'Visual agent model not available for current auth type.',
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const allTools: AnyDeclarativeTool[] = [...mcpTools];
|
||||
const visionDisabledReason = getVisionDisabledReason();
|
||||
|
||||
logBrowserAgentVisionStatus(config, {
|
||||
enabled: !visionDisabledReason,
|
||||
disabled_reason: visionDisabledReason?.code,
|
||||
});
|
||||
|
||||
if (visionDisabledReason) {
|
||||
debugLogger.log(`Vision disabled: ${visionDisabledReason.message}`);
|
||||
} else {
|
||||
allTools.push(
|
||||
createAnalyzeScreenshotTool(browserManager, config, messageBus),
|
||||
);
|
||||
}
|
||||
|
||||
debugLogger.log(
|
||||
`Created ${allTools.length} tools for browser agent: ` +
|
||||
allTools.map((t) => t.name).join(', '),
|
||||
);
|
||||
|
||||
// Create configured definition with tools
|
||||
// BrowserAgentDefinition is a factory function - call it with config
|
||||
const baseDefinition = BrowserAgentDefinition(
|
||||
config,
|
||||
!visionDisabledReason,
|
||||
);
|
||||
const definition: LocalAgentDefinition<typeof BrowserTaskResultSchema> = {
|
||||
...baseDefinition,
|
||||
toolConfig: {
|
||||
tools: allTools,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function generateAllowRules(toolName: string): PolicyRule {
|
||||
return {
|
||||
toolName: `${MCP_TOOL_PREFIX}${BROWSER_AGENT_NAME}_${toolName}`,
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: PRIORITY_SUBAGENT_TOOL,
|
||||
source: 'BrowserAgent (Read-Only)',
|
||||
mcpName: BROWSER_AGENT_NAME,
|
||||
definition,
|
||||
browserManager,
|
||||
visionEnabled: !visionDisabledReason,
|
||||
sessionMode,
|
||||
};
|
||||
} catch (error) {
|
||||
// Release the browser manager if setup fails, so concurrent tasks can try again.
|
||||
browserManager.release();
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Check if policy rule the same in all the attributes that we care about
|
||||
function isRuleEqual(rule1: PolicyRule, rule2: PolicyRule) {
|
||||
return (
|
||||
rule1.toolName === rule2.toolName &&
|
||||
rule1.decision === rule2.decision &&
|
||||
rule1.priority === rule2.priority &&
|
||||
rule1.mcpName === rule2.mcpName
|
||||
);
|
||||
}
|
||||
|
||||
// Validate required semantic tools are available
|
||||
const requiredSemanticTools = [
|
||||
'click',
|
||||
'fill',
|
||||
'navigate_page',
|
||||
'take_snapshot',
|
||||
];
|
||||
const missingSemanticTools = requiredSemanticTools.filter(
|
||||
(t) => !availableToolNames.includes(t),
|
||||
);
|
||||
|
||||
const rawSessionMode = browserConfig?.customConfig?.sessionMode;
|
||||
const sessionMode =
|
||||
rawSessionMode === 'isolated' || rawSessionMode === 'existing'
|
||||
? rawSessionMode
|
||||
: 'persistent';
|
||||
|
||||
recordBrowserAgentToolDiscovery(
|
||||
config,
|
||||
mcpTools.length,
|
||||
missingSemanticTools,
|
||||
sessionMode,
|
||||
);
|
||||
|
||||
if (missingSemanticTools.length > 0) {
|
||||
debugLogger.warn(
|
||||
`Semantic tools missing (${missingSemanticTools.join(', ')}). ` +
|
||||
'Some browser interactions may not work correctly.',
|
||||
);
|
||||
}
|
||||
|
||||
// Only click_at is strictly required — text input can use press_key or fill.
|
||||
const requiredVisualTools = ['click_at'];
|
||||
const missingVisualTools = requiredVisualTools.filter(
|
||||
(t) => !availableToolNames.includes(t),
|
||||
);
|
||||
|
||||
// Check whether vision can be enabled; returns structured type with code and message.
|
||||
function getVisionDisabledReason(): VisionDisabledReason {
|
||||
const browserConfig = config.getBrowserAgentConfig();
|
||||
if (!browserConfig.customConfig.visualModel) {
|
||||
return {
|
||||
code: 'no_visual_model',
|
||||
message: 'No visualModel configured.',
|
||||
};
|
||||
}
|
||||
if (missingVisualTools.length > 0) {
|
||||
return {
|
||||
code: 'missing_visual_tools',
|
||||
message:
|
||||
`Visual tools missing (${missingVisualTools.join(', ')}). ` +
|
||||
`The installed chrome-devtools-mcp version may be too old.`,
|
||||
};
|
||||
}
|
||||
const authType = config.getContentGeneratorConfig()?.authType;
|
||||
const blockedAuthTypes = new Set([
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
AuthType.LEGACY_CLOUD_SHELL,
|
||||
AuthType.COMPUTE_ADC,
|
||||
]);
|
||||
if (authType && blockedAuthTypes.has(authType)) {
|
||||
return {
|
||||
code: 'blocked_auth_type',
|
||||
message: 'Visual agent model not available for current auth type.',
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const allTools: AnyDeclarativeTool[] = [...mcpTools];
|
||||
const visionDisabledReason = getVisionDisabledReason();
|
||||
|
||||
logBrowserAgentVisionStatus(config, {
|
||||
enabled: !visionDisabledReason,
|
||||
disabled_reason: visionDisabledReason?.code,
|
||||
});
|
||||
|
||||
if (visionDisabledReason) {
|
||||
debugLogger.log(`Vision disabled: ${visionDisabledReason.message}`);
|
||||
} else {
|
||||
allTools.push(
|
||||
createAnalyzeScreenshotTool(browserManager, config, messageBus),
|
||||
);
|
||||
}
|
||||
|
||||
debugLogger.log(
|
||||
`Created ${allTools.length} tools for browser agent: ` +
|
||||
allTools.map((t) => t.name).join(', '),
|
||||
);
|
||||
|
||||
// Create configured definition with tools
|
||||
// BrowserAgentDefinition is a factory function - call it with config
|
||||
const baseDefinition = BrowserAgentDefinition(config, !visionDisabledReason);
|
||||
const definition: LocalAgentDefinition<typeof BrowserTaskResultSchema> = {
|
||||
...baseDefinition,
|
||||
toolConfig: {
|
||||
tools: allTools,
|
||||
},
|
||||
};
|
||||
|
||||
return {
|
||||
definition,
|
||||
browserManager,
|
||||
visionEnabled: !visionDisabledReason,
|
||||
sessionMode,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -192,7 +192,10 @@ describe('BrowserAgentInvocation', () => {
|
||||
promptConfig: { query: '', systemPrompt: '' },
|
||||
toolConfig: { tools: ['analyze_screenshot', 'click'] },
|
||||
},
|
||||
browserManager: {} as never,
|
||||
browserManager: {
|
||||
release: vi.fn(),
|
||||
callTool: vi.fn().mockResolvedValue({ content: [] }),
|
||||
} as never,
|
||||
visionEnabled: true,
|
||||
sessionMode: 'persistent',
|
||||
});
|
||||
@@ -766,6 +769,7 @@ describe('BrowserAgentInvocation', () => {
|
||||
}
|
||||
return { isError: false };
|
||||
}),
|
||||
release: vi.fn(),
|
||||
};
|
||||
|
||||
vi.mocked(createBrowserAgentDefinition).mockResolvedValue({
|
||||
|
||||
@@ -440,6 +440,8 @@ ${output.result}`;
|
||||
}
|
||||
} catch {
|
||||
// Ignore errors for removing the overlays.
|
||||
} finally {
|
||||
browserManager.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -873,6 +873,122 @@ describe('BrowserManager', () => {
|
||||
|
||||
expect(instance1).not.toBe(instance2);
|
||||
});
|
||||
|
||||
it('should throw when acquired instance is requested in persistent mode', () => {
|
||||
// mockConfig defaults to persistent mode
|
||||
const instance1 = BrowserManager.getInstance(mockConfig);
|
||||
instance1.acquire();
|
||||
|
||||
expect(() => BrowserManager.getInstance(mockConfig)).toThrow(
|
||||
/Cannot launch a concurrent browser agent in "persistent" session mode/,
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw when acquired instance is requested in existing mode', () => {
|
||||
const existingConfig = makeFakeConfig({
|
||||
agents: {
|
||||
overrides: { browser_agent: { enabled: true } },
|
||||
browser: { sessionMode: 'existing' },
|
||||
},
|
||||
});
|
||||
|
||||
const instance1 = BrowserManager.getInstance(existingConfig);
|
||||
instance1.acquire();
|
||||
|
||||
expect(() => BrowserManager.getInstance(existingConfig)).toThrow(
|
||||
/Cannot launch a concurrent browser agent in "existing" session mode/,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return a different instance when the primary is acquired in isolated mode', () => {
|
||||
const isolatedConfig = makeFakeConfig({
|
||||
agents: {
|
||||
overrides: { browser_agent: { enabled: true } },
|
||||
browser: { sessionMode: 'isolated' },
|
||||
},
|
||||
});
|
||||
|
||||
const instance1 = BrowserManager.getInstance(isolatedConfig);
|
||||
instance1.acquire();
|
||||
|
||||
const instance2 = BrowserManager.getInstance(isolatedConfig);
|
||||
|
||||
expect(instance2).not.toBe(instance1);
|
||||
expect(instance1.isAcquired()).toBe(true);
|
||||
expect(instance2.isAcquired()).toBe(false);
|
||||
});
|
||||
|
||||
it('should reuse the primary when it has been released', () => {
|
||||
const instance1 = BrowserManager.getInstance(mockConfig);
|
||||
instance1.acquire();
|
||||
instance1.release();
|
||||
|
||||
const instance2 = BrowserManager.getInstance(mockConfig);
|
||||
|
||||
expect(instance2).toBe(instance1);
|
||||
expect(instance1.isAcquired()).toBe(false);
|
||||
});
|
||||
|
||||
it('should reuse a released parallel instance in isolated mode', () => {
|
||||
const isolatedConfig = makeFakeConfig({
|
||||
agents: {
|
||||
overrides: { browser_agent: { enabled: true } },
|
||||
browser: { sessionMode: 'isolated' },
|
||||
},
|
||||
});
|
||||
|
||||
const instance1 = BrowserManager.getInstance(isolatedConfig);
|
||||
instance1.acquire();
|
||||
|
||||
const instance2 = BrowserManager.getInstance(isolatedConfig);
|
||||
instance2.acquire();
|
||||
instance2.release();
|
||||
|
||||
// Primary is still acquired, parallel is released — should reuse parallel
|
||||
const instance3 = BrowserManager.getInstance(isolatedConfig);
|
||||
expect(instance3).toBe(instance2);
|
||||
});
|
||||
|
||||
it('should create multiple parallel instances in isolated mode', () => {
|
||||
const isolatedConfig = makeFakeConfig({
|
||||
agents: {
|
||||
overrides: { browser_agent: { enabled: true } },
|
||||
browser: { sessionMode: 'isolated' },
|
||||
},
|
||||
});
|
||||
|
||||
const instance1 = BrowserManager.getInstance(isolatedConfig);
|
||||
instance1.acquire();
|
||||
|
||||
const instance2 = BrowserManager.getInstance(isolatedConfig);
|
||||
instance2.acquire();
|
||||
|
||||
const instance3 = BrowserManager.getInstance(isolatedConfig);
|
||||
|
||||
expect(instance1).not.toBe(instance2);
|
||||
expect(instance2).not.toBe(instance3);
|
||||
expect(instance1).not.toBe(instance3);
|
||||
});
|
||||
|
||||
it('should throw when MAX_PARALLEL_INSTANCES is reached in isolated mode', () => {
|
||||
const isolatedConfig = makeFakeConfig({
|
||||
agents: {
|
||||
overrides: { browser_agent: { enabled: true } },
|
||||
browser: { sessionMode: 'isolated' },
|
||||
},
|
||||
});
|
||||
|
||||
// Acquire MAX_PARALLEL_INSTANCES instances
|
||||
for (let i = 0; i < BrowserManager.MAX_PARALLEL_INSTANCES; i++) {
|
||||
const instance = BrowserManager.getInstance(isolatedConfig);
|
||||
instance.acquire();
|
||||
}
|
||||
|
||||
// Next call should throw
|
||||
expect(() => BrowserManager.getInstance(isolatedConfig)).toThrow(
|
||||
/Maximum number of parallel browser instances/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('resetAll', () => {
|
||||
|
||||
@@ -114,6 +114,12 @@ export class BrowserManager {
|
||||
// --- Static singleton management ---
|
||||
private static instances = new Map<string, BrowserManager>();
|
||||
|
||||
/**
|
||||
* Maximum number of parallel browser instances allowed in isolated mode.
|
||||
* Prevents unbounded resource consumption from concurrent browser_agent calls.
|
||||
*/
|
||||
static readonly MAX_PARALLEL_INSTANCES = 5;
|
||||
|
||||
/**
|
||||
* Returns the cache key for a given config.
|
||||
* Uses `sessionMode:profilePath` so different profiles get separate instances.
|
||||
@@ -128,14 +134,64 @@ export class BrowserManager {
|
||||
/**
|
||||
* Returns an existing BrowserManager for the current config's session mode
|
||||
* and profile, or creates a new one.
|
||||
*
|
||||
* Concurrency rules:
|
||||
* - **persistent / existing mode**: Only one instance is allowed at a time.
|
||||
* If the instance is already in-use, an error is thrown instructing the
|
||||
* caller to run browser tasks sequentially.
|
||||
* - **isolated mode**: Parallel instances are allowed up to
|
||||
* MAX_PARALLEL_INSTANCES. Each isolated instance gets its own temp profile.
|
||||
*/
|
||||
static getInstance(config: Config): BrowserManager {
|
||||
const key = BrowserManager.getInstanceKey(config);
|
||||
const sessionMode =
|
||||
config.getBrowserAgentConfig().customConfig.sessionMode ?? 'persistent';
|
||||
let instance = BrowserManager.instances.get(key);
|
||||
if (!instance) {
|
||||
instance = new BrowserManager(config);
|
||||
BrowserManager.instances.set(key, instance);
|
||||
debugLogger.log(`Created new BrowserManager singleton (key: ${key})`);
|
||||
} else if (instance.inUse) {
|
||||
// Persistent and existing modes share a browser profile directory.
|
||||
// Chrome prevents multiple instances from using the same profile, so
|
||||
// concurrent usage would cause "profile locked" errors.
|
||||
if (sessionMode === 'persistent' || sessionMode === 'existing') {
|
||||
throw new Error(
|
||||
`Cannot launch a concurrent browser agent in "${sessionMode}" session mode. ` +
|
||||
`The browser instance is already in use by another task. ` +
|
||||
`Please run browser tasks sequentially, or switch to "isolated" session mode for concurrent browser usage.`,
|
||||
);
|
||||
}
|
||||
|
||||
// Isolated mode: allow parallel instances up to the limit.
|
||||
let inUseCount = 1; // primary is already in-use
|
||||
let suffix = 1;
|
||||
let parallelKey = `${key}:${suffix}`;
|
||||
let parallel = BrowserManager.instances.get(parallelKey);
|
||||
while (parallel?.inUse) {
|
||||
inUseCount++;
|
||||
if (inUseCount >= BrowserManager.MAX_PARALLEL_INSTANCES) {
|
||||
throw new Error(
|
||||
`Maximum number of parallel browser instances (${BrowserManager.MAX_PARALLEL_INSTANCES}) reached. ` +
|
||||
`Please wait for an existing browser task to complete before starting a new one.`,
|
||||
);
|
||||
}
|
||||
suffix++;
|
||||
parallelKey = `${key}:${suffix}`;
|
||||
parallel = BrowserManager.instances.get(parallelKey);
|
||||
}
|
||||
if (!parallel) {
|
||||
parallel = new BrowserManager(config);
|
||||
BrowserManager.instances.set(parallelKey, parallel);
|
||||
debugLogger.log(
|
||||
`Created parallel BrowserManager (key: ${parallelKey})`,
|
||||
);
|
||||
} else {
|
||||
debugLogger.log(
|
||||
`Reusing released parallel BrowserManager (key: ${parallelKey})`,
|
||||
);
|
||||
}
|
||||
instance = parallel;
|
||||
} else {
|
||||
debugLogger.log(
|
||||
`Reusing existing BrowserManager singleton (key: ${key})`,
|
||||
@@ -180,6 +236,36 @@ export class BrowserManager {
|
||||
private isClosing = false;
|
||||
private connectionPromise: Promise<void> | undefined;
|
||||
|
||||
/**
|
||||
* Whether this instance is currently acquired by an active invocation.
|
||||
* Used by getInstance() to avoid handing the same browser to concurrent
|
||||
* browser_agent calls.
|
||||
*/
|
||||
private inUse = false;
|
||||
|
||||
/**
|
||||
* Marks this instance as in-use. Call this when starting a browser agent
|
||||
* invocation so concurrent calls get a separate instance.
|
||||
*/
|
||||
acquire(): void {
|
||||
this.inUse = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks this instance as available for reuse. Call this in the finally
|
||||
* block of a browser agent invocation.
|
||||
*/
|
||||
release(): void {
|
||||
this.inUse = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether this instance is currently acquired by an active invocation.
|
||||
*/
|
||||
isAcquired(): boolean {
|
||||
return this.inUse;
|
||||
}
|
||||
|
||||
/** State for action rate limiting */
|
||||
private actionCounter = 0;
|
||||
private readonly maxActionsPerTask: number;
|
||||
|
||||
@@ -1075,7 +1075,7 @@ describe('AgentRegistry', () => {
|
||||
expect.objectContaining({
|
||||
toolName: 'PolicyTestAgent',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 1.05,
|
||||
priority: 1.03,
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -1102,7 +1102,7 @@ describe('AgentRegistry', () => {
|
||||
expect.objectContaining({
|
||||
toolName: 'RemotePolicyAgent',
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 1.05,
|
||||
priority: 1.03,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -1224,7 +1224,7 @@ export class Config implements McpContext, AgentLoopContext {
|
||||
this.useRipgrep = params.useRipgrep ?? true;
|
||||
this.useBackgroundColor = params.useBackgroundColor ?? true;
|
||||
this.useAlternateBuffer = params.useAlternateBuffer ?? false;
|
||||
this.useTerminalBuffer = params.useTerminalBuffer ?? true;
|
||||
this.useTerminalBuffer = params.useTerminalBuffer ?? false;
|
||||
this.useRenderProcess = params.useRenderProcess ?? true;
|
||||
this.enableInteractiveShell = params.enableInteractiveShell ?? false;
|
||||
|
||||
|
||||
@@ -398,9 +398,10 @@ export async function createPolicyEngineConfig(
|
||||
// TOML policy priorities (before transformation):
|
||||
// 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||
// 15: Auto-edit tool override (becomes 1.015 in default tier)
|
||||
// 30: Unknown subagents (blocked by Plan Mode's 40)
|
||||
// 40: Plan mode catch-all DENY override (becomes 1.040 in default tier)
|
||||
// 50: Read-only tools (becomes 1.050 in default tier)
|
||||
// 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
|
||||
// 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
|
||||
// 70: Mode transition overrides (becomes 1.070 in default tier)
|
||||
// 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
|
||||
// MCP servers that are explicitly excluded in settings.mcp.excluded
|
||||
|
||||
@@ -23,8 +23,10 @@
|
||||
#
|
||||
# TOML policy priorities (before transformation):
|
||||
# 10: Write tools default to ASK_USER (becomes 1.010 in default tier)
|
||||
# 60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
|
||||
# 70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
|
||||
# 30: Unknown subagents (blocked by Plan Mode's 40)
|
||||
# 40: Plan mode catch-all DENY override (becomes 1.040 in default tier)
|
||||
# 50: Read-only tools / Plan mode explicit ALLOW (becomes 1.050 in default tier)
|
||||
# 70: Mode transition overrides (into/out of Plan Mode)
|
||||
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
|
||||
# Mode Transitions (into/out of Plan Mode)
|
||||
@@ -59,6 +61,7 @@ interactive = true
|
||||
toolName = "exit_plan_mode"
|
||||
decision = "allow"
|
||||
priority = 70
|
||||
modes = ["plan"]
|
||||
interactive = false
|
||||
|
||||
[[rule]]
|
||||
@@ -73,18 +76,23 @@ denyMessage = "You are not currently in Plan Mode. Use enter_plan_mode first to
|
||||
[[rule]]
|
||||
toolName = "*"
|
||||
decision = "deny"
|
||||
priority = 60
|
||||
priority = 40
|
||||
modes = ["plan"]
|
||||
denyMessage = "You are in Plan Mode with access to read-only tools. Execution of scripts (including those from skills) is blocked."
|
||||
|
||||
# Explicitly Allow Read-Only Tools in Plan mode.
|
||||
[[rule]]
|
||||
toolName = ["activate_skill"]
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
modes = ["plan"]
|
||||
|
||||
[[rule]]
|
||||
toolName = "*"
|
||||
mcpName = "*"
|
||||
toolAnnotations = { readOnlyHint = true }
|
||||
decision = "ask_user"
|
||||
priority = 70
|
||||
priority = 50
|
||||
modes = ["plan"]
|
||||
interactive = true
|
||||
|
||||
@@ -93,45 +101,21 @@ toolName = "*"
|
||||
mcpName = "*"
|
||||
toolAnnotations = { readOnlyHint = true }
|
||||
decision = "deny"
|
||||
priority = 70
|
||||
priority = 50
|
||||
modes = ["plan"]
|
||||
interactive = false
|
||||
|
||||
[[rule]]
|
||||
toolName = [
|
||||
"glob",
|
||||
"grep_search",
|
||||
"list_directory",
|
||||
"read_file",
|
||||
"google_web_search",
|
||||
"activate_skill",
|
||||
"codebase_investigator",
|
||||
"cli_help",
|
||||
"get_internal_docs",
|
||||
"complete_task"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 70
|
||||
modes = ["plan"]
|
||||
|
||||
# Topic grouping tool is innocuous and used for UI organization.
|
||||
[[rule]]
|
||||
toolName = "update_topic"
|
||||
decision = "allow"
|
||||
priority = 70
|
||||
modes = ["plan"]
|
||||
|
||||
[[rule]]
|
||||
toolName = ["ask_user", "save_memory", "web_fetch"]
|
||||
decision = "ask_user"
|
||||
priority = 70
|
||||
priority = 50
|
||||
modes = ["plan"]
|
||||
interactive = true
|
||||
|
||||
[[rule]]
|
||||
toolName = ["ask_user", "save_memory", "web_fetch"]
|
||||
decision = "deny"
|
||||
priority = 70
|
||||
priority = 50
|
||||
modes = ["plan"]
|
||||
interactive = false
|
||||
|
||||
|
||||
@@ -28,43 +28,26 @@
|
||||
# 999: YOLO mode allow-all (becomes 1.999 in default tier)
|
||||
|
||||
[[rule]]
|
||||
toolName = "glob"
|
||||
toolName = [
|
||||
"glob",
|
||||
"grep_search",
|
||||
"list_directory",
|
||||
"read_file",
|
||||
"google_web_search",
|
||||
"codebase_investigator",
|
||||
"cli_help",
|
||||
"get_internal_docs",
|
||||
# Tracker tools for task management (safe as they only modify internal state)
|
||||
"tracker_create_task",
|
||||
"tracker_update_task",
|
||||
"tracker_get_task",
|
||||
"tracker_list_tasks",
|
||||
"tracker_add_dependency",
|
||||
"tracker_visualize",
|
||||
# Topic grouping tool is innocuous and used for UI organization.
|
||||
"update_topic",
|
||||
# Core agent lifecycle tool
|
||||
"complete_task"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
[[rule]]
|
||||
toolName = "grep_search"
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
[[rule]]
|
||||
toolName = "list_directory"
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
[[rule]]
|
||||
toolName = "read_file"
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
[[rule]]
|
||||
toolName = "google_web_search"
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
[[rule]]
|
||||
toolName = ["codebase_investigator", "cli_help", "get_internal_docs"]
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
# Topic grouping tool is innocuous and used for UI organization.
|
||||
[[rule]]
|
||||
toolName = "update_topic"
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
|
||||
# Core agent lifecycle tool
|
||||
[[rule]]
|
||||
toolName = "complete_task"
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
@@ -1,34 +0,0 @@
|
||||
# Priority system for policy rules:
|
||||
# - Higher priority numbers win over lower priority numbers
|
||||
# - When multiple rules match, the highest priority rule is applied
|
||||
# - Rules are evaluated in order of priority (highest first)
|
||||
#
|
||||
# Priority bands (tiers):
|
||||
# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
|
||||
# - Extension policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
|
||||
# - Workspace policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
|
||||
# - User policies (TOML): 4 + priority/1000 (e.g., priority 100 → 4.100)
|
||||
# - Admin policies (TOML): 5 + priority/1000 (e.g., priority 100 → 5.100)
|
||||
#
|
||||
# Settings-based and dynamic rules (all in user tier 4.x):
|
||||
# 4.95: Tools that the user has selected as "Always Allow" in the interactive UI
|
||||
# 4.9: MCP servers excluded list (security: persistent server blocks)
|
||||
# 4.4: Command line flag --exclude-tools (explicit temporary blocks)
|
||||
# 4.3: Command line flag --allowed-tools (explicit temporary allows)
|
||||
# 4.2: MCP servers with trust=true (persistent trusted servers)
|
||||
# 4.1: MCP servers allowed list (persistent general server allows)
|
||||
|
||||
# Allow tracker tools to execute without asking the user.
|
||||
# These tools are only registered when the tracker feature is enabled,
|
||||
# so this rule is a no-op when the feature is disabled.
|
||||
[[rule]]
|
||||
toolName = [
|
||||
"tracker_create_task",
|
||||
"tracker_update_task",
|
||||
"tracker_get_task",
|
||||
"tracker_list_tasks",
|
||||
"tracker_add_dependency",
|
||||
"tracker_visualize"
|
||||
]
|
||||
decision = "allow"
|
||||
priority = 50
|
||||
@@ -1715,13 +1715,13 @@ describe('PolicyEngine', () => {
|
||||
|
||||
describe('Plan Mode vs Subagent Priority (Regression)', () => {
|
||||
it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => {
|
||||
// Plan Mode Deny (1.06) > Subagent Allow (1.05)
|
||||
// Plan Mode Deny (1.04) > Subagent Allow (1.03)
|
||||
|
||||
const fixedRules: PolicyRule[] = [
|
||||
{
|
||||
toolName: '*',
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 1.06,
|
||||
priority: 1.04,
|
||||
modes: [ApprovalMode.PLAN],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -890,8 +890,8 @@ priority = 100
|
||||
readOnlyHint: true,
|
||||
});
|
||||
expect(annotationRule!.decision).toBe(PolicyDecision.ASK_USER);
|
||||
// Priority 70 in tier 1 => 1.070
|
||||
expect(annotationRule!.priority).toBe(1.07);
|
||||
// Priority 50 in tier 1 => 1.050
|
||||
expect(annotationRule!.priority).toBe(1.05);
|
||||
|
||||
// Verify deny rule was loaded correctly
|
||||
const denyRule = result.rules.find(
|
||||
@@ -904,8 +904,8 @@ priority = 100
|
||||
denyRule,
|
||||
'Should have loaded the catch-all deny rule',
|
||||
).toBeDefined();
|
||||
// Priority 60 in tier 1 => 1.060
|
||||
expect(denyRule!.priority).toBe(1.06);
|
||||
// Priority 40 in tier 1 => 1.040
|
||||
expect(denyRule!.priority).toBe(1.04);
|
||||
|
||||
// 2. Initialize Policy Engine in Plan Mode
|
||||
const engine = new PolicyEngine({
|
||||
@@ -974,12 +974,23 @@ priority = 100
|
||||
|
||||
it('should override default subagent rules when in Plan Mode for unknown subagents', async () => {
|
||||
const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml');
|
||||
const fileContent = await fs.readFile(planTomlPath, 'utf-8');
|
||||
const readOnlyTomlPath = path.resolve(
|
||||
__dirname,
|
||||
'policies',
|
||||
'read-only.toml',
|
||||
);
|
||||
const planContent = await fs.readFile(planTomlPath, 'utf-8');
|
||||
const readOnlyContent = await fs.readFile(readOnlyTomlPath, 'utf-8');
|
||||
|
||||
const tempPolicyDir = await fs.mkdtemp(
|
||||
path.join(os.tmpdir(), 'plan-policy-test-'),
|
||||
);
|
||||
try {
|
||||
await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent);
|
||||
await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), planContent);
|
||||
await fs.writeFile(
|
||||
path.join(tempPolicyDir, 'read-only.toml'),
|
||||
readOnlyContent,
|
||||
);
|
||||
const getPolicyTier = () => 1; // Default tier
|
||||
|
||||
// 1. Load the actual Plan Mode policies
|
||||
@@ -1004,6 +1015,7 @@ priority = 100
|
||||
|
||||
// 4. Verify Behavior:
|
||||
// The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow
|
||||
// Plan Mode Deny (1.04) > Subagent Allow (1.03)
|
||||
const checkResult = await engine.check(
|
||||
{ name: 'unknown_subagent' },
|
||||
undefined,
|
||||
@@ -1015,7 +1027,7 @@ priority = 100
|
||||
).toBe(PolicyDecision.DENY);
|
||||
|
||||
// 5. Verify Explicit Allows still work
|
||||
// e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60)
|
||||
// e.g. 'read_file' should be allowed because its priority in read-only.toml (50) is higher than the deny (40)
|
||||
const readResult = await engine.check({ name: 'read_file' }, undefined);
|
||||
expect(
|
||||
readResult.decision,
|
||||
@@ -1023,6 +1035,7 @@ priority = 100
|
||||
).toBe(PolicyDecision.ALLOW);
|
||||
|
||||
// 6. Verify Built-in Research Subagents are ALLOWED
|
||||
// codebase_investigator is priority 50 in read-only.toml
|
||||
const codebaseResult = await engine.check(
|
||||
{ name: 'codebase_investigator' },
|
||||
undefined,
|
||||
|
||||
@@ -354,9 +354,11 @@ export interface CheckResult {
|
||||
|
||||
/**
|
||||
* Priority for subagent tools (registered dynamically).
|
||||
* Effective priority matching Tier 1 (Default) read-only tools.
|
||||
* Effective priority matching Tier 1 (Default) at priority 30.
|
||||
* This ensures they are blocked by Plan Mode (priority 40) while
|
||||
* remaining above directive write tools (priority 10).
|
||||
*/
|
||||
export const PRIORITY_SUBAGENT_TOOL = 1.05;
|
||||
export const PRIORITY_SUBAGENT_TOOL = 1.03;
|
||||
|
||||
/**
|
||||
* The fractional priority of "Always allow" rules (e.g., 950/1000).
|
||||
|
||||
@@ -804,6 +804,32 @@ describe('ShellTool', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('invocation getDescription', () => {
|
||||
it('should return the description if it is present and not empty whitespace', () => {
|
||||
const invocation = shellTool.build({
|
||||
command: 'echo hello',
|
||||
description: 'prints hello',
|
||||
});
|
||||
expect(invocation.getDescription()).toBe('prints hello');
|
||||
});
|
||||
|
||||
it('should return the raw command if description is an empty string', () => {
|
||||
const invocation = shellTool.build({
|
||||
command: 'echo hello',
|
||||
description: '',
|
||||
});
|
||||
expect(invocation.getDescription()).toBe('echo hello');
|
||||
});
|
||||
|
||||
it('should return the raw command if description is just whitespace', () => {
|
||||
const invocation = shellTool.build({
|
||||
command: 'echo hello',
|
||||
description: ' ',
|
||||
});
|
||||
expect(invocation.getDescription()).toBe('echo hello');
|
||||
});
|
||||
});
|
||||
|
||||
describe('llmContent output format', () => {
|
||||
const mockAbortSignal = new AbortController().signal;
|
||||
|
||||
|
||||
@@ -136,7 +136,9 @@ export class ShellToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
|
||||
getDescription(): string {
|
||||
return this.params.description || '';
|
||||
return this.params.description?.trim()
|
||||
? this.params.description
|
||||
: this.params.command;
|
||||
}
|
||||
|
||||
private simplifyPaths(paths: Set<string>): string[] {
|
||||
|
||||
Reference in New Issue
Block a user