fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)

This commit is contained in:
Gaurav
2026-04-01 07:00:16 +08:00
committed by GitHub
parent 94f9480a3a
commit 782bb4e4bd
5 changed files with 144 additions and 128 deletions

View File

@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
expect(browserManager).toBeDefined();
});
it('should call printOutput when provided', async () => {
it('should not call printOutput for internal setup messages', async () => {
const printOutput = vi.fn();
await createBrowserAgentDefinition(
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
printOutput,
);
expect(printOutput).toHaveBeenCalled();
expect(printOutput).not.toHaveBeenCalled();
});
it('should create definition with correct structure', async () => {

View File

@@ -53,7 +53,7 @@ import {
export async function createBrowserAgentDefinition(
config: Config,
messageBus: MessageBus,
printOutput?: (msg: string) => void,
_printOutput?: (msg: string) => void,
): Promise<{
definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
browserManager: BrowserManager;
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
const browserManager = BrowserManager.getInstance(config);
await browserManager.ensureConnection();
if (printOutput) {
printOutput('Browser connected with isolated MCP client.');
}
debugLogger.log('Browser connected with isolated MCP client.');
// Determine if input blocker should be active (non-headless + enabled)
const shouldDisableInput = config.shouldDisableBrowserUserInput();
// Inject automation overlay and input blocker if not in headless mode
const browserConfig = config.getBrowserAgentConfig();
if (!browserConfig?.customConfig?.headless) {
if (printOutput) {
printOutput('Injecting automation overlay...');
}
debugLogger.log('Injecting automation overlay...');
await injectAutomationOverlay(browserManager);
if (shouldDisableInput) {
if (printOutput) {
printOutput('Injecting input blocker...');
}
debugLogger.log('Injecting input blocker...');
await injectInputBlocker(browserManager);
}
}

View File

@@ -18,7 +18,6 @@ import { randomUUID } from 'node:crypto';
import type { Config } from '../../config/config.js';
import { type AgentLoopContext } from '../../config/agent-loop-context.js';
import { LocalAgentExecutor } from '../local-executor.js';
import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
import {
BaseToolInvocation,
type ToolResult,
@@ -30,6 +29,7 @@ import {
type SubagentActivityEvent,
type SubagentProgress,
type SubagentActivityItem,
AgentTerminateMode,
isToolActivityError,
} from '../types.js';
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
@@ -56,6 +56,8 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
AgentInputs,
ToolResult
> {
private readonly agentName: string;
constructor(
private readonly context: AgentLoopContext,
params: AgentInputs,
@@ -63,13 +65,15 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
_toolName?: string,
_toolDisplayName?: string,
) {
const resolvedName = _toolName ?? 'browser_agent';
// Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
super(
params,
messageBus,
_toolName ?? 'browser_agent',
resolvedName,
_toolDisplayName ?? 'Browser Agent',
);
this.agentName = resolvedName;
}
private get config(): Config {
@@ -112,7 +116,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
// Send initial state
const initialProgress: SubagentProgress = {
isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent',
agentName: this.agentName,
recentActivity: [],
state: 'running',
};
@@ -135,7 +139,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
}
updateOutput({
isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent',
agentName: this.agentName,
recentActivity: [...recentActivity],
state: 'running',
} as SubagentProgress);
@@ -280,7 +284,7 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
const progress: SubagentProgress = {
isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent',
agentName: this.agentName,
recentActivity: [...recentActivity],
state: 'running',
};
@@ -297,34 +301,40 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
const output = await executor.run(this.params, signal);
const displayResult = safeJsonToMarkdown(output.result);
const resultContent = `Browser agent finished.
Termination Reason: ${output.terminate_reason}
Result:
${output.result}`;
const displayContent = `
Browser Agent Finished
// Map terminate_reason to the correct SubagentProgress state.
// GOAL = agent completed its task normally.
// ABORTED = user cancelled.
// Others (ERROR, MAX_TURNS, ERROR_NO_COMPLETE_TASK_CALL) = error.
let progressState: SubagentProgress['state'];
if (output.terminate_reason === AgentTerminateMode.ABORTED) {
progressState = 'cancelled';
} else if (output.terminate_reason === AgentTerminateMode.GOAL) {
progressState = 'completed';
} else {
progressState = 'error';
}
Termination Reason: ${output.terminate_reason}
Result:
${displayResult}
`;
const progress: SubagentProgress = {
isSubagentProgress: true,
agentName: this.agentName,
recentActivity: [...recentActivity],
state: progressState,
result: output.result,
terminateReason: output.terminate_reason,
};
if (updateOutput) {
updateOutput({
isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent',
recentActivity: [...recentActivity],
state: 'completed',
} as SubagentProgress);
updateOutput(progress);
}
return {
llmContent: [{ text: resultContent }],
returnDisplay: displayContent,
returnDisplay: progress,
};
} catch (error) {
const rawErrorMessage =
@@ -343,7 +353,7 @@ ${displayResult}
const progress: SubagentProgress = {
isSubagentProgress: true,
agentName: this['_toolName'] ?? 'browser_agent',
agentName: this.agentName,
recentActivity: [...recentActivity],
state: isAbort ? 'cancelled' : 'error',
};