Feat/browser agent progress emission (#21218)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Aditya Bijalwan
2026-03-09 22:56:00 +05:30
committed by GitHub
parent d485e08606
commit 759575faa8
4 changed files with 847 additions and 32 deletions

View File

@@ -9,7 +9,11 @@ import { BrowserAgentInvocation } from './browserAgentInvocation.js';
import { makeFakeConfig } from '../../test-utils/config.js';
import type { Config } from '../../config/config.js';
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
import type { AgentInputs } from '../types.js';
import {
type AgentInputs,
type SubagentProgress,
type SubagentActivityEvent,
} from '../types.js';
// Mock dependencies before imports
vi.mock('../../utils/debugLogger.js', () => ({
@@ -19,6 +23,24 @@ vi.mock('../../utils/debugLogger.js', () => ({
},
}));
vi.mock('./browserAgentFactory.js', () => ({
createBrowserAgentDefinition: vi.fn(),
cleanupBrowserAgent: vi.fn(),
}));
vi.mock('../local-executor.js', () => ({
LocalAgentExecutor: {
create: vi.fn(),
},
}));
import {
createBrowserAgentDefinition,
cleanupBrowserAgent,
} from './browserAgentFactory.js';
import { LocalAgentExecutor } from '../local-executor.js';
import type { ToolLiveOutput } from '../../tools/tools.js';
describe('BrowserAgentInvocation', () => {
let mockConfig: Config;
let mockMessageBus: MessageBus;
@@ -136,4 +158,473 @@ describe('BrowserAgentInvocation', () => {
expect(locations).toEqual([]);
});
});
describe('execute', () => {
let mockExecutor: { run: ReturnType<typeof vi.fn> };
beforeEach(() => {
vi.mocked(createBrowserAgentDefinition).mockResolvedValue({
definition: {
name: 'browser_agent',
description: 'mock definition',
kind: 'local',
inputConfig: {} as never,
outputConfig: {} as never,
processOutput: () => '',
modelConfig: { model: 'test' },
runConfig: {},
promptConfig: { query: '', systemPrompt: '' },
toolConfig: { tools: ['analyze_screenshot', 'click'] },
},
browserManager: {} as never,
});
mockExecutor = {
run: vi.fn().mockResolvedValue({
result: JSON.stringify({ success: true }),
terminate_reason: 'GOAL',
}),
};
vi.mocked(LocalAgentExecutor.create).mockResolvedValue(
mockExecutor as never,
);
vi.mocked(cleanupBrowserAgent).mockClear();
});
it('should return result text and call cleanup on success', async () => {
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const controller = new AbortController();
const updateOutput: (output: ToolLiveOutput) => void = vi.fn();
const result = await invocation.execute(controller.signal, updateOutput);
expect(Array.isArray(result.llmContent)).toBe(true);
expect((result.llmContent as Array<{ text: string }>)[0].text).toContain(
'Browser agent finished',
);
expect(cleanupBrowserAgent).toHaveBeenCalled();
});
it('should work without updateOutput (fire-and-forget)', async () => {
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const controller = new AbortController();
// Should not throw even with no updateOutput
await expect(
invocation.execute(controller.signal),
).resolves.toBeDefined();
});
it('should return error result when executor throws', async () => {
mockExecutor.run.mockRejectedValue(new Error('Unexpected crash'));
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const controller = new AbortController();
const result = await invocation.execute(controller.signal);
expect(result.error).toBeDefined();
expect(cleanupBrowserAgent).toHaveBeenCalled();
});
// ─── Structured SubagentProgress emission tests ───────────────────────
/**
* Helper: sets up LocalAgentExecutor.create to capture the onActivity
* callback so tests can fire synthetic activity events.
*/
function setupActivityCapture(): {
capturedOnActivity: () => SubagentActivityEvent | undefined;
fireActivity: (event: SubagentActivityEvent) => void;
} {
let onActivityFn: ((e: SubagentActivityEvent) => void) | undefined;
vi.mocked(LocalAgentExecutor.create).mockImplementation(
async (_def, _config, onActivity) => {
onActivityFn = onActivity;
return mockExecutor as never;
},
);
return {
capturedOnActivity: () => undefined,
fireActivity: (event: SubagentActivityEvent) => {
onActivityFn?.(event);
},
};
}
it('should emit initial SubagentProgress with running state', async () => {
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
await invocation.execute(new AbortController().signal, updateOutput);
const firstCall = updateOutput.mock.calls[0]?.[0] as SubagentProgress;
expect(firstCall.isSubagentProgress).toBe(true);
expect(firstCall.state).toBe('running');
expect(firstCall.recentActivity).toEqual([]);
});
it('should emit completed SubagentProgress on success', async () => {
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
await invocation.execute(new AbortController().signal, updateOutput);
const lastCall = updateOutput.mock.calls[
updateOutput.mock.calls.length - 1
]?.[0] as SubagentProgress;
expect(lastCall.isSubagentProgress).toBe(true);
expect(lastCall.state).toBe('completed');
});
it('should handle THOUGHT_CHUNK and emit structured progress', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
// Allow createBrowserAgentDefinition to resolve and onActivity to be registered
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'THOUGHT_CHUNK',
data: { text: 'Navigating to the page...' },
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
const thoughtProgress = progressCalls.find((p) =>
p.recentActivity.some(
(a) =>
a.type === 'thought' &&
a.content.includes('Navigating to the page...'),
),
);
expect(thoughtProgress).toBeDefined();
});
it('should handle TOOL_CALL_START and TOOL_CALL_END with callId tracking', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'TOOL_CALL_START',
data: {
name: 'navigate_browser',
callId: 'call-1',
args: { url: 'https://example.com' },
},
});
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'TOOL_CALL_END',
data: { name: 'navigate_browser', id: 'call-1' },
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
// After TOOL_CALL_END, the tool should be completed
const finalProgress = progressCalls[progressCalls.length - 1];
const toolItem = finalProgress?.recentActivity.find(
(a) => a.type === 'tool_call' && a.content === 'navigate_browser',
);
expect(toolItem).toBeDefined();
expect(toolItem?.status).toBe('completed');
});
it('should sanitize sensitive data in tool call args', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'TOOL_CALL_START',
data: {
name: 'fill_form',
callId: 'call-2',
args: { password: 'supersecret123', url: 'https://example.com' },
},
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
const toolItem = progressCalls
.flatMap((p) => p.recentActivity)
.find((a) => a.type === 'tool_call' && a.content === 'fill_form');
expect(toolItem).toBeDefined();
expect(toolItem?.args).not.toContain('supersecret123');
expect(toolItem?.args).toContain('[REDACTED]');
});
it('should handle ERROR event with callId and mark tool as errored', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'TOOL_CALL_START',
data: { name: 'click_element', callId: 'call-3', args: {} },
});
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'ERROR',
data: { error: 'Element not found', callId: 'call-3' },
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
const allItems = progressCalls.flatMap((p) => p.recentActivity);
const toolItem = allItems.find(
(a) => a.type === 'tool_call' && a.content === 'click_element',
);
expect(toolItem?.status).toBe('error');
});
it('should sanitize sensitive data in ERROR event messages', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'ERROR',
data: { error: 'Auth failed: api_key=sk-secret-abc1234567890' },
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
const errorItem = progressCalls
.flatMap((p) => p.recentActivity)
.find((a) => a.type === 'thought' && a.status === 'error');
expect(errorItem).toBeDefined();
expect(errorItem?.content).not.toContain('sk-secret-abc1234567890');
expect(errorItem?.content).toContain('[REDACTED]');
});
it('should sanitize inline PEM content in error messages', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'ERROR',
data: {
error:
'Failed to authenticate:\n-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA12345...\n-----END RSA PRIVATE KEY-----\nPlease check credentials.',
},
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
const errorItem = progressCalls
.flatMap((p) => p.recentActivity)
.find((a) => a.type === 'thought' && a.status === 'error');
expect(errorItem).toBeDefined();
expect(errorItem?.content).toContain('[REDACTED_PEM]');
expect(errorItem?.content).not.toContain('-----BEGIN');
});
it('should mark all running tools as errored when ERROR has no callId', async () => {
const { fireActivity } = setupActivityCapture();
const updateOutput = vi.fn();
const invocation = new BrowserAgentInvocation(
mockConfig,
mockParams,
mockMessageBus,
);
const executePromise = invocation.execute(
new AbortController().signal,
updateOutput,
);
await Promise.resolve();
await Promise.resolve();
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'TOOL_CALL_START',
data: { name: 'tool_a', callId: 'c1', args: {} },
});
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'TOOL_CALL_START',
data: { name: 'tool_b', callId: 'c2', args: {} },
});
// ERROR with no callId should mark ALL running tools as error
fireActivity({
isSubagentActivityEvent: true,
agentName: 'browser_agent',
type: 'ERROR',
data: { error: 'Agent crashed' },
});
await executePromise;
const progressCalls = updateOutput.mock.calls
.map((c) => c[0] as SubagentProgress)
.filter((p) => p.isSubagentProgress);
const allItems = progressCalls.flatMap((p) => p.recentActivity);
const toolA = allItems.find(
(a) => a.type === 'tool_call' && a.content === 'tool_a',
);
const toolB = allItems.find(
(a) => a.type === 'tool_call' && a.content === 'tool_b',
);
// Both should be error since no callId was specified
expect(toolA?.status).toBe('error');
expect(toolB?.status).toBe('error');
});
});
});