mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
Feat/browser agent progress emission (#21218)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -9,7 +9,11 @@ import { BrowserAgentInvocation } from './browserAgentInvocation.js';
|
||||
import { makeFakeConfig } from '../../test-utils/config.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
||||
import type { AgentInputs } from '../types.js';
|
||||
import {
|
||||
type AgentInputs,
|
||||
type SubagentProgress,
|
||||
type SubagentActivityEvent,
|
||||
} from '../types.js';
|
||||
|
||||
// Mock dependencies before imports
|
||||
vi.mock('../../utils/debugLogger.js', () => ({
|
||||
@@ -19,6 +23,24 @@ vi.mock('../../utils/debugLogger.js', () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock('./browserAgentFactory.js', () => ({
|
||||
createBrowserAgentDefinition: vi.fn(),
|
||||
cleanupBrowserAgent: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../local-executor.js', () => ({
|
||||
LocalAgentExecutor: {
|
||||
create: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import {
|
||||
createBrowserAgentDefinition,
|
||||
cleanupBrowserAgent,
|
||||
} from './browserAgentFactory.js';
|
||||
import { LocalAgentExecutor } from '../local-executor.js';
|
||||
import type { ToolLiveOutput } from '../../tools/tools.js';
|
||||
|
||||
describe('BrowserAgentInvocation', () => {
|
||||
let mockConfig: Config;
|
||||
let mockMessageBus: MessageBus;
|
||||
@@ -136,4 +158,473 @@ describe('BrowserAgentInvocation', () => {
|
||||
expect(locations).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('execute', () => {
|
||||
let mockExecutor: { run: ReturnType<typeof vi.fn> };
|
||||
|
||||
beforeEach(() => {
|
||||
vi.mocked(createBrowserAgentDefinition).mockResolvedValue({
|
||||
definition: {
|
||||
name: 'browser_agent',
|
||||
description: 'mock definition',
|
||||
kind: 'local',
|
||||
inputConfig: {} as never,
|
||||
outputConfig: {} as never,
|
||||
processOutput: () => '',
|
||||
modelConfig: { model: 'test' },
|
||||
runConfig: {},
|
||||
promptConfig: { query: '', systemPrompt: '' },
|
||||
toolConfig: { tools: ['analyze_screenshot', 'click'] },
|
||||
},
|
||||
browserManager: {} as never,
|
||||
});
|
||||
|
||||
mockExecutor = {
|
||||
run: vi.fn().mockResolvedValue({
|
||||
result: JSON.stringify({ success: true }),
|
||||
terminate_reason: 'GOAL',
|
||||
}),
|
||||
};
|
||||
|
||||
vi.mocked(LocalAgentExecutor.create).mockResolvedValue(
|
||||
mockExecutor as never,
|
||||
);
|
||||
vi.mocked(cleanupBrowserAgent).mockClear();
|
||||
});
|
||||
|
||||
it('should return result text and call cleanup on success', async () => {
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const controller = new AbortController();
|
||||
const updateOutput: (output: ToolLiveOutput) => void = vi.fn();
|
||||
|
||||
const result = await invocation.execute(controller.signal, updateOutput);
|
||||
|
||||
expect(Array.isArray(result.llmContent)).toBe(true);
|
||||
expect((result.llmContent as Array<{ text: string }>)[0].text).toContain(
|
||||
'Browser agent finished',
|
||||
);
|
||||
expect(cleanupBrowserAgent).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should work without updateOutput (fire-and-forget)', async () => {
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const controller = new AbortController();
|
||||
// Should not throw even with no updateOutput
|
||||
await expect(
|
||||
invocation.execute(controller.signal),
|
||||
).resolves.toBeDefined();
|
||||
});
|
||||
|
||||
it('should return error result when executor throws', async () => {
|
||||
mockExecutor.run.mockRejectedValue(new Error('Unexpected crash'));
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const controller = new AbortController();
|
||||
const result = await invocation.execute(controller.signal);
|
||||
|
||||
expect(result.error).toBeDefined();
|
||||
expect(cleanupBrowserAgent).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// ─── Structured SubagentProgress emission tests ───────────────────────
|
||||
|
||||
/**
 * Test helper: replaces `LocalAgentExecutor.create` with an implementation
 * that records the `onActivity` callback it receives (third argument) and
 * resolves to the shared `mockExecutor`, so a test can push synthetic
 * activity events into the invocation under test.
 *
 * @returns An object with:
 *  - `fireActivity(event)`: forwards `event` to the recorded callback. It is
 *    a silent no-op until `LocalAgentExecutor.create` has been invoked.
 *  - `capturedOnActivity()`: the most recent event that `fireActivity` handed
 *    to the recorded callback, or `undefined` if none has been forwarded yet.
 */
function setupActivityCapture(): {
  capturedOnActivity: () => SubagentActivityEvent | undefined;
  fireActivity: (event: SubagentActivityEvent) => void;
} {
  let onActivityFn: ((e: SubagentActivityEvent) => void) | undefined;
  let lastForwarded: SubagentActivityEvent | undefined;

  vi.mocked(LocalAgentExecutor.create).mockImplementation(
    async (_def, _config, onActivity) => {
      onActivityFn = onActivity;
      return mockExecutor as never;
    },
  );

  return {
    capturedOnActivity: () => lastForwarded,
    fireActivity: (event: SubagentActivityEvent) => {
      // Nothing to deliver to until the executor has been created.
      if (!onActivityFn) return;
      lastForwarded = event;
      onActivityFn(event);
    },
  };
}
|
||||
|
||||
it('should emit initial SubagentProgress with running state', async () => {
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
await invocation.execute(new AbortController().signal, updateOutput);
|
||||
|
||||
const firstCall = updateOutput.mock.calls[0]?.[0] as SubagentProgress;
|
||||
expect(firstCall.isSubagentProgress).toBe(true);
|
||||
expect(firstCall.state).toBe('running');
|
||||
expect(firstCall.recentActivity).toEqual([]);
|
||||
});
|
||||
|
||||
it('should emit completed SubagentProgress on success', async () => {
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
await invocation.execute(new AbortController().signal, updateOutput);
|
||||
|
||||
const lastCall = updateOutput.mock.calls[
|
||||
updateOutput.mock.calls.length - 1
|
||||
]?.[0] as SubagentProgress;
|
||||
expect(lastCall.isSubagentProgress).toBe(true);
|
||||
expect(lastCall.state).toBe('completed');
|
||||
});
|
||||
|
||||
it('should handle THOUGHT_CHUNK and emit structured progress', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
// Allow createBrowserAgentDefinition to resolve and onActivity to be registered
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'THOUGHT_CHUNK',
|
||||
data: { text: 'Navigating to the page...' },
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
const thoughtProgress = progressCalls.find((p) =>
|
||||
p.recentActivity.some(
|
||||
(a) =>
|
||||
a.type === 'thought' &&
|
||||
a.content.includes('Navigating to the page...'),
|
||||
),
|
||||
);
|
||||
expect(thoughtProgress).toBeDefined();
|
||||
});
|
||||
|
||||
it('should handle TOOL_CALL_START and TOOL_CALL_END with callId tracking', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: {
|
||||
name: 'navigate_browser',
|
||||
callId: 'call-1',
|
||||
args: { url: 'https://example.com' },
|
||||
},
|
||||
});
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'TOOL_CALL_END',
|
||||
data: { name: 'navigate_browser', id: 'call-1' },
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
// After TOOL_CALL_END, the tool should be completed
|
||||
const finalProgress = progressCalls[progressCalls.length - 1];
|
||||
const toolItem = finalProgress?.recentActivity.find(
|
||||
(a) => a.type === 'tool_call' && a.content === 'navigate_browser',
|
||||
);
|
||||
expect(toolItem).toBeDefined();
|
||||
expect(toolItem?.status).toBe('completed');
|
||||
});
|
||||
|
||||
it('should sanitize sensitive data in tool call args', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: {
|
||||
name: 'fill_form',
|
||||
callId: 'call-2',
|
||||
args: { password: 'supersecret123', url: 'https://example.com' },
|
||||
},
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
const toolItem = progressCalls
|
||||
.flatMap((p) => p.recentActivity)
|
||||
.find((a) => a.type === 'tool_call' && a.content === 'fill_form');
|
||||
|
||||
expect(toolItem).toBeDefined();
|
||||
expect(toolItem?.args).not.toContain('supersecret123');
|
||||
expect(toolItem?.args).toContain('[REDACTED]');
|
||||
});
|
||||
|
||||
it('should handle ERROR event with callId and mark tool as errored', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: { name: 'click_element', callId: 'call-3', args: {} },
|
||||
});
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'ERROR',
|
||||
data: { error: 'Element not found', callId: 'call-3' },
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
const allItems = progressCalls.flatMap((p) => p.recentActivity);
|
||||
const toolItem = allItems.find(
|
||||
(a) => a.type === 'tool_call' && a.content === 'click_element',
|
||||
);
|
||||
expect(toolItem?.status).toBe('error');
|
||||
});
|
||||
|
||||
it('should sanitize sensitive data in ERROR event messages', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'ERROR',
|
||||
data: { error: 'Auth failed: api_key=sk-secret-abc1234567890' },
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
const errorItem = progressCalls
|
||||
.flatMap((p) => p.recentActivity)
|
||||
.find((a) => a.type === 'thought' && a.status === 'error');
|
||||
|
||||
expect(errorItem).toBeDefined();
|
||||
expect(errorItem?.content).not.toContain('sk-secret-abc1234567890');
|
||||
expect(errorItem?.content).toContain('[REDACTED]');
|
||||
});
|
||||
|
||||
it('should sanitize inline PEM content in error messages', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'ERROR',
|
||||
data: {
|
||||
error:
|
||||
'Failed to authenticate:\n-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA12345...\n-----END RSA PRIVATE KEY-----\nPlease check credentials.',
|
||||
},
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
const errorItem = progressCalls
|
||||
.flatMap((p) => p.recentActivity)
|
||||
.find((a) => a.type === 'thought' && a.status === 'error');
|
||||
|
||||
expect(errorItem).toBeDefined();
|
||||
expect(errorItem?.content).toContain('[REDACTED_PEM]');
|
||||
expect(errorItem?.content).not.toContain('-----BEGIN');
|
||||
});
|
||||
|
||||
it('should mark all running tools as errored when ERROR has no callId', async () => {
|
||||
const { fireActivity } = setupActivityCapture();
|
||||
const updateOutput = vi.fn();
|
||||
|
||||
const invocation = new BrowserAgentInvocation(
|
||||
mockConfig,
|
||||
mockParams,
|
||||
mockMessageBus,
|
||||
);
|
||||
|
||||
const executePromise = invocation.execute(
|
||||
new AbortController().signal,
|
||||
updateOutput,
|
||||
);
|
||||
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: { name: 'tool_a', callId: 'c1', args: {} },
|
||||
});
|
||||
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'TOOL_CALL_START',
|
||||
data: { name: 'tool_b', callId: 'c2', args: {} },
|
||||
});
|
||||
|
||||
// ERROR with no callId should mark ALL running tools as error
|
||||
fireActivity({
|
||||
isSubagentActivityEvent: true,
|
||||
agentName: 'browser_agent',
|
||||
type: 'ERROR',
|
||||
data: { error: 'Agent crashed' },
|
||||
});
|
||||
|
||||
await executePromise;
|
||||
|
||||
const progressCalls = updateOutput.mock.calls
|
||||
.map((c) => c[0] as SubagentProgress)
|
||||
.filter((p) => p.isSubagentProgress);
|
||||
|
||||
const allItems = progressCalls.flatMap((p) => p.recentActivity);
|
||||
const toolA = allItems.find(
|
||||
(a) => a.type === 'tool_call' && a.content === 'tool_a',
|
||||
);
|
||||
const toolB = allItems.find(
|
||||
(a) => a.type === 'tool_call' && a.content === 'tool_b',
|
||||
);
|
||||
|
||||
// Both should be error since no callId was specified
|
||||
expect(toolA?.status).toBe('error');
|
||||
expect(toolB?.status).toBe('error');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
* The MCP tools are only available in the browser agent's isolated registry.
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import { LocalAgentExecutor } from '../local-executor.js';
|
||||
import {
|
||||
@@ -22,7 +23,12 @@ import {
|
||||
type ToolLiveOutput,
|
||||
} from '../../tools/tools.js';
|
||||
import { ToolErrorType } from '../../tools/tool-error.js';
|
||||
import type { AgentInputs, SubagentActivityEvent } from '../types.js';
|
||||
import {
|
||||
type AgentInputs,
|
||||
type SubagentActivityEvent,
|
||||
type SubagentProgress,
|
||||
type SubagentActivityItem,
|
||||
} from '../types.js';
|
||||
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
||||
import {
|
||||
createBrowserAgentDefinition,
|
||||
@@ -31,6 +37,134 @@ import {
|
||||
|
||||
const INPUT_PREVIEW_MAX_LENGTH = 50;
|
||||
const DESCRIPTION_MAX_LENGTH = 200;
|
||||
const MAX_RECENT_ACTIVITY = 20;
|
||||
|
||||
/**
 * Key-name fragments that mark a value as sensitive.
 *
 * Used in two ways:
 *  - `sanitizeToolArgs` compares them (lower-cased, with `-`/`_` removed)
 *    against object keys using substring matching.
 *  - `sanitizeErrorMessage` compiles them into regular-expression
 *    alternations. The array order is the alternation order, so append new
 *    entries rather than reordering.
 */
const SENSITIVE_KEY_PATTERNS = [
  'password',
  'pwd',
  'apikey',
  'api_key',
  'api-key',
  'token',
  'secret',
  'credential',
  'auth',
  'authorization',
  'access_token',
  'access_key',
  'refresh_token',
  'session_id',
  'cookie',
  'passphrase',
  'privatekey',
  'private_key',
  'private-key',
  'secret_key',
  'client_secret',
  'client_id',
];

// ─── Precomputed matchers ────────────────────────────────────────────────
// Everything below is derived once at module load. The sanitizers run on
// every streamed activity event, so rebuilding these per call is wasted work.
// The shared regexes carry the `g` flag but are only ever used with
// `String.prototype.replace`, which resets `lastIndex` before matching, so no
// state leaks between calls.

/** Sensitive key fragments with `-`/`_` stripped, for object-key matching. */
const SENSITIVE_KEYS_NORMALIZED = SENSITIVE_KEY_PATTERNS.map((pattern) =>
  pattern.replace(/[-_]/g, ''),
);

/**
 * Sensitive key fragments as regex sources in which every `-`/`_` becomes an
 * optional separator that may also be URL-encoded (`%2D` / `%5F`).
 */
const SENSITIVE_KEY_REGEX_SOURCES = SENSITIVE_KEY_PATTERNS.map((pattern) =>
  pattern.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'),
);

/** Inline PEM block, from its BEGIN line through its END line. */
const PEM_BLOCK_REGEX =
  /-----BEGIN\s+[\w\s]+-----[\s\S]*?-----END\s+[\w\s]+-----/g;

// An unquoted value is one run of non-whitespace, optionally extended by
// further whitespace-separated words as long as the next word neither looks
// like the start of another `key=` / `key:` pair nor contains `=`, `:`, `<`
// or `>`.
const UNQUOTED_VALUE_SOURCE = `[^\\s]+(?:\\s+(?![a-zA-Z0-9_.-]+(?:=|:))[^\\s=:<>]+)*`;
const VALUE_SOURCE = `(?:"[^"]*"|'[^']*'|${UNQUOTED_VALUE_SOURCE})`;

/**
 * `key=value` / `key: value` pairs, including `--flag=value`, quoted keys and
 * URL-encoded delimiters (`%3A`, `%3D`). Group 1 is the key plus delimiter,
 * which is kept; the value after it is what gets replaced.
 */
const KEY_WITH_DELIMITER_REGEX = new RegExp(
  `((?:--)?("|')?(${SENSITIVE_KEY_REGEX_SOURCES.join('|')})\\2\\s*(?:[:=]|%3A|%3D)\\s*)${VALUE_SOURCE}`,
  'gi',
);

// A space-separated value is only treated as a secret when it is at least
// eight token-like characters long.
const TOKEN_VALUE_SOURCE = `[A-Za-z0-9._\\-/+=]{8,}`;

/**
 * `keyword value` pairs separated only by whitespace (e.g. `--api-key abc…`,
 * `bearer abc…`, `authorization: bearer abc…`). Group 1 is the keyword prefix,
 * which is kept.
 */
const SPACE_SEPARATED_KEY_REGEX = new RegExp(
  `\\b((?:--)?(?:${[...SENSITIVE_KEY_REGEX_SOURCES, 'bearer'].join('|')})(?:\\s*:\\s*bearer)?\\s+)(${TOKEN_VALUE_SOURCE})`,
  'gi',
);

/** Paths (either slash style) ending in a key-material file extension. */
const KEY_FILE_PATH_REGEX =
  /((?:[/\\][a-zA-Z0-9_-]+)*[/\\][a-zA-Z0-9_-]*\.(?:key|pem|p12|pfx))/gi;

/**
 * Reduces an object key to the form compared against
 * `SENSITIVE_KEYS_NORMALIZED`: URL-decoded when possible (so `api%5fkey` is
 * treated like `api_key`), lower-cased, with `-` and `_` removed.
 */
function normalizeKeyForSensitivityCheck(key: string): string {
  let decodedKey = key;
  try {
    decodedKey = decodeURIComponent(key);
  } catch {
    // Malformed percent-escape: fall back to the raw key.
  }
  return decodedKey.toLowerCase().replace(/[-_]/g, '');
}

/**
 * Sanitizes tool arguments by recursively redacting sensitive fields.
 *
 * - Strings are passed through `sanitizeErrorMessage`.
 * - Arrays are sanitized element by element.
 * - Plain objects are copied; any entry whose normalized key contains one of
 *   the sensitive fragments has its whole value replaced by `'[REDACTED]'`
 *   (without descending into it); every other value is sanitized recursively.
 * - All other values (numbers, booleans, `null`, `undefined`, …) are returned
 *   unchanged.
 *
 * The input is never mutated.
 *
 * @param args Arbitrary tool-call arguments.
 * @returns A sanitized copy with the same overall shape.
 */
function sanitizeToolArgs(args: unknown): unknown {
  if (typeof args === 'string') {
    return sanitizeErrorMessage(args);
  }
  if (typeof args !== 'object' || args === null) {
    return args;
  }
  if (Array.isArray(args)) {
    return args.map((item) => sanitizeToolArgs(item));
  }

  const sanitized: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(args)) {
    const keyNormalized = normalizeKeyForSensitivityCheck(key);
    const isSensitive = SENSITIVE_KEYS_NORMALIZED.some((fragment) =>
      keyNormalized.includes(fragment),
    );
    sanitized[key] = isSensitive ? '[REDACTED]' : sanitizeToolArgs(value);
  }
  return sanitized;
}

/**
 * Sanitizes free-form text (error messages, log lines) by redacting patterns
 * that commonly carry secrets. Four passes run in order:
 *
 *  1. Inline PEM blocks become `[REDACTED_PEM]`.
 *  2. Values of `key=value` / `key: value` pairs with a sensitive key become
 *     `[REDACTED]`. Values may be quoted or unquoted; an unquoted value can
 *     extend across following words until something that looks like another
 *     key/value pair, so this pass deliberately errs towards over-redaction.
 *  3. Token-like values (8+ characters) that follow a sensitive keyword or
 *     `bearer` separated only by whitespace become `[REDACTED]`.
 *  4. Paths to `.key`, `.pem`, `.p12` or `.pfx` files become
 *     `/path/to/[REDACTED].key`.
 *
 * @param message Text to sanitize. Falsy input is returned unchanged.
 * @returns The text with sensitive spans replaced.
 */
function sanitizeErrorMessage(message: string): string {
  if (!message) return message;

  return (
    message
      // 1. Redact inline PEM content
      .replace(PEM_BLOCK_REGEX, '[REDACTED_PEM]')
      // 2. Key-value pairs with delimiters (=, :, CLI-style --flag)
      .replace(KEY_WITH_DELIMITER_REGEX, '$1[REDACTED]')
      // 3. Space-separated sensitive keywords (e.g. "--api-key secret")
      .replace(SPACE_SEPARATED_KEY_REGEX, '$1[REDACTED]')
      // 4. Key-material file paths
      .replace(KEY_FILE_PATH_REGEX, '/path/to/[REDACTED].key')
  );
}

/**
 * Sanitizes LLM thought content by redacting sensitive data patterns.
 * Currently applies exactly the same rules as `sanitizeErrorMessage`.
 *
 * @param text Thought text to sanitize.
 * @returns The sanitized text.
 */
function sanitizeThoughtContent(text: string): string {
  return sanitizeErrorMessage(text);
}
|
||||
|
||||
/**
|
||||
* Browser agent invocation with async tool setup.
|
||||
@@ -88,15 +222,41 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
updateOutput?: (output: ToolLiveOutput) => void,
|
||||
): Promise<ToolResult> {
|
||||
let browserManager;
|
||||
let recentActivity: SubagentActivityItem[] = [];
|
||||
|
||||
try {
|
||||
if (updateOutput) {
|
||||
updateOutput('🌐 Starting browser agent...\n');
|
||||
// Send initial state
|
||||
const initialProgress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
recentActivity: [],
|
||||
state: 'running',
|
||||
};
|
||||
updateOutput(initialProgress);
|
||||
}
|
||||
|
||||
// Create definition with MCP tools
|
||||
// Note: printOutput is used for low-level connection logs before agent starts
|
||||
const printOutput = updateOutput
|
||||
? (msg: string) => updateOutput(`🌐 ${msg}\n`)
|
||||
? (msg: string) => {
|
||||
const sanitizedMsg = sanitizeThoughtContent(msg);
|
||||
recentActivity.push({
|
||||
id: randomUUID(),
|
||||
type: 'thought',
|
||||
content: sanitizedMsg,
|
||||
status: 'completed',
|
||||
});
|
||||
if (recentActivity.length > MAX_RECENT_ACTIVITY) {
|
||||
recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY);
|
||||
}
|
||||
updateOutput({
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
recentActivity: [...recentActivity],
|
||||
state: 'running',
|
||||
} as SubagentProgress);
|
||||
}
|
||||
: undefined;
|
||||
|
||||
const result = await createBrowserAgentDefinition(
|
||||
@@ -107,22 +267,141 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
|
||||
const { definition } = result;
|
||||
browserManager = result.browserManager;
|
||||
|
||||
if (updateOutput) {
|
||||
updateOutput(
|
||||
`🌐 Browser connected. Tools: ${definition.toolConfig?.tools.length ?? 0}\n`,
|
||||
);
|
||||
}
|
||||
|
||||
// Create activity callback for streaming output
|
||||
const onActivity = (activity: SubagentActivityEvent): void => {
|
||||
if (!updateOutput) return;
|
||||
|
||||
if (
|
||||
activity.type === 'THOUGHT_CHUNK' &&
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
typeof activity.data['text'] === 'string'
|
||||
) {
|
||||
updateOutput(`🌐💭 ${activity.data['text']}`);
|
||||
let updated = false;
|
||||
|
||||
switch (activity.type) {
|
||||
case 'THOUGHT_CHUNK': {
|
||||
const text = String(activity.data['text']);
|
||||
const lastItem = recentActivity[recentActivity.length - 1];
|
||||
if (
|
||||
lastItem &&
|
||||
lastItem.type === 'thought' &&
|
||||
lastItem.status === 'running'
|
||||
) {
|
||||
lastItem.content = sanitizeThoughtContent(
|
||||
lastItem.content + text,
|
||||
);
|
||||
} else {
|
||||
recentActivity.push({
|
||||
id: randomUUID(),
|
||||
type: 'thought',
|
||||
content: sanitizeThoughtContent(text),
|
||||
status: 'running',
|
||||
});
|
||||
}
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
case 'TOOL_CALL_START': {
|
||||
const name = String(activity.data['name']);
|
||||
const displayName = activity.data['displayName']
|
||||
? sanitizeErrorMessage(String(activity.data['displayName']))
|
||||
: undefined;
|
||||
const description = activity.data['description']
|
||||
? sanitizeErrorMessage(String(activity.data['description']))
|
||||
: undefined;
|
||||
const args = JSON.stringify(
|
||||
sanitizeToolArgs(activity.data['args']),
|
||||
);
|
||||
const callId = activity.data['callId']
|
||||
? String(activity.data['callId'])
|
||||
: randomUUID();
|
||||
recentActivity.push({
|
||||
id: callId,
|
||||
type: 'tool_call',
|
||||
content: name,
|
||||
displayName,
|
||||
description,
|
||||
args,
|
||||
status: 'running',
|
||||
});
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
case 'TOOL_CALL_END': {
|
||||
const callId = activity.data['id']
|
||||
? String(activity.data['id'])
|
||||
: undefined;
|
||||
// Find the tool call by ID
|
||||
// (scan newest-first so the most recent still-running call with this ID is the one marked completed)
|
||||
for (let i = recentActivity.length - 1; i >= 0; i--) {
|
||||
if (
|
||||
recentActivity[i].type === 'tool_call' &&
|
||||
callId != null &&
|
||||
recentActivity[i].id === callId &&
|
||||
recentActivity[i].status === 'running'
|
||||
) {
|
||||
recentActivity[i].status = 'completed';
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'ERROR': {
|
||||
const error = String(activity.data['error']);
|
||||
const isCancellation = error === 'Request cancelled.';
|
||||
const callId = activity.data['callId']
|
||||
? String(activity.data['callId'])
|
||||
: undefined;
|
||||
const newStatus = isCancellation ? 'cancelled' : 'error';
|
||||
|
||||
if (callId) {
|
||||
// Mark the specific tool as error/cancelled
|
||||
for (let i = recentActivity.length - 1; i >= 0; i--) {
|
||||
if (
|
||||
recentActivity[i].type === 'tool_call' &&
|
||||
recentActivity[i].id === callId &&
|
||||
recentActivity[i].status === 'running'
|
||||
) {
|
||||
recentActivity[i].status = newStatus;
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No specific tool — mark ALL running tool_call items
|
||||
for (const item of recentActivity) {
|
||||
if (item.type === 'tool_call' && item.status === 'running') {
|
||||
item.status = newStatus;
|
||||
updated = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sanitize the error message before emitting
|
||||
const sanitizedError = sanitizeErrorMessage(error);
|
||||
recentActivity.push({
|
||||
id: randomUUID(),
|
||||
type: 'thought',
|
||||
content: isCancellation
|
||||
? sanitizedError
|
||||
: `Error: ${sanitizedError}`,
|
||||
status: newStatus,
|
||||
});
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (updated) {
|
||||
if (recentActivity.length > MAX_RECENT_ACTIVITY) {
|
||||
recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY);
|
||||
}
|
||||
|
||||
const progress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
recentActivity: [...recentActivity],
|
||||
state: 'running',
|
||||
};
|
||||
updateOutput(progress);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -149,17 +428,52 @@ Result:
|
||||
${output.result}
|
||||
`;
|
||||
|
||||
if (updateOutput) {
|
||||
updateOutput({
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
recentActivity: [...recentActivity],
|
||||
state: 'completed',
|
||||
} as SubagentProgress);
|
||||
}
|
||||
|
||||
return {
|
||||
llmContent: [{ text: resultContent }],
|
||||
returnDisplay: displayContent,
|
||||
};
|
||||
} catch (error) {
|
||||
const errorMessage =
|
||||
const rawErrorMessage =
|
||||
error instanceof Error ? error.message : String(error);
|
||||
const isAbort =
|
||||
(error instanceof Error && error.name === 'AbortError') ||
|
||||
rawErrorMessage.includes('Aborted');
|
||||
const errorMessage = sanitizeErrorMessage(rawErrorMessage);
|
||||
|
||||
// Mark any running items as error/cancelled
|
||||
for (const item of recentActivity) {
|
||||
if (item.status === 'running') {
|
||||
item.status = isAbort ? 'cancelled' : 'error';
|
||||
}
|
||||
}
|
||||
|
||||
const progress: SubagentProgress = {
|
||||
isSubagentProgress: true,
|
||||
agentName: this['_toolName'] ?? 'browser_agent',
|
||||
recentActivity: [...recentActivity],
|
||||
state: isAbort ? 'cancelled' : 'error',
|
||||
};
|
||||
|
||||
if (updateOutput) {
|
||||
updateOutput(progress);
|
||||
}
|
||||
|
||||
const llmContent = isAbort
|
||||
? 'Browser agent execution was aborted.'
|
||||
: `Browser agent failed. Error: ${errorMessage}`;
|
||||
|
||||
return {
|
||||
llmContent: `Browser agent failed. Error: ${errorMessage}`,
|
||||
returnDisplay: `Browser Agent Failed\nError: ${errorMessage}`,
|
||||
llmContent: [{ text: llmContent }],
|
||||
returnDisplay: progress,
|
||||
error: {
|
||||
message: errorMessage,
|
||||
type: ToolErrorType.EXECUTION_FAILED,
|
||||
|
||||
@@ -927,11 +927,11 @@ describe('LocalAgentExecutor', () => {
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'ERROR',
|
||||
data: {
|
||||
data: expect.objectContaining({
|
||||
context: 'tool_call',
|
||||
name: TASK_COMPLETE_TOOL_NAME,
|
||||
error: expectedError,
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -1213,11 +1213,11 @@ describe('LocalAgentExecutor', () => {
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'ERROR',
|
||||
data: {
|
||||
data: expect.objectContaining({
|
||||
context: 'tool_call',
|
||||
name: TASK_COMPLETE_TOOL_NAME,
|
||||
error: expect.stringContaining('Output validation failed'),
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -1338,11 +1338,11 @@ describe('LocalAgentExecutor', () => {
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'ERROR',
|
||||
data: {
|
||||
data: expect.objectContaining({
|
||||
context: 'tool_call',
|
||||
name: LS_TOOL_NAME,
|
||||
error: toolErrorMessage,
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -1699,15 +1699,17 @@ describe('LocalAgentExecutor', () => {
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'THOUGHT_CHUNK',
|
||||
data: {
|
||||
data: expect.objectContaining({
|
||||
text: 'Execution limit reached (MAX_TURNS). Attempting one final recovery turn with a grace period.',
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'THOUGHT_CHUNK',
|
||||
data: { text: 'Graceful recovery succeeded.' },
|
||||
data: expect.objectContaining({
|
||||
text: 'Graceful recovery succeeded.',
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -1784,9 +1786,9 @@ describe('LocalAgentExecutor', () => {
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'THOUGHT_CHUNK',
|
||||
data: {
|
||||
data: expect.objectContaining({
|
||||
text: 'Execution limit reached (ERROR_NO_COMPLETE_TASK_CALL). Attempting one final recovery turn with a grace period.',
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -1882,9 +1884,9 @@ describe('LocalAgentExecutor', () => {
|
||||
expect(activities).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'THOUGHT_CHUNK',
|
||||
data: {
|
||||
data: expect.objectContaining({
|
||||
text: 'Execution limit reached (TIMEOUT). Attempting one final recovery turn with a grace period.',
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -902,6 +902,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
displayName,
|
||||
description,
|
||||
args,
|
||||
callId,
|
||||
});
|
||||
|
||||
if (toolName === TASK_COMPLETE_TOOL_NAME) {
|
||||
@@ -969,6 +970,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
});
|
||||
this.emitActivity('TOOL_CALL_END', {
|
||||
name: toolName,
|
||||
id: callId,
|
||||
output: 'Output submitted and task completed.',
|
||||
});
|
||||
} else {
|
||||
@@ -985,6 +987,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
this.emitActivity('ERROR', {
|
||||
context: 'tool_call',
|
||||
name: toolName,
|
||||
callId,
|
||||
error,
|
||||
});
|
||||
}
|
||||
@@ -1009,6 +1012,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
});
|
||||
this.emitActivity('TOOL_CALL_END', {
|
||||
name: toolName,
|
||||
id: callId,
|
||||
output: 'Result submitted and task completed.',
|
||||
});
|
||||
} else {
|
||||
@@ -1026,6 +1030,7 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
this.emitActivity('ERROR', {
|
||||
context: 'tool_call',
|
||||
name: toolName,
|
||||
callId,
|
||||
error,
|
||||
});
|
||||
}
|
||||
@@ -1086,18 +1091,21 @@ export class LocalAgentExecutor<TOutput extends z.ZodTypeAny> {
|
||||
if (call.status === 'success') {
|
||||
this.emitActivity('TOOL_CALL_END', {
|
||||
name: toolName,
|
||||
id: call.request.callId,
|
||||
output: call.response.resultDisplay,
|
||||
});
|
||||
} else if (call.status === 'error') {
|
||||
this.emitActivity('ERROR', {
|
||||
context: 'tool_call',
|
||||
name: toolName,
|
||||
callId: call.request.callId,
|
||||
error: call.response.error?.message || 'Unknown error',
|
||||
});
|
||||
} else if (call.status === 'cancelled') {
|
||||
this.emitActivity('ERROR', {
|
||||
context: 'tool_call',
|
||||
name: toolName,
|
||||
callId: call.request.callId,
|
||||
error: 'Request cancelled.',
|
||||
});
|
||||
aborted = true;
|
||||
|
||||
Reference in New Issue
Block a user