// Mirror of https://github.com/google-gemini/gemini-cli.git
// Synced 2026-04-16 00:00:52 -07:00 (493 lines, 14 KiB, TypeScript)
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
/**
 * @fileoverview Browser agent invocation that handles async tool setup.
 *
 * Unlike regular LocalSubagentInvocation, this invocation:
 * 1. Uses browserAgentFactory to create definition with MCP tools
 * 2. Cleans up browser resources after execution
 *
 * The MCP tools are only available in the browser agent's isolated registry.
 */
|
|
|
|
import { randomUUID } from 'node:crypto';
|
|
import type { Config } from '../../config/config.js';
|
|
import { LocalAgentExecutor } from '../local-executor.js';
|
|
import { safeJsonToMarkdown } from '../../utils/markdownUtils.js';
|
|
import {
|
|
BaseToolInvocation,
|
|
type ToolResult,
|
|
type ToolLiveOutput,
|
|
} from '../../tools/tools.js';
|
|
import { ToolErrorType } from '../../tools/tool-error.js';
|
|
import {
|
|
type AgentInputs,
|
|
type SubagentActivityEvent,
|
|
type SubagentProgress,
|
|
type SubagentActivityItem,
|
|
} from '../types.js';
|
|
import type { MessageBus } from '../../confirmation-bus/message-bus.js';
|
|
import {
|
|
createBrowserAgentDefinition,
|
|
cleanupBrowserAgent,
|
|
} from './browserAgentFactory.js';
|
|
|
|
/** Maximum number of characters of each input value shown by `getDescription()`. */
const INPUT_PREVIEW_MAX_LENGTH = 50;

/** Maximum total length of the string returned by `getDescription()`. */
const DESCRIPTION_MAX_LENGTH = 200;

/** Maximum number of activity items kept (and streamed) while the agent runs. */
const MAX_RECENT_ACTIVITY = 20;
|
|
|
|
/**
 * Sensitive key patterns used for redaction.
 *
 * The entries are matched case-insensitively by the sanitizers in this file:
 * as substrings of object keys (after stripping `-` / `_`) and as key names
 * inside free-form text. The list is read-only; add new entries here rather
 * than mutating it at runtime.
 */
const SENSITIVE_KEY_PATTERNS: readonly string[] = [
  'password',
  'pwd',
  'apikey',
  'api_key',
  'api-key',
  'token',
  'secret',
  'credential',
  'auth',
  'authorization',
  'access_token',
  'access_key',
  'refresh_token',
  'session_id',
  'cookie',
  'passphrase',
  'privatekey',
  'private_key',
  'private-key',
  'secret_key',
  'client_secret',
  'client_id',
];
|
|
|
|
/**
 * Sanitizes tool arguments by recursively redacting sensitive fields.
 * Supports nested objects and arrays.
 *
 * - Strings are passed through `sanitizeErrorMessage`.
 * - Other primitives (and `null` / `undefined`) are returned unchanged.
 * - Arrays are sanitized element by element.
 * - For objects, a property whose key contains one of
 *   `SENSITIVE_KEY_PATTERNS` (compared case-insensitively, ignoring `-` and
 *   `_`, after URL-decoding the key) has its value replaced with
 *   `'[REDACTED]'`; every other value is sanitized recursively.
 *
 * @param args Arbitrary tool-call arguments.
 * @returns A sanitized copy; the input is not mutated.
 */
function sanitizeToolArgs(args: unknown): unknown {
  if (typeof args === 'string') {
    return sanitizeErrorMessage(args);
  }
  if (typeof args !== 'object' || args === null) {
    return args;
  }
  if (Array.isArray(args)) {
    return args.map((item) => sanitizeToolArgs(item));
  }

  // Loop-invariant: normalize the patterns once per object, not once per key.
  const normalizedPatterns = SENSITIVE_KEY_PATTERNS.map((pattern) =>
    pattern.replace(/[-_]/g, ''),
  );

  const sanitizedEntries = Object.entries(args).map(
    ([key, value]): [string, unknown] => {
      // Decode key to handle URL-encoded sensitive keys (e.g., api%5fkey)
      let decodedKey = key;
      try {
        decodedKey = decodeURIComponent(key);
      } catch {
        // Malformed escape sequence: fall back to matching on the raw key.
      }
      const keyNormalized = decodedKey.toLowerCase().replace(/[-_]/g, '');
      const isSensitive = normalizedPatterns.some((pattern) =>
        keyNormalized.includes(pattern),
      );
      return [key, isSensitive ? '[REDACTED]' : sanitizeToolArgs(value)];
    },
  );

  // Object.fromEntries defines own data properties, so an own "__proto__" key
  // (possible after JSON.parse) is preserved instead of replacing the
  // prototype of the result, which plain `obj[key] = value` would do.
  return Object.fromEntries(sanitizedEntries);
}
|
|
|
|
/**
 * Sanitizes error messages by redacting potential sensitive data patterns.
 *
 * Redaction passes, applied in order:
 * 1. Inline PEM blocks (`-----BEGIN ...-----` to `-----END ...-----`) become
 *    `[REDACTED_PEM]`.
 * 2. `key=value` / `key: value` pairs (optionally quoted or `--flag` style,
 *    including URL-encoded delimiters) whose key is in
 *    `SENSITIVE_KEY_PATTERNS` keep the key and get `[REDACTED]` as the value.
 *    The unquoted-value pattern also swallows following words until the next
 *    `name=` / `name:` token, so multi-word secrets are covered.
 * 3. A sensitive keyword (or `bearer`) followed by whitespace and a
 *    token-like run of at least 8 characters has that run redacted.
 * 4. Paths ending in `.key`, `.pem`, `.p12` or `.pfx` are replaced with
 *    `/path/to/[REDACTED].key`.
 *
 * @param message Text to sanitize; empty input is returned unchanged.
 * @returns The text with the patterns above redacted.
 */
function sanitizeErrorMessage(message: string): string {
  if (!message) return message;

  let sanitized = message;

  // 1. Redact inline PEM content
  sanitized = sanitized.replace(
    /-----BEGIN\s+[\w\s]+-----[\s\S]*?-----END\s+[\w\s]+-----/g,
    '[REDACTED_PEM]',
  );

  const unquotedValue = `[^\\s]+(?:\\s+(?![a-zA-Z0-9_.-]+(?:=|:))[^\\s=:<>]+)*`;
  const valuePattern = `(?:"[^"]*"|'[^']*'|${unquotedValue})`;

  // Each key pattern with its `-` / `_` separators made optional and allowed
  // to appear URL-encoded. Shared by passes 2 and 3.
  const keyAlternatives = SENSITIVE_KEY_PATTERNS.map((p) =>
    p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'),
  );

  // 2. Handle key-value pairs with delimiters (=, :, space, CLI-style --flag)
  const keyWithDelimiter = new RegExp(
    `((?:--)?("|')?(${keyAlternatives.join('|')})\\2\\s*(?:[:=]|%3A|%3D)\\s*)${valuePattern}`,
    'gi',
  );
  sanitized = sanitized.replace(keyWithDelimiter, '$1[REDACTED]');

  // 3. Handle space-separated sensitive keywords (e.g. "password mypass", "--api-key secret")
  const tokenValuePattern = `[A-Za-z0-9._\\-/+=]{8,}`;
  const spaceKeywords = [...keyAlternatives, 'bearer'];
  const spaceSeparated = new RegExp(
    `\\b((?:--)?(?:${spaceKeywords.join('|')})(?:\\s*:\\s*bearer)?\\s+)(${tokenValuePattern})`,
    'gi',
  );
  sanitized = sanitized.replace(spaceSeparated, '$1[REDACTED]');

  // 4. Handle file path redaction.
  // The basename may contain dots (e.g. "my.server.pem"), and the extension
  // must not be followed by another word character, so ordinary names such as
  // "/tmp/foo.keyboard" are left untouched instead of being cut mid-word.
  sanitized = sanitized.replace(
    /(?:[/\\][a-zA-Z0-9_-]+)*[/\\][a-zA-Z0-9_.-]*\.(?:key|pem|p12|pfx)(?![a-zA-Z0-9_])/gi,
    '/path/to/[REDACTED].key',
  );

  return sanitized;
}
|
|
|
|
/**
 * Sanitizes LLM thought content by redacting sensitive data patterns.
 *
 * Thin alias over `sanitizeErrorMessage`, so thoughts and error messages are
 * redacted by exactly the same rules.
 *
 * @param text Thought text (or other free-form output) to sanitize.
 * @returns The text as returned by `sanitizeErrorMessage`.
 */
function sanitizeThoughtContent(text: string): string {
  return sanitizeErrorMessage(text);
}
|
|
|
|
/**
 * Browser agent invocation with async tool setup.
 *
 * This invocation handles the browser agent's special requirements:
 * - MCP connection and tool wrapping at invocation time
 * - Browser cleanup after execution
 */
export class BrowserAgentInvocation extends BaseToolInvocation<
  AgentInputs,
  ToolResult
> {
  /**
   * @param config Configuration passed to the agent factory and the executor.
   * @param params Inputs for the agent run.
   * @param messageBus Bus forwarded to the base invocation and the factory.
   * @param _toolName Tool name; defaults to `'browser_agent'`.
   * @param _toolDisplayName Display name; defaults to `'Browser Agent'`.
   */
  constructor(
    private readonly config: Config,
    params: AgentInputs,
    messageBus: MessageBus,
    _toolName?: string,
    _toolDisplayName?: string,
  ) {
    // Note: BrowserAgentDefinition is a factory function, so we use hardcoded names
    super(
      params,
      messageBus,
      _toolName ?? 'browser_agent',
      _toolDisplayName ?? 'Browser Agent',
    );
  }

  /**
   * Returns a concise, human-readable description of the invocation.
   *
   * Each input value is converted with `String()` and cut to
   * `INPUT_PREVIEW_MAX_LENGTH` characters; the whole description is cut to
   * `DESCRIPTION_MAX_LENGTH` characters.
   */
  getDescription(): string {
    const inputSummary = Object.entries(this.params)
      .map(
        ([key, value]) =>
          `${key}: ${String(value).slice(0, INPUT_PREVIEW_MAX_LENGTH)}`,
      )
      .join(', ');

    const description = `Running browser agent with inputs: { ${inputSummary} }`;
    return description.slice(0, DESCRIPTION_MAX_LENGTH);
  }

  /**
   * Executes the browser agent.
   *
   * This method:
   * 1. Creates the agent definition (and browser manager) via
   *    `createBrowserAgentDefinition`
   * 2. Runs the agent via `LocalAgentExecutor`, streaming sanitized progress
   *    through `updateOutput` when it is provided
   * 3. Converts a thrown error into an error `ToolResult` instead of rethrowing
   * 4. Cleans up browser resources in `finally` if a browser manager was created
   *
   * @param signal Abort signal forwarded to the executor.
   * @param updateOutput Optional sink for live `SubagentProgress` updates.
   * @returns The agent result, or an error/cancelled result if execution threw.
   */
  async execute(
    signal: AbortSignal,
    updateOutput?: (output: ToolLiveOutput) => void,
  ): Promise<ToolResult> {
    let browserManager:
      | Awaited<
          ReturnType<typeof createBrowserAgentDefinition>
        >['browserManager']
      | undefined;
    let recentActivity: SubagentActivityItem[] = [];
    const agentName = this['_toolName'] ?? 'browser_agent';

    /** Builds a progress payload holding a shallow copy of the activity list. */
    const buildProgress = (
      state: SubagentProgress['state'],
    ): SubagentProgress => ({
      isSubagentProgress: true,
      agentName,
      recentActivity: [...recentActivity],
      state,
    });

    /** Keeps only the newest MAX_RECENT_ACTIVITY items. */
    const trimActivity = (): void => {
      if (recentActivity.length > MAX_RECENT_ACTIVITY) {
        recentActivity = recentActivity.slice(-MAX_RECENT_ACTIVITY);
      }
    };

    /**
     * Sets the status of the most recent still-running tool call with the
     * given id. Returns whether such an item was found.
     */
    const settleToolCall = (
      callId: string,
      status: SubagentActivityItem['status'],
    ): boolean => {
      for (let i = recentActivity.length - 1; i >= 0; i--) {
        const item = recentActivity[i];
        if (
          item.type === 'tool_call' &&
          item.id === callId &&
          item.status === 'running'
        ) {
          item.status = status;
          return true;
        }
      }
      return false;
    };

    try {
      if (updateOutput) {
        // Send initial state (activity list is still empty here)
        updateOutput(buildProgress('running'));
      }

      // Create definition with MCP tools
      // Note: printOutput is used for low-level connection logs before agent starts
      const printOutput = updateOutput
        ? (msg: string): void => {
            recentActivity.push({
              id: randomUUID(),
              type: 'thought',
              content: sanitizeThoughtContent(msg),
              status: 'completed',
            });
            trimActivity();
            updateOutput(buildProgress('running'));
          }
        : undefined;

      const result = await createBrowserAgentDefinition(
        this.config,
        this.messageBus,
        printOutput,
      );
      const { definition } = result;
      browserManager = result.browserManager;

      // Create activity callback for streaming output
      const onActivity = (activity: SubagentActivityEvent): void => {
        if (!updateOutput) return;

        let updated = false;

        switch (activity.type) {
          case 'THOUGHT_CHUNK': {
            const text = String(activity.data['text']);
            const lastItem = recentActivity[recentActivity.length - 1];
            if (
              lastItem &&
              lastItem.type === 'thought' &&
              lastItem.status === 'running'
            ) {
              // Re-sanitize the joined text so a secret split across two
              // chunks is still caught.
              lastItem.content = sanitizeThoughtContent(
                lastItem.content + text,
              );
            } else {
              recentActivity.push({
                id: randomUUID(),
                type: 'thought',
                content: sanitizeThoughtContent(text),
                status: 'running',
              });
            }
            updated = true;
            break;
          }
          case 'TOOL_CALL_START': {
            const name = String(activity.data['name']);
            const displayName = activity.data['displayName']
              ? sanitizeErrorMessage(String(activity.data['displayName']))
              : undefined;
            const description = activity.data['description']
              ? sanitizeErrorMessage(String(activity.data['description']))
              : undefined;
            const args = JSON.stringify(
              sanitizeToolArgs(activity.data['args']),
            );
            const callId = activity.data['callId']
              ? String(activity.data['callId'])
              : randomUUID();
            recentActivity.push({
              id: callId,
              type: 'tool_call',
              content: name,
              displayName,
              description,
              args,
              status: 'running',
            });
            updated = true;
            break;
          }
          case 'TOOL_CALL_END': {
            // NOTE(review): the id is read from `data.id` here but from
            // `data.callId` in TOOL_CALL_START and ERROR - confirm this
            // matches what the executor emits.
            const callId = activity.data['id']
              ? String(activity.data['id'])
              : undefined;
            // Find the tool call by ID
            if (callId !== undefined) {
              updated = settleToolCall(callId, 'completed');
            }
            break;
          }
          case 'ERROR': {
            const error = String(activity.data['error']);
            const isCancellation = error === 'Request cancelled.';
            const callId = activity.data['callId']
              ? String(activity.data['callId'])
              : undefined;
            const newStatus = isCancellation ? 'cancelled' : 'error';

            if (callId) {
              // Mark the specific tool as error/cancelled
              settleToolCall(callId, newStatus);
            } else {
              // No specific tool: mark ALL running tool_call items
              for (const item of recentActivity) {
                if (item.type === 'tool_call' && item.status === 'running') {
                  item.status = newStatus;
                }
              }
            }

            // Sanitize the error message before emitting
            const sanitizedError = sanitizeErrorMessage(error);
            recentActivity.push({
              id: randomUUID(),
              type: 'thought',
              content: isCancellation
                ? sanitizedError
                : `Error: ${sanitizedError}`,
              status: newStatus,
            });
            updated = true;
            break;
          }
          default:
            break;
        }

        if (updated) {
          trimActivity();
          updateOutput(buildProgress('running'));
        }
      };

      // Create and run executor with the configured definition
      const executor = await LocalAgentExecutor.create(
        definition,
        this.config,
        onActivity,
      );

      const output = await executor.run(this.params, signal);

      const displayResult = safeJsonToMarkdown(output.result);

      const resultContent = `Browser agent finished.
Termination Reason: ${output.terminate_reason}
Result:
${output.result}`;

      const displayContent = `
Browser Agent Finished

Termination Reason: ${output.terminate_reason}

Result:
${displayResult}
`;

      if (updateOutput) {
        updateOutput(buildProgress('completed'));
      }

      return {
        llmContent: [{ text: resultContent }],
        returnDisplay: displayContent,
      };
    } catch (error) {
      const rawErrorMessage =
        error instanceof Error ? error.message : String(error);
      const isAbort =
        (error instanceof Error && error.name === 'AbortError') ||
        rawErrorMessage.includes('Aborted');
      const errorMessage = sanitizeErrorMessage(rawErrorMessage);

      // Mark any running items as error/cancelled
      for (const item of recentActivity) {
        if (item.status === 'running') {
          item.status = isAbort ? 'cancelled' : 'error';
        }
      }

      // The same payload is streamed (if possible) and returned for display.
      const progress = buildProgress(isAbort ? 'cancelled' : 'error');

      if (updateOutput) {
        updateOutput(progress);
      }

      const llmContent = isAbort
        ? 'Browser agent execution was aborted.'
        : `Browser agent failed. Error: ${errorMessage}`;

      return {
        llmContent: [{ text: llmContent }],
        returnDisplay: progress,
        error: {
          message: errorMessage,
          type: ToolErrorType.EXECUTION_FAILED,
        },
      };
    } finally {
      // Always cleanup browser resources
      if (browserManager) {
        await cleanupBrowserAgent(browserManager);
      }
    }
  }
}
|