fix(core): add sanitization to sub-agent thoughts and centralize utilities (#22828)

2026-06-18 07:17:16 -07:00 · 2026-03-20 15:19:18 -04:00
parent 3335b861ee
commit 4a1cd30c88
6 changed files with 362 additions and 136 deletions
@@ -37,138 +37,16 @@ import {
  cleanupBrowserAgent,
 } from './browserAgentFactory.js';
 import { removeInputBlocker } from './inputBlocker.js';
+import {
+  sanitizeThoughtContent,
+  sanitizeToolArgs,
+  sanitizeErrorMessage,
+} from '../../utils/agent-sanitization-utils.js';

 const INPUT_PREVIEW_MAX_LENGTH = 50;
 const DESCRIPTION_MAX_LENGTH = 200;
 const MAX_RECENT_ACTIVITY = 20;

-/**
- * Sensitive key patterns used for redaction.
- */
-const SENSITIVE_KEY_PATTERNS = [
-  'password',
-  'pwd',
-  'apikey',
-  'api_key',
-  'api-key',
-  'token',
-  'secret',
-  'credential',
-  'auth',
-  'authorization',
-  'access_token',
-  'access_key',
-  'refresh_token',
-  'session_id',
-  'cookie',
-  'passphrase',
-  'privatekey',
-  'private_key',
-  'private-key',
-  'secret_key',
-  'client_secret',
-  'client_id',
-];
-
-/**
- * Sanitizes tool arguments by recursively redacting sensitive fields.
- * Supports nested objects and arrays.
- */
-function sanitizeToolArgs(args: unknown): unknown {
-  if (typeof args === 'string') {
-    return sanitizeErrorMessage(args);
-  }
-  if (typeof args !== 'object' || args === null) {
-    return args;
-  }
-
-  if (Array.isArray(args)) {
-    return args.map(sanitizeToolArgs);
-  }
-
-  const sanitized: Record<string, unknown> = {};
-
-  for (const [key, value] of Object.entries(args)) {
-    // Decode key to handle URL-encoded sensitive keys (e.g., api%5fkey)
-    let decodedKey = key;
-    try {
-      decodedKey = decodeURIComponent(key);
-    } catch {
-      // Ignore decoding errors
-    }
-    const keyNormalized = decodedKey.toLowerCase().replace(/[-_]/g, '');
-    const isSensitive = SENSITIVE_KEY_PATTERNS.some((pattern) =>
-      keyNormalized.includes(pattern.replace(/[-_]/g, '')),
-    );
-    if (isSensitive) {
-      sanitized[key] = '[REDACTED]';
-    } else {
-      sanitized[key] = sanitizeToolArgs(value);
-    }
-  }
-
-  return sanitized;
-}
-
-/**
- * Sanitizes error messages by redacting potential sensitive data patterns.
- * Uses [^\s'"]+ to catch JWTs, tokens with dots/slashes, and other complex values.
- */
-function sanitizeErrorMessage(message: string): string {
-  if (!message) return message;
-
-  let sanitized = message;
-
-  // 1. Redact inline PEM content
-  sanitized = sanitized.replace(
-    /-----BEGIN\s+[\w\s]+-----[\s\S]*?-----END\s+[\w\s]+-----/g,
-    '[REDACTED_PEM]',
-  );
-
-  const unquotedValue = `[^\\s]+(?:\\s+(?![a-zA-Z0-9_.-]+(?:=|:))[^\\s=:<>]+)*`;
-  const valuePattern = `(?:"[^"]*"|'[^']*'|${unquotedValue})`;
-
-  // 2. Handle key-value pairs with delimiters (=, :, space, CLI-style --flag)
-  const urlSafeKeyPatternStr = SENSITIVE_KEY_PATTERNS.map((p) =>
-    p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'),
-  ).join('|');
-
-  const keyWithDelimiter = new RegExp(
-    `((?:--)?("|')?(${urlSafeKeyPatternStr})\\2\\s*(?:[:=]|%3A|%3D)\\s*)${valuePattern}`,
-    'gi',
-  );
-  sanitized = sanitized.replace(keyWithDelimiter, '$1[REDACTED]');
-
-  // 3. Handle space-separated sensitive keywords (e.g. "password mypass", "--api-key secret")
-  const tokenValuePattern = `[A-Za-z0-9._\\-/+=]{8,}`;
-  const spaceKeywords = [
-    ...SENSITIVE_KEY_PATTERNS.map((p) =>
-      p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'),
-    ),
-    'bearer',
-  ];
-  const spaceSeparated = new RegExp(
-    `\\b((?:--)?(?:${spaceKeywords.join('|')})(?:\\s*:\\s*bearer)?\\s+)(${tokenValuePattern})`,
-    'gi',
-  );
-  sanitized = sanitized.replace(spaceSeparated, '$1[REDACTED]');
-
-  // 4. Handle file path redaction
-  sanitized = sanitized.replace(
-    /((?:[/\\][a-zA-Z0-9_-]+)*[/\\][a-zA-Z0-9_-]*\.(?:key|pem|p12|pfx))/gi,
-    '/path/to/[REDACTED].key',
-  );
-
-  return sanitized;
-}
-
-/**
- * Sanitizes LLM thought content by redacting sensitive data patterns.
- */
-function sanitizeThoughtContent(text: string): string {
-  return sanitizeErrorMessage(text);
-}
-
 /**
 * Browser agent invocation with async tool setup.
 *
@@ -284,14 +162,13 @@ export class BrowserAgentInvocation extends BaseToolInvocation<
          case 'THOUGHT_CHUNK': {
            const text = String(activity.data['text']);
            const lastItem = recentActivity[recentActivity.length - 1];
+
            if (
              lastItem &&
              lastItem.type === 'thought' &&
              lastItem.status === 'running'
            ) {
-              lastItem.content = sanitizeThoughtContent(
-                lastItem.content + text,
-              );
+              lastItem.content = sanitizeThoughtContent(text);
            } else {
              recentActivity.push({
                id: randomUUID(),