fix(core): add sanitization to sub agent thoughts and centralize utilities (#22828)

2026-07-20 15:00:54 -07:00 · 2026-03-20 15:19:18 -04:00
parent 3335b861ee
commit 4a1cd30c88
6 changed files with 362 additions and 136 deletions
@@ -0,0 +1,103 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  sanitizeErrorMessage,
+  sanitizeToolArgs,
+  sanitizeThoughtContent,
+} from './agent-sanitization-utils.js';
+
+describe('agent-sanitization-utils', () => {
+  describe('sanitizeErrorMessage', () => {
+    // A complete BEGIN…END block embedded in a sentence collapses to a single
+    // placeholder while the surrounding text is kept.
+    it('should redact standard inline PEM content', () => {
+      const message =
+        'Here is my key: -----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA12345\n-----END RSA PRIVATE KEY----- do not share.';
+
+      const sanitized = sanitizeErrorMessage(message);
+
+      expect(sanitized).toBe('Here is my key: [REDACTED_PEM] do not share.');
+    });
+
+    // The header label may contain punctuation such as the dot in "X.509".
+    it('should redact non-standard inline PEM content (with punctuation)', () => {
+      const certificate =
+        '-----BEGIN X.509 CERTIFICATE-----\nMIIEowIBAAKCAQEA12345\n-----END X.509 CERTIFICATE-----';
+
+      expect(sanitizeErrorMessage(certificate)).toBe('[REDACTED_PEM]');
+    });
+
+    it('should not hang on ReDoS attack string for PEM redaction', () => {
+      const startedAt = Date.now();
+      // An unterminated BEGIN header followed by a long run of spaces: the
+      // shape of input that makes a backtracking PEM regex blow up.
+      const unterminatedHeader = '-----BEGIN ' + ' '.repeat(50000) + 'A';
+      const sanitized = sanitizeErrorMessage(unterminatedHeader);
+      const elapsedMs = Date.now() - startedAt;
+
+      // The call is expected to finish almost instantly (< 50ms).
+      expect(elapsedMs).toBeLessThan(50);
+
+      // No complete PEM block exists, so the text must come back untouched.
+      expect(sanitized).toBe(unterminatedHeader);
+    });
+
+    it('should redact key-value pairs with sensitive keys', () => {
+      const sanitized = sanitizeErrorMessage(
+        'Error: connection failed. --api-key="secret123"',
+      );
+
+      expect(sanitized).toContain('[REDACTED]');
+      expect(sanitized).not.toContain('secret123');
+    });
+
+    it('should redact space-separated sensitive keywords', () => {
+      // The value must be at least 8 characters long for the keyword rule to
+      // treat it as a token.
+      const secretValue = 'mySuperSecretPassword123';
+
+      const sanitized = sanitizeErrorMessage(`Using password ${secretValue}`);
+
+      expect(sanitized).toContain('[REDACTED]');
+      expect(sanitized).not.toContain(secretValue);
+    });
+  });
+
+  describe('sanitizeToolArgs', () => {
+    // Sensitive keys are redacted at every nesting level; other fields are
+    // carried over unchanged.
+    it('should redact sensitive fields in an object', () => {
+      const sanitized = sanitizeToolArgs({
+        username: 'admin',
+        password: 'superSecretPassword',
+        nested: {
+          api_key: 'abc123xyz',
+          normal_field: 'hello',
+        },
+      });
+
+      const expected = {
+        username: 'admin',
+        password: '[REDACTED]',
+        nested: {
+          api_key: '[REDACTED]',
+          normal_field: 'hello',
+        },
+      };
+      expect(sanitized).toEqual(expected);
+    });
+
+    // Each string element of an array goes through the message sanitizer.
+    it('should handle arrays and strings correctly', () => {
+      const [untouched, redacted] = sanitizeToolArgs([
+        'normal string',
+        '--api-key="secret123"',
+      ]) as string[];
+
+      expect(untouched).toBe('normal string');
+      expect(redacted).toContain('[REDACTED]');
+      expect(redacted).not.toContain('secret123');
+    });
+  });
+
+  describe('sanitizeThoughtContent', () => {
+    // A keyword followed by a long token is redacted inside free-form thought
+    // text as well.
+    it('should redact sensitive patterns from thought content', () => {
+      const tokenValue = '1234567890abcdef';
+      const thought = `I will now authenticate using token ${tokenValue}.`;
+
+      const sanitized = sanitizeThoughtContent(thought);
+
+      expect(sanitized).toContain('[REDACTED]');
+      expect(sanitized).not.toContain(tokenValue);
+    });
+  });
+});
@@ -0,0 +1,154 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Lowercase key names that mark a value as sensitive and therefore subject
+ * to redaction.
+ *
+ * The list is consumed in two ways within this module:
+ * - `sanitizeToolArgs` lowercases an object key, strips `-` and `_` from both
+ *   the key and each pattern, and redacts the value when the key contains
+ *   any pattern as a substring.
+ * - `sanitizeErrorMessage` joins the patterns into case-insensitive regular
+ *   expressions in which every `-` / `_` becomes an optional separator.
+ *
+ * The order of the entries is the order of the alternatives in those regular
+ * expressions.
+ */
+export const SENSITIVE_KEY_PATTERNS = [
+  'password',
+  'pwd',
+  'apikey',
+  'api_key',
+  'api-key',
+  'token',
+  'secret',
+  'credential',
+  'auth',
+  'authorization',
+  'access_token',
+  'access_key',
+  'refresh_token',
+  'session_id',
+  'cookie',
+  'passphrase',
+  'privatekey',
+  'private_key',
+  'private-key',
+  'secret_key',
+  'client_secret',
+  'client_id',
+];
+
+/**
+ * Returns a sanitized copy of tool arguments with sensitive data redacted.
+ *
+ * - Strings are passed through `sanitizeErrorMessage`.
+ * - Arrays are sanitized element by element.
+ * - Objects are copied key by key: the value of a key that matches an entry
+ *   of `SENSITIVE_KEY_PATTERNS` is replaced with `'[REDACTED]'`; every other
+ *   value is sanitized recursively.
+ * - Any other value (number, boolean, `null`, `undefined`, ...) is returned
+ *   as is.
+ *
+ * Key matching is case-insensitive, ignores `-` and `_`, first URL-decodes
+ * the key (e.g. `api%5fkey`), and is a substring match, so a key such as
+ * `userPassword` is redacted too. The original key spelling is kept in the
+ * result.
+ *
+ * The input itself is not modified; new arrays and objects are built.
+ *
+ * @param args - Arbitrary tool arguments (string, array, object or primitive).
+ * @returns The sanitized counterpart of `args`.
+ */
+export function sanitizeToolArgs(args: unknown): unknown {
+  if (typeof args === 'string') {
+    return sanitizeErrorMessage(args);
+  }
+  if (typeof args !== 'object' || args === null) {
+    return args;
+  }
+
+  if (Array.isArray(args)) {
+    return args.map((item) => sanitizeToolArgs(item));
+  }
+
+  // Strip separators from the patterns once per object rather than once per
+  // key/pattern pair.
+  const normalizedPatterns = SENSITIVE_KEY_PATTERNS.map((pattern) =>
+    pattern.replace(/[-_]/g, ''),
+  );
+
+  // Object.fromEntries defines plain own properties. Assigning through
+  // `obj[key] = value` would instead trigger the `__proto__` setter for an own
+  // "__proto__" key (possible in JSON.parse'd input), silently dropping that
+  // entry from the sanitized copy.
+  return Object.fromEntries(
+    Object.entries(args).map(([key, value]): [string, unknown] => {
+      // Decode key to handle URL-encoded sensitive keys (e.g., api%5fkey)
+      let decodedKey = key;
+      try {
+        decodedKey = decodeURIComponent(key);
+      } catch {
+        // Malformed escape sequence: fall back to matching on the raw key.
+      }
+
+      const keyNormalized = decodedKey.toLowerCase().replace(/[-_]/g, '');
+      const isSensitive = normalizedPatterns.some((pattern) =>
+        keyNormalized.includes(pattern),
+      );
+
+      return [key, isSensitive ? '[REDACTED]' : sanitizeToolArgs(value)];
+    }),
+  );
+}
+
+/**
+ * Replaces every complete inline PEM block (a `-----BEGIN…-----` header
+ * through the closing dashes of the following `-----END…-----` footer) with
+ * `[REDACTED_PEM]`.
+ *
+ * Uses plain `indexOf` scans instead of a regular expression so that an
+ * unterminated header cannot cause catastrophic backtracking. Scanning stops
+ * at the first header that cannot be completed into a full block.
+ */
+function redactPemBlocks(text: string): string {
+  const dashes = '-----';
+  const beginMarker = '-----BEGIN';
+  const endMarker = '-----END';
+  const placeholder = '[REDACTED_PEM]';
+
+  let result = text;
+  let blockStart = result.indexOf(beginMarker);
+  while (blockStart !== -1) {
+    // Closing dashes of the BEGIN header.
+    const beginClose = result.indexOf(dashes, blockStart + beginMarker.length);
+    if (beginClose === -1) {
+      break;
+    }
+
+    // Start of the END footer.
+    const endStart = result.indexOf(endMarker, beginClose + dashes.length);
+    if (endStart === -1) {
+      break;
+    }
+
+    // Closing dashes of the END footer.
+    const endClose = result.indexOf(dashes, endStart + endMarker.length);
+    if (endClose === -1) {
+      break;
+    }
+
+    // A complete block was found: splice the placeholder in its place.
+    result =
+      result.substring(0, blockStart) +
+      placeholder +
+      result.substring(endClose + dashes.length);
+
+    // Resume searching right after the inserted placeholder.
+    blockStart = result.indexOf(beginMarker, blockStart + placeholder.length);
+  }
+
+  return result;
+}
+
+/**
+ * Sanitizes a message by redacting patterns that may carry sensitive data.
+ *
+ * The following steps run in order:
+ * 1. Inline PEM blocks become `[REDACTED_PEM]`.
+ * 2. `key=value` / `key: value` pairs (also `--key=value`, quoted keys and
+ *    URL-encoded delimiters) whose key is listed in `SENSITIVE_KEY_PATTERNS`
+ *    keep the key and delimiter but get `[REDACTED]` as value. The value is
+ *    either a quoted string or an unquoted run of words that ends before the
+ *    next word that looks like the start of another `key=` / `key:` pair.
+ * 3. A sensitive keyword (or `bearer`) followed by whitespace and a token of
+ *    at least 8 characters has the token replaced with `[REDACTED]`.
+ * 4. File paths ending in `.key`, `.pem`, `.p12` or `.pfx` are replaced with
+ *    the fixed string `/path/to/[REDACTED].key`.
+ *
+ * @param message - The text to sanitize. Empty input is returned unchanged.
+ * @returns The message with the patterns above redacted.
+ */
+export function sanitizeErrorMessage(message: string): string {
+  if (!message) return message;
+
+  // 1. Redact inline PEM content (iterative scan, no regex, to avoid ReDoS)
+  let sanitized = redactPemBlocks(message);
+
+  // Every sensitive key as a regex fragment in which each `-` / `_` is an
+  // optional separator that may also appear URL-encoded. Shared by steps 2
+  // and 3.
+  const keyFragments = SENSITIVE_KEY_PATTERNS.map((p) =>
+    p.replace(/[-_]/g, '(?:[-_]|%2D|%5F|%2d|%5f)?'),
+  );
+
+  // One non-whitespace run, optionally followed by further words as long as
+  // the next word does not look like `name=` / `name:`.
+  const unquotedValue = `[^\\s]+(?:\\s+(?![a-zA-Z0-9_.-]+(?:=|:))[^\\s=:<>]+)*`;
+  const valuePattern = `(?:"[^"]*"|'[^']*'|${unquotedValue})`;
+
+  // 2. Handle key-value pairs with delimiters (=, :, CLI-style --flag).
+  //    Group 1 (optional `--`, optionally quoted key, delimiter) is kept.
+  const keyWithDelimiter = new RegExp(
+    `((?:--)?("|')?(${keyFragments.join('|')})\\2\\s*(?:[:=]|%3A|%3D)\\s*)${valuePattern}`,
+    'gi',
+  );
+  sanitized = sanitized.replace(keyWithDelimiter, '$1[REDACTED]');
+
+  // 3. Handle space-separated sensitive keywords (e.g. "password mypass", "--api-key secret")
+  const tokenValuePattern = `[A-Za-z0-9._\\-/+=]{8,}`;
+  const spaceKeywords = [...keyFragments, 'bearer'];
+  const spaceSeparated = new RegExp(
+    `\\b((?:--)?(?:${spaceKeywords.join('|')})(?:\\s*:\\s*bearer)?\\s+)(${tokenValuePattern})`,
+    'gi',
+  );
+  sanitized = sanitized.replace(spaceSeparated, '$1[REDACTED]');
+
+  // 4. Handle file path redaction
+  sanitized = sanitized.replace(
+    /((?:[/\\][a-zA-Z0-9_-]+)*[/\\][a-zA-Z0-9_-]*\.(?:key|pem|p12|pfx))/gi,
+    '/path/to/[REDACTED].key',
+  );
+
+  return sanitized;
+}
+
+/**
+ * Redacts sensitive data patterns from LLM thought text.
+ *
+ * Thought text is handled exactly like an error message: it is handed to
+ * `sanitizeErrorMessage` and that result is returned as is.
+ *
+ * @param text - Raw thought content.
+ * @returns The thought content after redaction.
+ */
+export function sanitizeThoughtContent(text: string): string {
+  const redacted = sanitizeErrorMessage(text);
+  return redacted;
+}