security: implement deceptive URL detection and disclosure in tool confirmations (#19288)

This commit is contained in:
Emily Hedlund
2026-02-20 15:21:31 -05:00
committed by GitHub
parent 49b2e76ee1
commit a01d7e9a05
4 changed files with 337 additions and 7 deletions

View File

@@ -0,0 +1,65 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { getDeceptiveUrlDetails, toUnicodeUrl } from './urlSecurityUtils.js';
// Specs for the URL security helpers: Unicode reconstruction of Punycode URLs
// and detection of hostnames that may be visually deceptive.
describe('urlSecurityUtils', () => {
  describe('toUnicodeUrl', () => {
    it('should convert a Punycode URL string to its Unicode version', () => {
      const converted = toUnicodeUrl('https://xn--tst-qla.com/');
      expect(converted).toBe('https://täst.com/');
    });

    it('should convert a URL object to its Unicode version', () => {
      const parsed = new URL('https://xn--tst-qla.com/path');
      const converted = toUnicodeUrl(parsed);
      expect(converted).toBe('https://täst.com/path');
    });

    it('should handle complex URLs with credentials and ports', () => {
      // Credentials, port, query and fragment must all survive the rebuild.
      const input = 'https://user:pass@xn--tst-qla.com:8080/path?q=1#hash';
      const expected = 'https://user:pass@täst.com:8080/path?q=1#hash';
      expect(toUnicodeUrl(input)).toBe(expected);
    });

    it('should correctly reconstruct the URL even if the hostname appears in the path', () => {
      // Only the authority is converted; the identical text in the path stays
      // in its Punycode form.
      const input =
        'https://xn--tst-qla.com/some/path/xn--tst-qla.com/index.html';
      const expected = 'https://täst.com/some/path/xn--tst-qla.com/index.html';
      expect(toUnicodeUrl(input)).toBe(expected);
    });

    it('should return the original string if URL parsing fails', () => {
      const unparsable = 'not a url';
      expect(toUnicodeUrl(unparsable)).toBe('not a url');
    });

    it('should return the original string for already safe URLs', () => {
      const asciiOnly = 'https://google.com/';
      expect(toUnicodeUrl(asciiOnly)).toBe('https://google.com/');
    });
  });

  describe('getDeceptiveUrlDetails', () => {
    it('should return full details for a deceptive URL', () => {
      // The hostname's first letter is a non-ASCII look-alike of Latin "e".
      const result = getDeceptiveUrlDetails('https://еxample.com');
      expect(result).not.toBeNull();
      expect(result?.originalUrl).toBe('https://еxample.com/');
      expect(result?.punycodeUrl).toBe('https://xn--xample-2of.com/');
    });

    it('should return null for safe URLs', () => {
      const result = getDeceptiveUrlDetails('https://google.com');
      expect(result).toBeNull();
    });

    it('should handle already Punycoded hostnames', () => {
      const result = getDeceptiveUrlDetails('https://xn--tst-qla.com');
      expect(result).not.toBeNull();
      expect(result?.originalUrl).toBe('https://täst.com/');
    });
  });
});

View File

@@ -0,0 +1,90 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import url from 'node:url';
/**
 * Details about a deceptive URL.
 *
 * Both fields describe the same URL: one rendered with a Unicode hostname (how
 * it may appear to a reader) and one in its serialized, Punycode-encoded form.
 */
export interface DeceptiveUrlDetails {
/**
 * The Unicode version of the visually deceptive URL, i.e. the URL with its
 * hostname decoded from Punycode.
 */
originalUrl: string;
/**
 * The ASCII-safe Punycode version of the URL (the `href` of the parsed URL).
 */
punycodeUrl: string;
}
/**
 * Reports whether a hostname carries a marker of an internationalized name:
 * either the Punycode prefix `xn--` (matched case-insensitively, anywhere in
 * the hostname) or any character outside the 7-bit ASCII range.
 *
 * @param hostname The hostname to check.
 * @returns true if deceptive markers are found, false otherwise.
 */
function containsDeceptiveMarkers(hostname: string): boolean {
  const lowered = hostname.toLowerCase();
  if (lowered.includes('xn--')) {
    return true;
  }
  // Anything outside U+0000..U+007F counts as a raw non-ASCII character.
  // eslint-disable-next-line no-control-regex
  return /[^\x00-\x7F]/.test(hostname);
}
/**
 * Converts a URL (string or object) to its visually deceptive Unicode version.
 *
 * The URL is rebuilt by hand from its components because the WHATWG URL class
 * automatically Punycodes the hostname whenever it is assigned, so a URL
 * object can never hold the Unicode form of its host.
 *
 * URLs that have no hostname (for example `mailto:` or `data:` URLs) contain
 * nothing to convert and are returned in their serialized (`href`) form.
 *
 * @param urlInput The URL string or URL object to convert.
 * @returns The reconstructed URL string with the hostname in Unicode. If the
 *   input cannot be processed, the input string itself (or the `href` of a URL
 *   object) is returned unchanged.
 */
export function toUnicodeUrl(urlInput: string | URL): string {
  try {
    const urlObj = typeof urlInput === 'string' ? new URL(urlInput) : urlInput;
    const punycodeHost = urlObj.hostname;

    // Without a host there is no authority section to rebuild; forcing a "//"
    // after the scheme would corrupt URLs such as "mailto:user@example.com".
    if (!punycodeHost) {
      return urlObj.href;
    }

    // domainToUnicode yields an empty string for domains it considers invalid;
    // keep the ASCII hostname in that case instead of dropping the host.
    const unicodeHost = url.domainToUnicode(punycodeHost) || punycodeHost;

    const protocol = urlObj.protocol + '//';

    // A URL carries credentials when either part is present, so a
    // password-only authority (":secret@host") must be preserved as well.
    const hasCredentials = urlObj.username !== '' || urlObj.password !== '';
    const passwordPart = urlObj.password ? ':' + urlObj.password : '';
    const credentials = hasCredentials
      ? `${urlObj.username}${passwordPart}@`
      : '';

    const port = urlObj.port ? ':' + urlObj.port : '';

    return `${protocol}${credentials}${unicodeHost}${port}${urlObj.pathname}${urlObj.search}${urlObj.hash}`;
  } catch {
    // Parsing failed: hand back the input untouched.
    return typeof urlInput === 'string' ? urlInput : urlInput.href;
  }
}
/**
 * Extracts deceptive URL details if a URL hostname contains non-ASCII characters
 * or is already in Punycode.
 *
 * The input is parsed with the WHATWG URL parser and the decision is made on
 * the parsed hostname. No "://" is required in the raw text: for special
 * schemes the parser also accepts forms such as "https:host" or "https:/host"
 * and resolves them to the same host, so those spellings must be inspected too.
 * Inputs that parse to a URL without a hostname (e.g. "mailto:...") are never
 * reported.
 *
 * @param urlString The URL string to check.
 * @returns DeceptiveUrlDetails if a potential deceptive URL is detected, otherwise null.
 */
export function getDeceptiveUrlDetails(
  urlString: string,
): DeceptiveUrlDetails | null {
  try {
    // Cheap pre-filter: an absolute URL always has a scheme terminated by ':',
    // so anything without one cannot parse and need not reach the parser.
    if (!urlString.includes(':')) {
      return null;
    }

    const urlObj = new URL(urlString);

    // The parsed hostname is already ASCII/Punycode; a host-less URL yields an
    // empty hostname here and is therefore treated as not deceptive.
    if (!containsDeceptiveMarkers(urlObj.hostname)) {
      return null;
    }

    return {
      originalUrl: toUnicodeUrl(urlObj),
      punycodeUrl: urlObj.href,
    };
  } catch {
    // If URL parsing fails, it's not a valid URL we can safely analyze.
    return null;
  }
}