222 lines
5.9 KiB
JavaScript
222 lines
5.9 KiB
JavaScript
'use strict';
|
|
// TODO: Use the `URL` global when targeting Node.js 10
|
|
// Prefer the global `URL` constructor when the runtime defines one; otherwise
// fall back to the `URL` class exported by the `url` module.
const URLParser = typeof URL !== 'undefined' ? URL : require('url').URL;
|
|
|
|
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
|
// MIME type that `normalizeDataURL` leaves out of its output when it is the
// only media-type component (no attributes and no `base64` marker remain).
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';
|
|
// Charset treated as the default: `normalizeDataURL` removes a `charset`
// attribute whose lowercased value equals this string.
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
|
|
|
|
/**
 * Check whether a name matches at least one of the given filters.
 *
 * @param {string} name - Value to test (a query parameter key or a path component).
 * @param {Array<string|RegExp>} filters - Strings are compared with `===`;
 *   regular expressions are applied with `.test()`.
 * @returns {boolean} `true` when any filter matches `name`, otherwise `false`.
 */
const testParameter = (name, filters) => {
	return filters.some(filter => {
		if (!(filter instanceof RegExp)) {
			return filter === name;
		}

		// A global or sticky regex resumes matching at its `lastIndex`, so a
		// successful match on one name would make the next name fail. The filter
		// is reused for every name, so always start matching from the beginning.
		if (filter.global || filter.sticky) {
			filter.lastIndex = 0;
		}

		return filter.test(name);
	});
};
|
|
|
|
/**
 * Normalize a `data:` URL.
 *
 * The media type is split on `;`. The MIME type is lowercased, each attribute is
 * trimmed, a `charset` value is lowercased and dropped when it equals
 * `DATA_URL_DEFAULT_CHARSET`, and a MIME type equal to `DATA_URL_DEFAULT_MIME_TYPE`
 * is omitted when nothing else remains in the media type. The body of a base64
 * URL is trimmed; other bodies are returned untouched.
 *
 * @param {string} urlString - The data URL; the `data:` scheme may be in any letter case.
 * @param {object} options
 * @param {boolean} options.stripHash - When truthy, the `#…` part is removed.
 * @returns {string} The normalized data URL, always starting with lowercase `data:`.
 * @throws {Error} `Invalid URL: …` when the string does not have the `data:<media type>,<data>` shape.
 */
const normalizeDataURL = (urlString, {stripHash}) => {
	// The scheme is matched case-insensitively so that every input the caller
	// recognises with `/^data:/i` is also accepted here.
	const parts = urlString.match(/^data:([^,]*?),([^#]*?)(?:#(.*))?$/i);

	if (!parts) {
		throw new Error(`Invalid URL: ${urlString}`);
	}

	const mediaType = parts[1].split(';');
	const body = parts[2];
	const hash = stripHash ? '' : parts[3];

	// A trailing `base64` token is a flag, not an attribute; it is re-appended last.
	let base64 = false;

	if (mediaType[mediaType.length - 1] === 'base64') {
		mediaType.pop();
		base64 = true;
	}

	// Lowercase MIME type
	const mimeType = (mediaType.shift() || '').toLowerCase();
	const attributes = mediaType
		.map(attribute => {
			// Split on the first `=` only, so a value that itself contains `=` is kept whole.
			const separatorIndex = attribute.indexOf('=');
			const key = (separatorIndex === -1 ? attribute : attribute.slice(0, separatorIndex)).trim();
			let value = separatorIndex === -1 ? '' : attribute.slice(separatorIndex + 1).trim();

			// Lowercase `charset`
			if (key === 'charset') {
				value = value.toLowerCase();

				if (value === DATA_URL_DEFAULT_CHARSET) {
					return '';
				}
			}

			return `${key}${value ? `=${value}` : ''}`;
		})
		// Drop empty attributes and the removed default charset.
		.filter(Boolean);

	const normalizedMediaType = [
		...attributes
	];

	if (base64) {
		normalizedMediaType.push('base64');
	}

	// The MIME type is only written out when something follows it or when it
	// differs from the default.
	if (normalizedMediaType.length !== 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
		normalizedMediaType.unshift(mimeType);
	}

	return `data:${normalizedMediaType.join(';')},${base64 ? body.trim() : body}${hash ? `#${hash}` : ''}`;
};
|
|
|
|
/**
 * Normalize a URL string.
 *
 * `data:` URLs are handed to `normalizeDataURL`. Any other input is parsed with
 * `URLParser`, adjusted according to `options`, and serialized back to a string.
 *
 * @param {string} urlString - URL to normalize; surrounding whitespace is trimmed.
 * @param {object} [options] - Overrides for the defaults below.
 * @param {string} [options.defaultProtocol='http:'] - Prepended to URLs without a protocol and to `//…` URLs.
 * @param {boolean} [options.normalizeProtocol=true] - When `false`, an input that started with `//` has a leading `http://` turned back into `//`.
 * @param {boolean} [options.forceHttp=false] - Rewrite `https:` to `http:`.
 * @param {boolean} [options.forceHttps=false] - Rewrite `http:` to `https:`.
 * @param {boolean} [options.stripAuthentication=true] - Clear username and password.
 * @param {boolean} [options.stripHash=false] - Clear the hash.
 * @param {boolean} [options.stripProtocol=false] - Remove a leading `http://`, `https://` or `//` from the result.
 * @param {boolean} [options.stripWWW=true] - Remove a leading `www.` from host names of the form `www.<label>.<extension>`.
 * @param {Array<string|RegExp>|*} [options.removeQueryParameters=[/^utm_\w+/i]] - Query keys to delete; ignored unless it is an array.
 * @param {boolean} [options.removeTrailingSlash=true] - Remove a trailing `/` from the path.
 * @param {boolean|Array<string|RegExp>} [options.removeDirectoryIndex=false] - `true` removes a last path
 *   component matching `/^index\.[a-z]+$/`; an array supplies custom filters.
 * @param {boolean} [options.sortQueryParameters=true] - Sort the query parameters.
 * @returns {string} The normalized URL.
 * @throws {Error} When a renamed option (`normalizeHttps`, `normalizeHttp`, `stripFragment`) is passed,
 *   or when `forceHttp` and `forceHttps` are both set. Errors raised by the `URLParser`
 *   constructor or by `normalizeDataURL` propagate unchanged.
 */
const normalizeUrl = (urlString, options) => {
	// Build a fresh options object; the caller's object is never modified.
	options = {
		defaultProtocol: 'http:',
		normalizeProtocol: true,
		forceHttp: false,
		forceHttps: false,
		stripAuthentication: true,
		stripHash: false,
		stripProtocol: false,
		stripWWW: true,
		removeQueryParameters: [/^utm_\w+/i],
		removeTrailingSlash: true,
		removeDirectoryIndex: false,
		sortQueryParameters: true,
		...options
	};

	// TODO: Remove this at some point in the future
	if (Reflect.has(options, 'normalizeHttps')) {
		throw new Error('options.normalizeHttps is renamed to options.forceHttp');
	}

	if (Reflect.has(options, 'normalizeHttp')) {
		throw new Error('options.normalizeHttp is renamed to options.forceHttps');
	}

	if (Reflect.has(options, 'stripFragment')) {
		throw new Error('options.stripFragment is renamed to options.stripHash');
	}

	urlString = urlString.trim();

	// Data URL
	if (/^data:/i.test(urlString)) {
		return normalizeDataURL(urlString, options);
	}

	const hasRelativeProtocol = urlString.startsWith('//');
	const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

	// Prepend protocol: either in front of a URL that has none, or in place of a leading `//`
	if (!isRelativeUrl) {
		urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
	}

	const urlObj = new URLParser(urlString);

	if (options.forceHttp && options.forceHttps) {
		throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
	}

	if (options.forceHttp && urlObj.protocol === 'https:') {
		urlObj.protocol = 'http:';
	}

	if (options.forceHttps && urlObj.protocol === 'http:') {
		urlObj.protocol = 'https:';
	}

	// Remove auth
	if (options.stripAuthentication) {
		urlObj.username = '';
		urlObj.password = '';
	}

	// Remove hash
	if (options.stripHash) {
		urlObj.hash = '';
	}

	// Remove duplicate slashes if not preceded by a protocol
	if (urlObj.pathname) {
		// TODO: Use the following instead when targeting Node.js 10
		// `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');`
		// `before` is the character in front of the slash run (never `:`), or an
		// empty string when the run starts the path. Keep it unless it is itself a slash.
		urlObj.pathname = urlObj.pathname.replace(/((?!:).|^)\/{2,}/g, (_, before) => {
			return before.startsWith('/') ? '/' : `${before}/`;
		});
	}

	// Decode URI octets
	if (urlObj.pathname) {
		try {
			urlObj.pathname = decodeURI(urlObj.pathname);
		} catch (_) {
			// `decodeURI` throws on a malformed escape sequence (for example a lone `%`).
			// Decoding is best-effort: keep the path exactly as it was parsed.
		}
	}

	// Remove directory index
	if (options.removeDirectoryIndex === true) {
		options.removeDirectoryIndex = [/^index\.[a-z]+$/];
	}

	if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
		let pathComponents = urlObj.pathname.split('/');
		const lastComponent = pathComponents[pathComponents.length - 1];

		if (testParameter(lastComponent, options.removeDirectoryIndex)) {
			pathComponents = pathComponents.slice(0, pathComponents.length - 1);
			// The first component is the empty string before the leading `/`; skip it
			urlObj.pathname = pathComponents.slice(1).join('/') + '/';
		}
	}

	if (urlObj.hostname) {
		// Remove trailing dot
		urlObj.hostname = urlObj.hostname.replace(/\.$/, '');

		// Remove `www.`
		if (options.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z.]{2,5})$/.test(urlObj.hostname)) {
			// Each label should be max 63 at length (min: 2).
			// The extension should be max 5 at length (min: 2).
			// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
			urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
		}
	}

	// Remove unwanted query parameters
	if (Array.isArray(options.removeQueryParameters)) {
		// Iterate over a snapshot of the keys because entries are deleted inside the loop
		for (const key of [...urlObj.searchParams.keys()]) {
			if (testParameter(key, options.removeQueryParameters)) {
				urlObj.searchParams.delete(key);
			}
		}
	}

	// Sort query parameters
	if (options.sortQueryParameters) {
		urlObj.searchParams.sort();
	}

	if (options.removeTrailingSlash) {
		urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
	}

	// Take advantage of many of the Node `url` normalizations
	urlString = urlObj.toString();

	// Remove ending `/`
	if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '') {
		urlString = urlString.replace(/\/$/, '');
	}

	// Restore relative protocol, if applicable
	if (hasRelativeProtocol && !options.normalizeProtocol) {
		urlString = urlString.replace(/^http:\/\//, '//');
	}

	// Remove http/https
	if (options.stripProtocol) {
		urlString = urlString.replace(/^(?:https?:)?\/\//, '');
	}

	return urlString;
};
|
|
|
|
// Attach the `default` alias to the function, then publish the function itself
// as the CommonJS export.
// TODO: Remove the `default` alias for the next major release
normalizeUrl.default = normalizeUrl;
module.exports = normalizeUrl;
|