-
-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3205ea3
commit 5af2b7c
Showing
2 changed files
with
686 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,346 @@ | ||
'use strict'; | ||
|
||
var parsePath = require('parse-path'); | ||
|
||
/**
 * Bundler interop helper: makes sure a required module can be read through a
 * `default` property.
 *
 * @param {*} e The value returned by `require()`.
 * @returns {*} `e` itself when it is an object that already has a `default`
 *              property; otherwise a new object `{default: e}`.
 */
function _interopDefaultLegacy(e) {
	return e && typeof e === 'object' && 'default' in e ? e : {'default': e};
}
|
||
var parsePath__default = /*#__PURE__*/_interopDefaultLegacy(parsePath); | ||
|
||
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
// Values that `normalizeDataURL` treats as implicit: a media type or charset
// equal to one of these is dropped from the normalized data URL.
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
|
||
/**
 * Reports whether `name` is matched by at least one entry of `filters`.
 *
 * A `RegExp` entry matches when `filter.test(name)` is truthy; any other entry
 * matches only when it is strictly equal to `name`.
 *
 * Note: a `RegExp` carrying the `g` or `y` flag keeps `lastIndex` state between
 * `test()` calls, so such filters can give position-dependent results.
 *
 * @param {string} name The value to check.
 * @param {Array<string|RegExp>} filters The candidates to match against.
 * @returns {boolean} `true` if any filter matches, `false` otherwise
 *                    (including for an empty `filters` array).
 */
const testParameter = (name, filters) => filters.some(filter => (filter instanceof RegExp ? filter.test(name) : filter === name));
|
||
/**
 * Normalizes a `data:` URL.
 *
 * - The MIME type and the value of a `charset` attribute are lowercased.
 * - A `charset` equal to `DATA_URL_DEFAULT_CHARSET` is dropped.
 * - The MIME type is omitted when it is empty or equal to
 *   `DATA_URL_DEFAULT_MIME_TYPE` and no attributes / `base64` marker remain.
 * - A trailing `base64` marker is kept last, and base64 payloads are trimmed.
 *
 * @param {string} urlString The data URL to normalize.
 * @param {{stripHash: boolean}} options When `stripHash` is truthy the `#fragment` is removed.
 * @returns {string} The normalized data URL, always starting with lowercase `data:`.
 * @throws {Error} `Invalid URL: …` when `urlString` is not of the form `data:<type>,<data>`.
 */
const normalizeDataURL = (urlString, {stripHash}) => {
	// The scheme is matched case-insensitively because the caller detects data
	// URLs with `/^data:/i`; without the `i` flag `DATA:...` would be rejected.
	const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/i.exec(urlString);

	if (!match) {
		throw new Error(`Invalid URL: ${urlString}`);
	}

	let {type, data, hash} = match.groups;
	const mediaType = type.split(';');
	hash = stripHash ? '' : hash;

	// `base64` is only recognized as the last `;`-separated segment.
	let isBase64 = false;
	if (mediaType[mediaType.length - 1] === 'base64') {
		mediaType.pop();
		isBase64 = true;
	}

	// Lowercase MIME type
	const mimeType = (mediaType.shift() || '').toLowerCase();
	const attributes = mediaType
		.map(attribute => {
			let [key, value = ''] = attribute.split('=').map(string => string.trim());

			// Lowercase `charset`
			if (key === 'charset') {
				value = value.toLowerCase();

				// The default charset is implied, so it is removed entirely.
				if (value === DATA_URL_DEFAULT_CHARSET) {
					return '';
				}
			}

			return `${key}${value ? `=${value}` : ''}`;
		})
		.filter(Boolean);

	const normalizedMediaType = [
		...attributes,
	];

	if (isBase64) {
		normalizedMediaType.push('base64');
	}

	// The MIME type must be spelled out whenever something follows it, or when
	// it differs from the implied default.
	if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
		normalizedMediaType.unshift(mimeType);
	}

	return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`;
};
|
||
/**
 * Normalizes a URL string.
 *
 * Data URLs are delegated to `normalizeDataURL`; everything else is parsed with
 * the WHATWG `URL` class and then adjusted according to `options`.
 *
 * @param {string} urlString The URL to normalize. Surrounding whitespace is trimmed.
 * @param {Object} [options] Overrides for the defaults listed below.
 * @param {string} [options.defaultProtocol='http:'] Prepended when the input has no protocol or is protocol-relative (`//host`).
 * @param {boolean} [options.normalizeProtocol=true] When `false`, a protocol-relative input gets its leading `http://` turned back into `//`.
 * @param {boolean} [options.forceHttp=false] Rewrite `https:` to `http:`.
 * @param {boolean} [options.forceHttps=false] Rewrite `http:` to `https:`.
 * @param {boolean} [options.stripAuthentication=true] Clear username and password.
 * @param {boolean} [options.stripHash=false] Remove the fragment.
 * @param {boolean} [options.stripTextFragment=true] Remove a `:~:text…` text fragment from the hash (only when `stripHash` is falsy).
 * @param {boolean} [options.stripWWW=true] Remove a leading `www.` from the hostname when it matches the hostname pattern below.
 * @param {Array<string|RegExp>|boolean} [options.removeQueryParameters=[/^utm_\w+/i]] Query keys to delete; `true` removes the whole query string.
 * @param {boolean} [options.removeTrailingSlash=true] Remove a trailing `/` from the path.
 * @param {boolean} [options.removeSingleSlash=true] Remove the lone `/` after the host when there is no hash.
 * @param {boolean|Array<string|RegExp>} [options.removeDirectoryIndex=false] Filters for a last path segment to drop; `true` means `[/^index\.[a-z]+$/]`.
 * @param {boolean} [options.sortQueryParameters=true] Sort query parameters.
 * @param {boolean} [options.stripProtocol] Remove a leading `http://`, `https://` or `//` from the result.
 * @returns {string} The normalized URL.
 * @throws {Error} For `view-source:` URLs, or when both `forceHttp` and `forceHttps` are set.
 *                 The `URL` constructor also throws when the input cannot be parsed.
 */
function normalizeUrl(urlString, options) {
	// Merged into a fresh object, so later writes to `options` never touch the caller's object.
	options = {
		defaultProtocol: 'http:',
		normalizeProtocol: true,
		forceHttp: false,
		forceHttps: false,
		stripAuthentication: true,
		stripHash: false,
		stripTextFragment: true,
		stripWWW: true,
		removeQueryParameters: [/^utm_\w+/i],
		removeTrailingSlash: true,
		removeSingleSlash: true,
		removeDirectoryIndex: false,
		sortQueryParameters: true,
		...options,
	};

	urlString = urlString.trim();

	// Data URL
	if (/^data:/i.test(urlString)) {
		return normalizeDataURL(urlString, options);
	}

	if (/^view-source:/i.test(urlString)) {
		throw new Error('`view-source:` is not supported as it is a non-standard protocol');
	}

	const hasRelativeProtocol = urlString.startsWith('//');
	const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

	// Prepend protocol
	if (!isRelativeUrl) {
		urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
	}

	const urlObject = new URL(urlString);

	if (options.forceHttp && options.forceHttps) {
		throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
	}

	if (options.forceHttp && urlObject.protocol === 'https:') {
		urlObject.protocol = 'http:';
	}

	if (options.forceHttps && urlObject.protocol === 'http:') {
		urlObject.protocol = 'https:';
	}

	// Remove auth
	if (options.stripAuthentication) {
		urlObject.username = '';
		urlObject.password = '';
	}

	// Remove hash
	if (options.stripHash) {
		urlObject.hash = '';
	} else if (options.stripTextFragment) {
		urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, '');
	}

	// Remove duplicate slashes if not preceded by a protocol
	// NOTE: This could be implemented using a single negative lookbehind
	// regex, but we avoid that to maintain compatibility with older js engines
	// which do not have support for that feature.
	if (urlObject.pathname) {
		// TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind.

		// Split the string by occurrences of this protocol regex, and perform
		// duplicate-slash replacement on the strings between those occurrences
		// (if any).
		const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g;

		let lastIndex = 0;
		let result = '';
		for (;;) {
			const match = protocolRegex.exec(urlObject.pathname);
			if (!match) {
				break;
			}

			const protocol = match[0];
			const protocolAtIndex = match.index;
			const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex);

			result += intermediate.replace(/\/{2,}/g, '/');
			// The embedded `scheme://` is copied verbatim so its double slash survives.
			result += protocol;
			lastIndex = protocolAtIndex + protocol.length;
		}

		const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length);
		result += remnant.replace(/\/{2,}/g, '/');

		urlObject.pathname = result;
	}

	// Decode URI octets
	if (urlObject.pathname) {
		try {
			urlObject.pathname = decodeURI(urlObject.pathname);
		} catch {
			// Best effort: a malformed escape sequence leaves the pathname as it is.
		}
	}

	// Remove directory index
	if (options.removeDirectoryIndex === true) {
		options.removeDirectoryIndex = [/^index\.[a-z]+$/];
	}

	if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
		let pathComponents = urlObject.pathname.split('/');
		const lastComponent = pathComponents[pathComponents.length - 1];

		if (testParameter(lastComponent, options.removeDirectoryIndex)) {
			pathComponents = pathComponents.slice(0, -1);
			urlObject.pathname = `${pathComponents.slice(1).join('/')}/`;
		}
	}

	if (urlObject.hostname) {
		// Remove trailing dot
		urlObject.hostname = urlObject.hostname.replace(/\.$/, '');

		// Remove `www.`
		if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) {
			// Each label should be max 63 at length (min: 1).
			// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
			// Each TLD should be up to 63 characters long (min: 2).
			// It is technically possible to have a single character TLD, but none currently exist.
			urlObject.hostname = urlObject.hostname.replace(/^www\./, '');
		}
	}

	// Remove query unwanted parameters
	if (Array.isArray(options.removeQueryParameters)) {
		// eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy.
		for (const key of [...urlObject.searchParams.keys()]) {
			if (testParameter(key, options.removeQueryParameters)) {
				urlObject.searchParams.delete(key);
			}
		}
	}

	if (options.removeQueryParameters === true) {
		urlObject.search = '';
	}

	// Sort query parameters
	if (options.sortQueryParameters) {
		urlObject.searchParams.sort();

		// Calling `.sort()` encodes the search parameters, so we need to decode them again.
		try {
			urlObject.search = decodeURIComponent(urlObject.search);
		} catch {
			// Best effort: a malformed escape sequence leaves the encoded query in place.
		}
	}

	if (options.removeTrailingSlash) {
		urlObject.pathname = urlObject.pathname.replace(/\/$/, '');
	}

	const oldUrlString = urlString;

	// Take advantage of many of the Node `url` normalizations
	urlString = urlObject.toString();

	// With `removeSingleSlash` off, the root slash is dropped only when the
	// (protocol-prefixed) input did not end with a slash itself.
	if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') {
		urlString = urlString.replace(/\/$/, '');
	}

	// Remove ending `/` unless removeSingleSlash is false
	if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) {
		urlString = urlString.replace(/\/$/, '');
	}

	// Restore relative protocol, if applicable
	if (hasRelativeProtocol && !options.normalizeProtocol) {
		urlString = urlString.replace(/^http:\/\//, '//');
	}

	// Remove http/https
	if (options.stripProtocol) {
		urlString = urlString.replace(/^(?:https?:)?\/\//, '');
	}

	return urlString;
}
|
||
// Dependencies | ||
|
||
/**
 * parseUrl
 * Parses the input url.
 *
 * **Note**: This *throws* if invalid urls are provided.
 *
 * @name parseUrl
 * @function
 * @param {String} url The input url.
 * @param {Boolean|Object} normalize Whether to normalize the url or not.
 *                         Default is `false`. If `true`, the url will
 *                         be normalized. If an object, it will be the
 *                         options object sent to [`normalize-url`](https://github.com/sindresorhus/normalize-url).
 *
 *                         For SSH urls, normalize won't work.
 *
 * @return {Object} An object containing the following fields:
 *
 *    - `protocols` (Array): An array with the url protocols (usually it has one element).
 *    - `protocol` (String): The first protocol, `"ssh"` (if the url is a ssh url) or `"file"`.
 *    - `port` (null|Number): The domain port.
 *    - `resource` (String): The url domain (including subdomains).
 *    - `host` (String): The fully qualified domain name of a network host, or its IP address.
 *    - `user` (String): The authentication user (usually for ssh urls).
 *    - `pathname` (String): The url pathname.
 *    - `hash` (String): The url hash.
 *    - `search` (String): The url querystring value.
 *    - `href` (String): The input url.
 *    - `query` (Object): The url querystring, parsed as object.
 *    - `parse_failed` (Boolean): Whether the parsing failed or not.
 *
 * @throws {Error} With a `subject_url` property holding the offending url, when
 *                 the input is not a non-blank string, is longer than
 *                 `parseUrl.MAX_INPUT_LENGTH`, or cannot be parsed.
 */
const parseUrl = (url, normalize = false) => {

	// Constants
	/**
	 * ([a-z_][a-z0-9_-]{0,31}) Try to match the user
	 * ([\w\.\-@]+) Match the host/resource
	 * (([\~,\.\w,\-,\_,\/,\s]|%[0-9A-Fa-f]{2})+?(?:\.git|\/)?) Match the path, allowing spaces/white
	 */
	const GIT_RE = /^(?:([a-z_][a-z0-9_-]{0,31})@|https?:\/\/)([\w\.\-@]+)[\/:](([\~,\.\w,\-,\_,\/,\s]|%[0-9A-Fa-f]{2})+?(?:\.git|\/)?)$/;

	// Throws an Error tagged with the url being processed at that moment
	// (the normalized one, if normalization has already run).
	const throwErr = msg => {
		const err = new Error(msg);
		err.subject_url = url;
		throw err;
	};

	if (typeof url !== "string" || !url.trim()) {
		throwErr("Invalid url.");
	}

	if (url.length > parseUrl.MAX_INPUT_LENGTH) {
		throwErr("Input exceeds maximum length. If needed, change the value of parseUrl.MAX_INPUT_LENGTH.");
	}

	if (normalize) {
		// Any truthy non-object value (e.g. `true`) selects these default options.
		if (typeof normalize !== "object") {
			normalize = {
				stripHash: false
			};
		}
		url = normalizeUrl(url, normalize);
	}

	const parsed = parsePath__default["default"](url);

	// Potential git-ssh urls
	if (parsed.parse_failed) {
		const matched = parsed.href.match(GIT_RE);

		if (matched) {
			// Fill in the fields from the regex groups: user, host and path.
			parsed.protocols = ["ssh"];
			parsed.protocol = "ssh";
			parsed.resource = matched[2];
			parsed.host = matched[2];
			parsed.user = matched[1];
			parsed.pathname = `/${matched[3]}`;
			parsed.parse_failed = false;
		} else {
			throwErr("URL parsing failed.");
		}
	}

	return parsed;
};

// Upper bound on the accepted input length; callers may change it.
parseUrl.MAX_INPUT_LENGTH = 2048;
|
||
module.exports = parseUrl; |
Oops, something went wrong.