From 7048cba388b86812da1d3b0774850e26a6e79d57 Mon Sep 17 00:00:00 2001 From: Mike Samuel Date: Wed, 28 Nov 2018 13:06:46 -0500 Subject: [PATCH] url: use SafeSet to filter known special protocols Avoids a maintenance hazard when reviewers assume that `hostlessProtocol` and `slashedProtocol` are disjoint. The following may be counter-intuitive: ```js // These objects seem to have no keys in common const hostlessProtocol = { 'javascript': true }; const slashedProtocol = { 'http': true }; // A reasonable reviewer may assumes bothTrue is never truthy function bothTrue(lowerProto) { return hostlessProtocol[lowerProto] && slashedProtocol[lowerProto]; } // But console.log(Boolean(bothTrue('constructor'))); // true ``` This change uses SafeSet instead of plain-old objects. ---- Rejected alternative: We could have used object with a `null` prototype as lookup tables so that `lowerProto` is never treated as a key into `Object.prototype`. ```js const hostlessProtocol = { __proto__: null, 'javascript': true }; const slashedProtocol = { __proto__: null, 'http': true }; function bothTrue(lowerProto) { return hostlessProtocol[lowerProto] && slashedProtocol[lowerProto]; } console.log(Boolean(bothTrue('constructor'))); // false ``` PR-URL: https://github.com/nodejs/node/pull/24703 Reviewed-By: Joyee Cheung Reviewed-By: Colin Ihrig Reviewed-By: James M Snell --- lib/url.js | 63 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/lib/url.js b/lib/url.js index 676722de622e2b..7870ce50d86545 100644 --- a/lib/url.js +++ b/lib/url.js @@ -26,6 +26,8 @@ const { toASCII } = process.binding('config').hasIntl ? const { hexTable } = require('internal/querystring'); +const { SafeSet } = require('internal/safe_globals'); + const { ERR_INVALID_ARG_TYPE } = require('internal/errors').codes; @@ -76,28 +78,28 @@ const simplePathPattern = /^(\/\/?(?!\/)[^?\s]*)(\?[^\s]*)?$/; const hostnameMaxLen = 255; // protocols that can allow "unsafe" and "unwise" chars. -const unsafeProtocol = { - 'javascript': true, - 'javascript:': true -}; +const unsafeProtocol = new SafeSet([ + 'javascript', + 'javascript:' +]); // protocols that never have a hostname. -const hostlessProtocol = { - 'javascript': true, - 'javascript:': true -}; +const hostlessProtocol = new SafeSet([ + 'javascript', + 'javascript:' +]); // protocols that always contain a // bit. -const slashedProtocol = { - 'http': true, - 'http:': true, - 'https': true, - 'https:': true, - 'ftp': true, - 'ftp:': true, - 'gopher': true, - 'gopher:': true, - 'file': true, - 'file:': true -}; +const slashedProtocol = new SafeSet([ + 'http', + 'http:', + 'https', + 'https:', + 'ftp', + 'ftp:', + 'gopher', + 'gopher:', + 'file', + 'file:' +]); const { CHAR_SPACE, CHAR_TAB, @@ -267,14 +269,14 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { if (slashesDenoteHost || proto || hostPattern.test(rest)) { var slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH && rest.charCodeAt(1) === CHAR_FORWARD_SLASH; - if (slashes && !(proto && hostlessProtocol[lowerProto])) { + if (slashes && !(proto && hostlessProtocol.has(lowerProto))) { rest = rest.slice(2); this.slashes = true; } } - if (!hostlessProtocol[lowerProto] && - (slashes || (proto && !slashedProtocol[proto]))) { + if (!hostlessProtocol.has(lowerProto) && + (slashes || (proto && !slashedProtocol.has(proto)))) { // there's a hostname. // the first instance of /, ?, ;, or # ends the host. @@ -400,7 +402,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { // now rest is set to the post-host stuff. // chop off any delim chars. - if (!unsafeProtocol[lowerProto]) { + if (!unsafeProtocol.has(lowerProto)) { // First, make 100% sure that any "autoEscape" chars get // escaped, even if encodeURIComponent doesn't think they // need to be. @@ -447,7 +449,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { } else if (firstIdx > 0) { this.pathname = rest.slice(0, firstIdx); } - if (slashedProtocol[lowerProto] && + if (slashedProtocol.has(lowerProto) && this.hostname && !this.pathname) { this.pathname = '/'; } @@ -629,7 +631,7 @@ Url.prototype.format = function format() { // only the slashedProtocols get the //. Not mailto:, xmpp:, etc. // unless they had them to begin with. - if (this.slashes || slashedProtocol[protocol]) { + if (this.slashes || slashedProtocol.has(protocol)) { if (this.slashes || host) { if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH) pathname = '/' + pathname; @@ -701,7 +703,7 @@ Url.prototype.resolveObject = function resolveObject(relative) { } // urlParse appends trailing / to urls like http://www.example.com - if (slashedProtocol[result.protocol] && + if (slashedProtocol.has(result.protocol) && result.hostname && !result.pathname) { result.path = result.pathname = '/'; } @@ -719,7 +721,7 @@ Url.prototype.resolveObject = function resolveObject(relative) { // if it is file:, then the host is dropped, // because that's known to be hostless. // anything else is assumed to be absolute. - if (!slashedProtocol[relative.protocol]) { + if (!slashedProtocol.has(relative.protocol)) { var keys = Object.keys(relative); for (var v = 0; v < keys.length; v++) { var k = keys[v]; @@ -732,7 +734,7 @@ Url.prototype.resolveObject = function resolveObject(relative) { result.protocol = relative.protocol; if (!relative.host && !/^file:?$/.test(relative.protocol) && - !hostlessProtocol[relative.protocol]) { + !hostlessProtocol.has(relative.protocol)) { const relPath = (relative.pathname || '').split('/'); while (relPath.length && !(relative.host = relPath.shift())); if (!relative.host) relative.host = ''; @@ -769,7 +771,8 @@ Url.prototype.resolveObject = function resolveObject(relative) { var removeAllDots = mustEndAbs; var srcPath = result.pathname && result.pathname.split('/') || []; var relPath = relative.pathname && relative.pathname.split('/') || []; - var noLeadingSlashes = result.protocol && !slashedProtocol[result.protocol]; + var noLeadingSlashes = result.protocol && + !slashedProtocol.has(result.protocol); // if the url is a non-slashed url, then relative // links like ../.. should be able