Skip to content

Commit

Permalink
url: implement parse method for safer URL parsing
Browse files Browse the repository at this point in the history
Implement the static parse method as per the WHATWG URL specification.
Unlike the URL constructor, URL.parse does not throw on invalid input,
instead returning null. This behavior allows safer parsing of URLs
without the need for try-catch blocks around constructor calls. The
implementation follows the steps outlined in the WHATWG URL standard,
ensuring compatibility and consistency with web platform URL parsing
APIs.

Fixes: #52208
Refs: whatwg/url#825
PR-URL: #52280
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
  • Loading branch information
thisalihassan authored and targos committed Sep 21, 2024
1 parent 2fb7cc9 commit 4eb0749
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 5 deletions.
24 changes: 22 additions & 2 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,14 @@ function isURL(self) {
return Boolean(self?.href && self.protocol && self.auth === undefined && self.path === undefined);
}

/**
* A unique symbol used as a private identifier to safely invoke the URL constructor
* with a special parsing behavior. When passed as the third argument to the URL
* constructor, it signals that the constructor should not throw an exception
* for invalid URL inputs.
*/
const kParseURLSymbol = Symbol('kParseURL');

class URL {
#context = new URLContext();
#searchParams;
Expand All @@ -782,7 +790,7 @@ class URL {
};
}

constructor(input, base = undefined) {
constructor(input, base = undefined, parseSymbol = undefined) {
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('url');
}
Expand All @@ -794,7 +802,19 @@ class URL {
base = `${base}`;
}

this.#updateContext(bindingUrl.parse(input, base));
const raiseException = parseSymbol !== kParseURLSymbol;
const href = bindingUrl.parse(input, base, raiseException);
if (href) {
this.#updateContext(href);
}
}

static parse(input, base = undefined) {
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('url');
}
const parsedURLObject = new URL(input, base, kParseURLSymbol);
return parsedURLObject.href ? parsedURLObject : null;
}

[inspect.custom](depth, opts) {
Expand Down
11 changes: 9 additions & 2 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,9 @@ void BindingData::Parse(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsString()); // input
// args[1] // base url
// args[2] // raise Exception

const bool raise_exception = args.Length() > 2 && args[2]->IsTrue();

Realm* realm = Realm::GetCurrent(args);
BindingData* binding_data = realm->GetBindingData<BindingData>();
Expand All @@ -274,16 +277,20 @@ void BindingData::Parse(const FunctionCallbackInfo<Value>& args) {
if (args[1]->IsString()) {
base_ = Utf8Value(isolate, args[1]).ToString();
base = ada::parse<ada::url_aggregator>(*base_);
if (!base) {
if (!base && raise_exception) {
return ThrowInvalidURL(realm->env(), input.ToStringView(), base_);
} else if (!base) {
return;
}
base_pointer = &base.value();
}
auto out =
ada::parse<ada::url_aggregator>(input.ToStringView(), base_pointer);

if (!out) {
if (!out && raise_exception) {
return ThrowInvalidURL(realm->env(), input.ToStringView(), base_);
} else if (!out) {
return;
}

binding_data->UpdateComponents(out->get_components(), out->type);
Expand Down
2 changes: 1 addition & 1 deletion test/fixtures/wpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ Last update:
- webmessaging/broadcastchannel: https://github.com/web-platform-tests/wpt/tree/e97fac4791/webmessaging/broadcastchannel

[Web Platform Tests]: https://github.com/web-platform-tests/wpt
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/main/docs/git-node.md#git-node-wpt
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/main/docs/git-node.md#git-node-wpt
185 changes: 185 additions & 0 deletions test/fixtures/wpt/url/resources/urltestdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,36 @@
"search": "",
"hash": ""
},
{
"input": "http://a:b@c\\",
"base": null,
"href": "http://a:b@c/",
"origin": "http://c",
"protocol": "http:",
"username": "a",
"password": "b",
"host": "c",
"hostname": "c",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "ws://a@b\\c",
"base": null,
"href": "ws://a@b/c",
"origin": "ws://b",
"protocol": "ws:",
"username": "a",
"password": "",
"host": "b",
"hostname": "b",
"port": "",
"pathname": "/c",
"search": "",
"hash": ""
},
{
"input": "foo:/",
"base": "http://example.org/foo/bar",
Expand Down Expand Up @@ -9627,5 +9657,160 @@
"pathname": "",
"search": "",
"hash": ""
},
"Scheme relative path starting with multiple slashes",
{
"input": "///test",
"base": "http://example.org/",
"href": "http://test/",
"protocol": "http:",
"username": "",
"password": "",
"host": "test",
"hostname": "test",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "///\\//\\//test",
"base": "http://example.org/",
"href": "http://test/",
"protocol": "http:",
"username": "",
"password": "",
"host": "test",
"hostname": "test",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "///example.org/path",
"base": "http://example.org/",
"href": "http://example.org/path",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/path",
"search": "",
"hash": ""
},
{
"input": "///example.org/../path",
"base": "http://example.org/",
"href": "http://example.org/path",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/path",
"search": "",
"hash": ""
},
{
"input": "///example.org/../../",
"base": "http://example.org/",
"href": "http://example.org/",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "///example.org/../path/../../",
"base": "http://example.org/",
"href": "http://example.org/",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "///example.org/../path/../../path",
"base": "http://example.org/",
"href": "http://example.org/path",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/path",
"search": "",
"hash": ""
},
{
"input": "/\\/\\//example.org/../path",
"base": "http://example.org/",
"href": "http://example.org/path",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/path",
"search": "",
"hash": ""
},
{
"input": "///abcdef/../",
"base": "file:///",
"href": "file:///",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "/\\//\\/a/../",
"base": "file:///",
"href": "file://////",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "////",
"search": "",
"hash": ""
},
{
"input": "//a/../",
"base": "file:///",
"href": "file://a/",
"protocol": "file:",
"username": "",
"password": "",
"host": "a",
"hostname": "a",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
}
]
50 changes: 50 additions & 0 deletions test/fixtures/wpt/url/url-statics-parse.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// This intentionally does not use resources/urltestdata.json to preserve resources.
[
{
"url": undefined,
"base": undefined,
"expected": false
},
{
"url": "aaa:b",
"base": undefined,
"expected": true
},
{
"url": undefined,
"base": "aaa:b",
"expected": false
},
{
"url": "aaa:/b",
"base": undefined,
"expected": true
},
{
"url": undefined,
"base": "aaa:/b",
"expected": true
},
{
"url": "https://test:test",
"base": undefined,
"expected": false
},
{
"url": "a",
"base": "https://b/",
"expected": true
}
].forEach(({ url, base, expected }) => {
test(() => {
if (expected == false) {
assert_equals(URL.parse(url, base), null);
} else {
assert_equals(URL.parse(url, base).href, new URL(url, base).href);
}
}, `URL.parse(${url}, ${base})`);
});

test(() => {
assert_not_equals(URL.parse("https://example/"), URL.parse("https://example/"));
}, `URL.parse() should return a unique object`);

0 comments on commit 4eb0749

Please sign in to comment.