Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: make WHATWG URL implementation more spec compliant #10317

Merged
merged 1 commit into from
Jan 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
return;
port = String(port);
if (port === '') {
// Currently, if port number is empty, left unchanged.
// TODO(jasnell): This might be changing in the spec
ctx.port = undefined;
return;
}
binding.parse(port, binding.kPort, null, ctx,
Expand Down Expand Up @@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
set(search) {
const ctx = this[context];
search = String(search);
if (search[0] === '?') search = search.slice(1);
if (!search) {
ctx.query = null;
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
this[searchParams][searchParams] = {};
return;
}
if (search[0] === '?') search = search.slice(1);
ctx.query = '';
binding.parse(search, binding.kQuery, null, ctx,
onParseSearchComplete.bind(this));
Expand Down
64 changes: 37 additions & 27 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ namespace url {
return type;
}

static inline int ParseNumber(const char* start, const char* end) {
static inline int64_t ParseNumber(const char* start, const char* end) {
unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2;
Expand Down Expand Up @@ -293,7 +293,7 @@ namespace url {
}
p++;
}
return strtol(start, NULL, R);
return strtoll(start, NULL, R);
}

static url_host_type ParseIPv4Host(url_host* host,
Expand All @@ -305,28 +305,25 @@ namespace url {
const char* end = pointer + length;
int parts = 0;
uint32_t val = 0;
unsigned numbers[4];
uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0)
goto end;

while (pointer <= end) {
const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) {
if (++parts > 4 || pointer - mark == 0)
break;
int n = ParseNumber(mark, pointer);
if (n < 0) {
type = HOST_TYPE_DOMAIN;
if (++parts > 4)
goto end;
}
if (pointer - mark == 10) {
numbers[parts - 1] = n;
if (pointer - mark == 0)
break;
}
if (n > 255) {
type = HOST_TYPE_FAILED;
int64_t n = ParseNumber(mark, pointer);
if (n < 0)
goto end;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasnell Just wondering if you remember what this condition was for? Maybe it was a mistake to remove it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not off the top of my head... I'll be able to take a look in the next day or two. (sorry, company in town for the holiday is taking up quite a bit of my time this week)


if (n > 255) {
tooBigNumbers++;
}
numbers[parts - 1] = n;
mark = pointer + 1;
Expand All @@ -335,14 +332,23 @@ namespace url {
}
pointer++;
}
CHECK_GT(parts, 0);

// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
// 256^(5 - the number of items in numbers), return failure.
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
type = HOST_TYPE_FAILED;
goto end;
}

type = HOST_TYPE_IPV4;
if (parts > 0) {
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3-n;
val += numbers[n] * pow(256, b);
}
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3 - n;
val += numbers[n] * pow(256, b);
}

host->value.ipv4 = val;
Expand Down Expand Up @@ -618,6 +624,13 @@ namespace url {
}
}

static inline void ShortenUrlPath(struct url_data* url) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it better to use a const reference?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

possibly, no strong opinion

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it cannot be a const because the url is modified in the function. Non-const references are not allowed by the linter, so I'll keep the pointer.

if (url->path.empty()) return;
if (url->path.size() == 1 && url->scheme == "file:" &&
NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return;
url->path.pop_back();
}

static void Parse(Environment* env,
Local<Value> recv,
const char* input,
Expand Down Expand Up @@ -895,8 +908,7 @@ namespace url {
if (DOES_HAVE_PATH(base)) {
SET_HAVE_PATH()
url.path = base.path;
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
url.port = base.port;
state = kPath;
Expand Down Expand Up @@ -1112,8 +1124,7 @@ namespace url {
SET_HAVE_PATH()
url.path = base.path;
}
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
state = kPath;
continue;
Expand Down Expand Up @@ -1172,8 +1183,7 @@ namespace url {
special_back_slash ||
(!state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) {
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH()
url.path.push_back("");
Expand Down Expand Up @@ -1247,7 +1257,7 @@ namespace url {
case 0:
break;
default:
buffer += ch;
AppendOrEscape(&buffer, ch, SimpleEncodeSet);
}
break;
default:
Expand Down
34 changes: 20 additions & 14 deletions test/fixtures/url-setter-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@
}
},
{
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is unchanged if not specified",
"href": "http://example.net:8080",
"new_value": "example.com:",
"expected": {
Expand All @@ -358,7 +358,6 @@
}
},
{

"comment": "The empty host is not valid for special schemes",
"href": "http://example.net",
"new_value": "",
Expand Down Expand Up @@ -763,14 +762,14 @@
}
},
{
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is removed if empty is the new value",
"href": "http://example.net:8080",
"new_value": "",
"expected": {
"href": "http://example.net:8080/",
"host": "example.net:8080",
"href": "http://example.net/",
"host": "example.net",
"hostname": "example.net",
"port": "8080"
"port": ""
}
},
{
Expand Down Expand Up @@ -975,6 +974,15 @@
"href": "http://example.net/..%c3%89t%C3%A9",
"pathname": "/..%c3%89t%C3%A9"
}
},
{
"comment": "? needs to be encoded",
"href": "http://example.net",
"new_value": "?",
"expected": {
"href": "http://example.net/%3F",
"pathname": "/%3F"
}
}
],
"search": [
Expand Down Expand Up @@ -1011,7 +1019,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "?",
"expected": {
Expand Down Expand Up @@ -1096,7 +1103,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "#",
"expected": {
Expand All @@ -1113,22 +1119,22 @@
}
},
{
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
"href": "a:/",
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"expected": {
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
}
},
{
"comment": "Bytes already percent-encoded are left as-is",
"href": "http://example.net",
"new_value": "%c3%89té",
"expected": {
"href": "http://example.net/#%c3%89té",
"hash": "#%c3%89té"
"href": "http://example.net/#%c3%89t%C3%A9",
"hash": "#%c3%89t%C3%A9"
}
}
]
}
}
Loading