Skip to content

Commit

Permalink
src: support domains with empty labels
Browse files Browse the repository at this point in the history
Follow the WHATWG URL Standard for domainToASCII/domainToUnicode, and
synchronise the WPT URL test data.

Refs: web-platform-tests/wpt#5397
  • Loading branch information
watilde committed Apr 30, 2017
1 parent 84dabe8 commit d080abb
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 13 deletions.
17 changes: 17 additions & 0 deletions src/node_i18n.cc
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,13 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
&status);
}

// UTS #46's ToUnicode operation applies no validation of domain name length
// (nor a flag requesting it to do so, like VerifyDnsLength for ToASCII). For
// that reason, unlike ToASCII below, ICU4C correctly accepts long domain
// names. However, ICU4C still sets the EMPTY_LABEL error, contrary to UTS
// #46. Therefore, explicitly filter out that error here.
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;

if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
len = -1;
buf->SetLength(0);
Expand Down Expand Up @@ -500,6 +507,16 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
&status);
}

// The WHATWG URL "domain to ASCII" algorithm explicitly sets the
// VerifyDnsLength flag to false, which disables the domain name length
// verification step in ToASCII (as specified by UTS #46). Unfortunately,
// ICU4C's IDNA module does not support disabling this flag through `options`,
// so afterwards just filter out the errors that the verification step may
// have caused.
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;

if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
len = -1;
buf->SetLength(0);
Expand Down
19 changes: 7 additions & 12 deletions test/fixtures/url-idna.js
Original file line number Diff line number Diff line change
Expand Up @@ -182,23 +182,18 @@ module.exports = {
ascii: 'xn--vitnam-jk8b.icom.museum',
unicode: 'việtnam.icom.museum'
},
// long URL
{
ascii: `${`${'a'.repeat(63)}.`.repeat(3)}com`,
unicode: `${`${'a'.repeat(63)}.`.repeat(3)}com`
}
],
invalid: [
// long label
{
url: `${'a'.repeat(64)}.com`,
mode: 'ascii'
ascii: `${'a'.repeat(64)}.com`,
unicode: `${'a'.repeat(64)}.com`,
},
// long URL
{
url: `${`${'a'.repeat(63)}.`.repeat(4)}com`,
mode: 'ascii'
},
ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`,
unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com`
}
],
invalid: [
// invalid character
{
url: '\ufffd.com',
Expand Down
48 changes: 47 additions & 1 deletion test/fixtures/url-tests.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict';

/* WPT Refs:
https://github.com/w3c/web-platform-tests/blob/3eff1bd/url/urltestdata.json
https://github.com/w3c/web-platform-tests/blob/3afae94/url/urltestdata.json
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
*/
module.exports =
Expand Down Expand Up @@ -3789,6 +3789,52 @@ module.exports =
"search": "",
"hash": ""
},
"Domains with empty labels",
{
"input": "http://./",
"base": "about:blank",
"href": "http://./",
"origin": "http://.",
"protocol": "http:",
"username": "",
"password": "",
"host": ".",
"hostname": ".",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://../",
"base": "about:blank",
"href": "http://../",
"origin": "http://..",
"protocol": "http:",
"username": "",
"password": "",
"host": "..",
"hostname": "..",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://0..0x300/",
"base": "about:blank",
"href": "http://0..0x300/",
"origin": "http://0..0x300",
"protocol": "http:",
"username": "",
"password": "",
"host": "0..0x300",
"hostname": "0..0x300",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
"Broken IPv6",
{
"input": "http://[www.google.com]/",
Expand Down

0 comments on commit d080abb

Please sign in to comment.