nodejs · TimothyGu · Feb 25, 2017 · Feb 25, 2017 · Feb 25, 2017 · addaleax
diff --git a/doc/api/url.md b/doc/api/url.md
@@ -992,22 +992,48 @@ for (const [name, value] of params) {
   // xyz baz
 ```
 
-### require('url').domainToAscii(domain)
+### require('url').domainToASCII(domain)
 
 * `domain` {String}
 * Returns: {String}
 
-Returns the [Punycode][] ASCII serialization of the `domain`.
+Returns the [Punycode][] ASCII serialization of the `domain`. If `domain` is an
+invalid domain, the empty string is returned.
 
-*Note*: The `require('url').domainToAscii()` method is introduced as part of
+It performs the inverse operation to [`require('url').domainToUnicode()`][].
+
+```js
+const url = require('url');
+console.log(url.domainToASCII('español.com'));
+  // Prints xn--espaol-zwa.com
+console.log(url.domainToASCII('中文.com'));
+  // Prints xn--fiq228c.com
+console.log(url.domainToASCII('xn--iñvalid.com'));
+  // Prints an empty string
+```
+
+*Note*: The `require('url').domainToASCII()` method is introduced as part of
 the new `URL` implementation but is not part of the WHATWG URL standard.
 
 ### require('url').domainToUnicode(domain)
 
 * `domain` {String}
 * Returns: {String}
 
-Returns the Unicode serialization of the `domain`.
+Returns the Unicode serialization of the `domain`. If `domain` is an invalid
+domain, the empty string is returned.
+
+It performs the inverse operation to [`require('url').domainToASCII()`][].
+
+```js
+const url = require('url');
+console.log(url.domainToUnicode('xn--espaol-zwa.com'));
+  // Prints español.com
+console.log(url.domainToUnicode('xn--fiq228c.com'));
+  // Prints 中文.com
+console.log(url.domainToUnicode('xn--iñvalid.com'));
+  // Prints an empty string
+```
 
 *Note*: The `require('url').domainToUnicode()` API is introduced as part of the
 new `URL` implementation but is not part of the WHATWG URL standard.
@@ -1074,6 +1100,8 @@ console.log(myURL.origin);
 [`URLSearchParams`]: #url_class_urlsearchparams
 [`urlSearchParams.entries()`]: #url_urlsearchparams_entries
 [`urlSearchParams@@iterator()`]: #url_urlsearchparams_iterator
+[`require('url').domainToASCII()`]: #url_require_url_domaintoascii_domain
+[`require('url').domainToUnicode()`]: #url_require_url_domaintounicode_domain
 [stable sorting algorithm]: https://en.wikipedia.org/wiki/Sorting_algorithm#Stability
 [`JSON.stringify()`]: https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
 [`url.toJSON()`]: #url_url_tojson
diff --git a/lib/url.js b/lib/url.js
@@ -319,7 +319,10 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
       // It only converts parts of the domain name that
       // have non-ASCII characters, i.e. it doesn't matter if
       // you call it with a domain that already is ASCII-only.
-      this.hostname = toASCII(this.hostname);
+
+      // Use lenient mode (`true`) to try to support even non-compliant
+      // URLs.
+      this.hostname = toASCII(this.hostname, true);
     }
 
     var p = this.port ? ':' + this.port : '';

diff --git a/src/node_i18n.cc b/src/node_i18n.cc
@@ -410,7 +410,8 @@ bool InitializeICUDirectory(const std::string& path) {
 
 int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                   const char* input,
-                  size_t length) {
+                  size_t length,
+                  bool lenient) {
   UErrorCode status = U_ZERO_ERROR;
   uint32_t options = UIDNA_DEFAULT;
   options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
@@ -435,7 +436,7 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                                   &status);
   }
 
-  if (U_FAILURE(status)) {
+  if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
     len = -1;
     buf->SetLength(0);
   } else {
@@ -448,7 +449,8 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
 
 int32_t ToASCII(MaybeStackBuffer<char>* buf,
                 const char* input,
-                size_t length) {
+                size_t length,
+                bool lenient) {
   UErrorCode status = U_ZERO_ERROR;
   uint32_t options = UIDNA_DEFAULT;
   options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
@@ -473,7 +475,7 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
                                  &status);
   }
 
-  if (U_FAILURE(status)) {
+  if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
     len = -1;
     buf->SetLength(0);
   } else {
@@ -489,8 +491,11 @@ static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
   CHECK_GE(args.Length(), 1);
   CHECK(args[0]->IsString());
   Utf8Value val(env->isolate(), args[0]);
+  // optional arg
+  bool lenient = args[1]->BooleanValue(env->context()).FromJust();
+
   MaybeStackBuffer<char> buf;
-  int32_t len = ToUnicode(&buf, *val, val.length());
+  int32_t len = ToUnicode(&buf, *val, val.length(), lenient);
 
   if (len < 0) {
     return env->ThrowError("Cannot convert name to Unicode");
@@ -508,8 +513,11 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
   CHECK_GE(args.Length(), 1);
   CHECK(args[0]->IsString());
   Utf8Value val(env->isolate(), args[0]);
+  // optional arg
+  bool lenient = args[1]->BooleanValue(env->context()).FromJust();
+
   MaybeStackBuffer<char> buf;
-  int32_t len = ToASCII(&buf, *val, val.length());
+  int32_t len = ToASCII(&buf, *val, val.length(), lenient);
 
   if (len < 0) {
     return env->ThrowError("Cannot convert name to ASCII");

diff --git a/src/node_i18n.h b/src/node_i18n.h
@@ -18,10 +18,12 @@ bool InitializeICUDirectory(const std::string& path);
 
 int32_t ToASCII(MaybeStackBuffer<char>* buf,
                 const char* input,
-                size_t length);
+                size_t length,
+                bool lenient = false);
 int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                   const char* input,
-                  size_t length);
+                  size_t length,
+                  bool lenient = false);
 
 }  // namespace i18n
 }  // namespace node

diff --git a/test/fixtures/url-idna.js b/test/fixtures/url-idna.js
@@ -0,0 +1,217 @@
+'use strict';
+
+// Credit for list: http://www.i18nguy.com/markup/idna-examples.html
+module.exports = {
+  valid: [
+    { ascii: 'xn--mgbaal8b0b9b2b.icom.museum',
+      unicode: 'افغانستا.icom.museum'
+    },
+    {
+      ascii: 'xn--lgbbat1ad8j.icom.museum',
+      unicode: 'الجزائر.icom.museum'
+    },
+    {
+      ascii: 'xn--sterreich-z7a.icom.museum',
+      unicode: 'österreich.icom.museum'
+    },
+    {
+      ascii: 'xn--54b6eqazv8bc7e.icom.museum',
+      unicode: 'বাংলাদেশ.icom.museum'
+    },
+    {
+      ascii: 'xn--80abmy0agn7e.icom.museum',
+      unicode: 'беларусь.icom.museum'
+    },
+    {
+      ascii: 'xn--belgi-rsa.icom.museum',
+      unicode: 'belgië.icom.museum'
+    },
+    {
+      ascii: 'xn--80abgvm6a7d2b.icom.museum',
+      unicode: 'българия.icom.museum'
+    },
+    {
+      ascii: 'xn--mgbfqim.icom.museum',
+      unicode: 'تشادر.icom.museum'
+    },
+    {
+      ascii: 'xn--fiqs8s.icom.museum',
+      unicode: '中国.icom.museum'
+    },
+    {
+      ascii: 'xn--mgbu4chg.icom.museum',
+      unicode: 'القمر.icom.museum'
+    },
+    {
+      ascii: 'xn--vxakcego.icom.museum',
+      unicode: 'κυπρος.icom.museum'
+    },
+    {
+      ascii: 'xn--eskrepublika-ebb62d.icom.museum',
+      unicode: 'českárepublika.icom.museum'
+    },
+    {
+      ascii: 'xn--wgbh1c.icom.museum',
+      unicode: 'مصر.icom.museum'
+    },
+    {
+      ascii: 'xn--hxakic4aa.icom.museum',
+      unicode: 'ελλάδα.icom.museum'
+    },
+    {
+      ascii: 'xn--magyarorszg-t7a.icom.museum',
+      unicode: 'magyarország.icom.museum'
+    },
+    {
+      ascii: 'xn--sland-ysa.icom.museum',
+      unicode: 'ísland.icom.museum'
+    },
+    {
+      ascii: 'xn--h2brj9c.icom.museum',
+      unicode: 'भारत.icom.museum'
+    },
+    {
+      ascii: 'xn--mgba3a4fra.icom.museum',
+      unicode: 'ايران.icom.museum'
+    },
+    {
+      ascii: 'xn--ire-9la.icom.museum',
+      unicode: 'éire.icom.museum'
+    },
+    {
+      ascii: 'xn--4dbklr2c8d.xn--4dbrk0ce.museum',
+      unicode: 'איקו״ם.ישראל.museum'
+    },
+    {
+      ascii: 'xn--wgv71a.icom.museum',
+      unicode: '日本.icom.museum'
+    },
+    {
+      ascii: 'xn--igbhzh7gpa.icom.museum',
+      unicode: 'الأردن.icom.museum'
+    },
+    {
+      ascii: 'xn--80aaa0a6awh12ed.icom.museum',
+      unicode: 'қазақстан.icom.museum'
+    },
+    {
+      ascii: 'xn--3e0b707e.icom.museum',
+      unicode: '한국.icom.museum'
+    },
+    {
+      ascii: 'xn--80afmksoji0fc.icom.museum',
+      unicode: 'кыргызстан.icom.museum'
+    },
+    {
+      ascii: 'xn--q7ce6a.icom.museum',
+      unicode: 'ລາວ.icom.museum'
+    },
+    {
+      ascii: 'xn--mgbb7fjb.icom.museum',
+      unicode: 'لبنان.icom.museum'
+    },
+    {
+      ascii: 'xn--80aaldqjmmi6x.icom.museum',
+      unicode: 'македонија.icom.museum'
+    },
+    {
+      ascii: 'xn--mgbah1a3hjkrd.icom.museum',
+      unicode: 'موريتانيا.icom.museum'
+    },
+    {
+      ascii: 'xn--mxico-bsa.icom.museum',
+      unicode: 'méxico.icom.museum'
+    },
+    {
+      ascii: 'xn--c1aqabffc0aq.icom.museum',
+      unicode: 'монголулс.icom.museum'
+    },
+    {
+      ascii: 'xn--mgbc0a9azcg.icom.museum',
+      unicode: 'المغرب.icom.museum'
+    },
+    {
+      ascii: 'xn--l2bey1c2b.icom.museum',
+      unicode: 'नेपाल.icom.museum'
+    },
+    {
+      ascii: 'xn--mgb9awbf.icom.museum',
+      unicode: 'عمان.icom.museum'
+    },
+    {
+      ascii: 'xn--wgbl6a.icom.museum',
+      unicode: 'قطر.icom.museum'
+    },
+    {
+      ascii: 'xn--romnia-yta.icom.museum',
+      unicode: 'românia.icom.museum'
+    },
+    {
+      ascii: 'xn--h1alffa9f.xn--h1aegh.museum',
+      unicode: 'россия.иком.museum'
+    },
+    {
+      ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum',
+      unicode: 'србијаицрнагора.иком.museum'
+    },
+    {
+      ascii: 'xn--xkc2al3hye2a.icom.museum',
+      unicode: 'இலங்கை.icom.museum'
+    },
+    {
+      ascii: 'xn--espaa-rta.icom.museum',
+      unicode: 'españa.icom.museum'
+    },
+    {
+      ascii: 'xn--o3cw4h.icom.museum',
+      unicode: 'ไทย.icom.museum'
+    },
+    {
+      ascii: 'xn--pgbs0dh.icom.museum',
+      unicode: 'تونس.icom.museum'
+    },
+    {
+      ascii: 'xn--trkiye-3ya.icom.museum',
+      unicode: 'türkiye.icom.museum'
+    },
+    {
+      ascii: 'xn--80aaxgrpt.icom.museum',
+      unicode: 'украина.icom.museum'
+    },
+    {
+      ascii: 'xn--vitnam-jk8b.icom.museum',
+      unicode: 'việtnam.icom.museum'
+    },
+    // long URL
+    {
+      ascii: `${`${'a'.repeat(63)}.`.repeat(3)}com`,
+      unicode: `${`${'a'.repeat(63)}.`.repeat(3)}com`
+    }
+  ],
+  invalid: [
+    // long label
+    {
+      url: `${'a'.repeat(64)}.com`,
+      mode: 'ascii'
+    },
+    // long URL
+    {
+      url: `${`${'a'.repeat(63)}.`.repeat(4)}com`,
+      mode: 'ascii'
+    },
+    // invalid character
+    {
+      url: '\ufffd.com',
+      mode: 'ascii'
+    },
+    {
+      url: '\ufffd.com',
+      mode: 'unicode'
+    },
+    // invalid Punycode
+    {
+      url: 'xn---abc.com',
+      mode: 'unicode'
+    }
+  ]
+}