Skip to content

Commit

Permalink
url: update IDNA handling
Browse files Browse the repository at this point in the history
Remove custom tests for invalid IDNA domains in url-idna.js in favor of
the more comprehensive official set.

PR-URL: nodejs#13362
Refs: whatwg/url#309
Refs: web-platform-tests/wpt#5976
Reviewed-By: Refael Ackermann <refack@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
  • Loading branch information
TimothyGu committed Nov 28, 2017
1 parent eba99fe commit 79147ae
Show file tree
Hide file tree
Showing 6 changed files with 509 additions and 80 deletions.
5 changes: 4 additions & 1 deletion lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,10 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
// It only converts parts of the domain name that
// have non-ASCII characters, i.e. it doesn't matter if
// you call it with a domain that already is ASCII-only.
this.hostname = toASCII(this.hostname);

// Use lenient mode (`true`) to try to support even non-compliant
// URLs.
this.hostname = toASCII(this.hostname, true);
}

var p = this.port ? ':' + this.port : '';
Expand Down
88 changes: 71 additions & 17 deletions src/node_i18n.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,54 +77,72 @@ bool InitializeICUDirectory(const std::string& path) {
}
}

static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
UIDNA* uidna = uidna_openUTS46(options, &status);
if (U_FAILURE(status))
return -1;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;

int32_t len = uidna_nameToUnicodeUTF8(uidna,
input, length,
**buf, buf->length(),
**buf, buf->capacity(),
&info,
&status);

// Do not check info.errors like we do with ToASCII since ToUnicode always
// returns a string, despite any possible errors that may have occurred.

if (status == U_BUFFER_OVERFLOW_ERROR) {
status = U_ZERO_ERROR;
buf->AllocateSufficientStorage(len);
len = uidna_nameToUnicodeUTF8(uidna,
input, length,
**buf, buf->length(),
**buf, buf->capacity(),
&info,
&status);
}

if (U_FAILURE(status))
// info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
// string, regardless of whether an error occurred.

if (U_FAILURE(status)) {
len = -1;
buf->SetLength(0);
} else {
buf->SetLength(len);
}

uidna_close(uidna);
return len;
}

static int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length,
enum idna_mode mode) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
uint32_t options = // CheckHyphens = false; handled later
UIDNA_CHECK_BIDI | // CheckBidi = true
UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
if (mode == IDNA_STRICT) {
options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
// VerifyDnsLength = beStrict;
// handled later
}

UIDNA* uidna = uidna_openUTS46(options, &status);
if (U_FAILURE(status))
return -1;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;

int32_t len = uidna_nameToASCII_UTF8(uidna,
input, length,
**buf, buf->length(),
**buf, buf->capacity(),
&info,
&status);

Expand All @@ -133,13 +151,45 @@ static int32_t ToASCII(MaybeStackBuffer<char>* buf,
buf->AllocateSufficientStorage(len);
len = uidna_nameToASCII_UTF8(uidna,
input, length,
**buf, buf->length(),
**buf, buf->capacity(),
&info,
&status);
}

if (U_FAILURE(status))
// In UTS #46 which specifies ToASCII, certain error conditions are
// configurable through options, and the WHATWG URL Standard promptly elects
// to disable some of them to accommodate for real-world use cases.
// Unfortunately, ICU4C's IDNA module does not support disabling some of
// these options through `options` above, and thus continues throwing
// unnecessary errors. To counter this situation, we just filter out the
// errors that may have happened afterwards, before deciding whether to
// return an error from this function.

// CheckHyphens = false
// (Specified in the current UTS #46 draft rev. 18.)
// Refs:
// - https://github.com/whatwg/url/issues/53
// - https://github.com/whatwg/url/pull/309
// - http://www.unicode.org/review/pri317/
// - http://www.unicode.org/reports/tr46/tr46-18.html
// - https://www.icann.org/news/announcement-2000-01-07-en
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;

if (mode != IDNA_STRICT) {
// VerifyDnsLength = beStrict
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
}

if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
len = -1;
buf->SetLength(0);
} else {
buf->SetLength(len);
}

uidna_close(uidna);
return len;
Expand Down Expand Up @@ -169,8 +219,12 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsString());
Utf8Value val(env->isolate(), args[0]);
// optional arg
bool lenient = args[1]->BooleanValue(env->context()).FromJust();
enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;

MaybeStackBuffer<char> buf;
int32_t len = ToASCII(&buf, *val, val.length());
int32_t len = ToASCII(&buf, *val, val.length(), mode);

if (len < 0) {
return env->ThrowError("Cannot convert name to ASCII");
Expand Down
24 changes: 24 additions & 0 deletions src/node_i18n.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,30 @@ namespace i18n {

bool InitializeICUDirectory(const std::string& path);

enum idna_mode {
// Default mode for maximum compatibility.
IDNA_DEFAULT,
// Ignore all errors in IDNA conversion, if possible.
IDNA_LENIENT,
// Enforce STD3 rules (UseSTD3ASCIIRules) and DNS length restrictions
// (VerifyDnsLength). Corresponds to `beStrict` flag in the "domain to ASCII"
// algorithm.
IDNA_STRICT
};

// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
// https://url.spec.whatwg.org/#concept-domain-to-ascii
int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length,
enum idna_mode mode = IDNA_DEFAULT);

// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
// https://url.spec.whatwg.org/#concept-domain-to-unicode
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length);

} // namespace i18n
} // namespace node

Expand Down
Loading

0 comments on commit 79147ae

Please sign in to comment.