From 5be93fd294ec8c16af779d4423dd634a166a7a52 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Fri, 10 Mar 2017 22:05:22 -0800 Subject: [PATCH] src: add native URL class Adds a URL native class for use within the node.js c/c++ code. This is primarily intended to be used by the eventual ES6 modules implementation but can be used generally wherever URL parsing within the c/c++ may be necessary. ```c URL url1("http://example.org"); URL url2("foo", "http://example.org/bar"); URL url3("baz", &url2); ``` While we're at it, reduce reliance on macros to simplify impl. --- src/node_url.cc | 558 ++++++++++++++++++++++++------------------------ src/node_url.h | 91 +++++++- 2 files changed, 365 insertions(+), 284 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 6a9f8f3ca9e1f6..ba3ceec6aca070 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -47,45 +47,6 @@ using v8::Value; } \ } -#define CANNOT_BE_BASE() url.flags |= URL_FLAGS_CANNOT_BE_BASE; -#define INVALID_PARSE_STATE() url.flags |= URL_FLAGS_INVALID_PARSE_STATE; -#define SPECIAL() \ - { \ - url.flags |= URL_FLAGS_SPECIAL; \ - special = true; \ - } -#define TERMINATE() \ - { \ - url.flags |= URL_FLAGS_TERMINATED; \ - goto done; \ - } -#define URL_FAILED() \ - { \ - url.flags |= URL_FLAGS_FAILED; \ - goto done; \ - } - -#define CHECK_FLAG(flags, name) (flags & URL_FLAGS_##name) /* NOLINT */ - -#define IS_CANNOT_BE_BASE(flags) CHECK_FLAG(flags, CANNOT_BE_BASE) -#define IS_FAILED(flags) CHECK_FLAG(flags, FAILED) - -#define DOES_HAVE_SCHEME(url) CHECK_FLAG(url.flags, HAS_SCHEME) -#define DOES_HAVE_USERNAME(url) CHECK_FLAG(url.flags, HAS_USERNAME) -#define DOES_HAVE_PASSWORD(url) CHECK_FLAG(url.flags, HAS_PASSWORD) -#define DOES_HAVE_HOST(url) CHECK_FLAG(url.flags, HAS_HOST) -#define DOES_HAVE_PATH(url) CHECK_FLAG(url.flags, HAS_PATH) -#define DOES_HAVE_QUERY(url) CHECK_FLAG(url.flags, HAS_QUERY) -#define DOES_HAVE_FRAGMENT(url) CHECK_FLAG(url.flags, HAS_FRAGMENT) - -#define SET_HAVE_SCHEME() url.flags |= 
URL_FLAGS_HAS_SCHEME; -#define SET_HAVE_USERNAME() url.flags |= URL_FLAGS_HAS_USERNAME; -#define SET_HAVE_PASSWORD() url.flags |= URL_FLAGS_HAS_PASSWORD; -#define SET_HAVE_HOST() url.flags |= URL_FLAGS_HAS_HOST; -#define SET_HAVE_PATH() url.flags |= URL_FLAGS_HAS_PATH; -#define SET_HAVE_QUERY() url.flags |= URL_FLAGS_HAS_QUERY; -#define SET_HAVE_FRAGMENT() url.flags |= URL_FLAGS_HAS_FRAGMENT; - #define UTF8STRING(isolate, str) \ String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \ .ToLocalChecked() @@ -93,7 +54,7 @@ using v8::Value; namespace url { #if defined(NODE_HAVE_I18N_SUPPORT) - static bool ToUnicode(std::string* input, std::string* output) { + static inline bool ToUnicode(std::string* input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) return false; @@ -101,7 +62,7 @@ namespace url { return true; } - static bool ToASCII(std::string* input, std::string* output) { + static inline bool ToASCII(std::string* input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) return false; @@ -110,12 +71,12 @@ namespace url { } #else // Intentional non-ops if ICU is not present. 
- static bool ToUnicode(std::string* input, std::string* output) { + static inline bool ToUnicode(std::string* input, std::string* output) { *output = *input; return true; } - static bool ToASCII(std::string* input, std::string* output) { + static inline bool ToASCII(std::string* input, std::string* output) { *output = *input; return true; } @@ -619,41 +580,26 @@ namespace url { url->path.pop_back(); } - static void Parse(Environment* env, - Local recv, - const char* input, - const size_t len, - enum url_parse_state state_override, - Local base_obj, - Local context_obj, - Local cb) { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope handle_scope(isolate); - Context::Scope context_scope(context); - - const bool has_base = base_obj->IsObject(); + void URL::Parse(const char* input, + const size_t len, + enum url_parse_state state_override, + struct url_data* url, + const struct url_data* base, + bool has_base) { bool atflag = false; bool sbflag = false; bool uflag = false; bool base_is_file = false; int wskip = 0; - struct url_data base; - struct url_data url; - if (context_obj->IsObject()) - HarvestContext(env, &url, context_obj.As()); - if (has_base) - HarvestBase(env, &base, base_obj.As()); - std::string buffer; - url.scheme.reserve(len); - url.username.reserve(len); - url.password.reserve(len); - url.host.reserve(len); - url.path.reserve(len); - url.query.reserve(len); - url.fragment.reserve(len); + url->scheme.reserve(len); + url->username.reserve(len); + url->password.reserve(len); + url->host.reserve(len); + url->path.reserve(len); + url->query.reserve(len); + url->fragment.reserve(len); buffer.reserve(len); // Set the initial parse state. 
@@ -665,8 +611,8 @@ namespace url { const char* end = input + len; if (state < kSchemeStart || state > kFragment) { - INVALID_PARSE_STATE(); - goto done; + url->flags |= URL_FLAGS_INVALID_PARSE_STATE; + return; } while (p <= end) { @@ -684,7 +630,8 @@ namespace url { continue; } - bool special = url.flags & URL_FLAGS_SPECIAL; + bool special = (url->flags & URL_FLAGS_SPECIAL); + bool cannot_be_base; const bool special_back_slash = (special && ch == '\\'); switch (state) { case kSchemeStart: @@ -695,7 +642,8 @@ namespace url { state = kNoScheme; continue; } else { - TERMINATE() + url->flags |= URL_FLAGS_TERMINATED; + return; } break; case kScheme: @@ -706,23 +654,24 @@ namespace url { } else if (ch == ':' || (has_state_override && ch == kEOL)) { buffer += ':'; if (buffer.size() > 0) { - SET_HAVE_SCHEME() - url.scheme = buffer; + url->flags |= URL_FLAGS_HAS_SCHEME; + url->scheme = buffer; } - if (IsSpecial(url.scheme)) { - SPECIAL() + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; } else { - url.flags &= ~URL_FLAGS_SPECIAL; + url->flags &= ~URL_FLAGS_SPECIAL; } if (has_state_override) - goto done; + return; buffer.clear(); - if (url.scheme == "file:") { + if (url->scheme == "file:") { state = kFile; } else if (special && has_base && - DOES_HAVE_SCHEME(base) && - url.scheme == base.scheme) { + base->flags & URL_FLAGS_HAS_SCHEME && + url->scheme == base->scheme) { state = kSpecialRelativeOrAuthority; } else if (special) { state = kSpecialAuthoritySlashes; @@ -730,9 +679,9 @@ namespace url { state = kPathOrAuthority; p++; } else { - CANNOT_BE_BASE() - SET_HAVE_PATH() - url.path.push_back(""); + url->flags |= URL_FLAGS_CANNOT_BE_BASE; + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); state = kCannotBeBase; } } else if (!has_state_override) { @@ -741,43 +690,48 @@ namespace url { p = input; continue; } else { - TERMINATE() + url->flags |= URL_FLAGS_TERMINATED; + return; } break; case kNoScheme: - if (!has_base || 
(IS_CANNOT_BE_BASE(base.flags) && ch != '#')) { - URL_FAILED() - } else if (IS_CANNOT_BE_BASE(base.flags) && ch == '#') { - SET_HAVE_SCHEME() - url.scheme = base.scheme; - if (IsSpecial(url.scheme)) { - SPECIAL() + cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE); + if (!has_base || (cannot_be_base && ch != '#')) { + url->flags |= URL_FLAGS_FAILED; + return; + } else if (cannot_be_base && ch == '#') { + url->flags |= URL_FLAGS_HAS_SCHEME; + url->scheme = base->scheme; + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; } else { - url.flags &= ~URL_FLAGS_SPECIAL; + url->flags &= ~URL_FLAGS_SPECIAL; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - if (DOES_HAVE_QUERY(base)) { - SET_HAVE_QUERY() - url.query = base.query; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } - if (DOES_HAVE_FRAGMENT(base)) { - SET_HAVE_FRAGMENT() - url.fragment = base.fragment; + if (base->flags & URL_FLAGS_HAS_FRAGMENT) { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment = base->fragment; } - CANNOT_BE_BASE() + url->flags |= URL_FLAGS_CANNOT_BE_BASE; state = kFragment; } else if (has_base && - DOES_HAVE_SCHEME(base) && - base.scheme != "file:") { + base->flags & URL_FLAGS_HAS_SCHEME && + base->scheme != "file:") { state = kRelative; continue; } else { - SET_HAVE_SCHEME() - url.scheme = "file:"; - SPECIAL() + url->flags |= URL_FLAGS_HAS_SCHEME; + url->scheme = "file:"; + url->flags |= URL_FLAGS_SPECIAL; + special = true; state = kFile; continue; } @@ -800,106 +754,107 @@ namespace url { } break; case kRelative: - SET_HAVE_SCHEME() - url.scheme = base.scheme; - if (IsSpecial(url.scheme)) { - SPECIAL() + url->flags |= URL_FLAGS_HAS_SCHEME; + url->scheme = base->scheme; + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; }
else { - url.flags &= ~URL_FLAGS_SPECIAL; + url->flags &= ~URL_FLAGS_SPECIAL; } switch (ch) { case kEOL: - if (DOES_HAVE_USERNAME(base)) { - SET_HAVE_USERNAME() - url.username = base.username; + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - if (DOES_HAVE_PASSWORD(base)) { - SET_HAVE_PASSWORD() - url.password = base.password; + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_QUERY(base)) { - SET_HAVE_QUERY() - url.query = base.query; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - url.port = base.port; + url->port = base->port; break; case '/': state = kRelativeSlash; break; case '?': - if (DOES_HAVE_USERNAME(base)) { - SET_HAVE_USERNAME() - url.username = base.username; + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - if (DOES_HAVE_PASSWORD(base)) { - SET_HAVE_PASSWORD() - url.password = base.password; + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - url.port = base.port; + url->port = 
base->port; state = kQuery; break; case '#': - if (DOES_HAVE_USERNAME(base)) { - SET_HAVE_USERNAME() - url.username = base.username; + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - if (DOES_HAVE_PASSWORD(base)) { - SET_HAVE_PASSWORD() - url.password = base.password; + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_QUERY(base)) { - SET_HAVE_QUERY() - url.query = base.query; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - url.port = base.port; + url->port = base->port; state = kFragment; break; default: if (special_back_slash) { state = kRelativeSlash; } else { - if (DOES_HAVE_USERNAME(base)) { - SET_HAVE_USERNAME() - url.username = base.username; + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - if (DOES_HAVE_PASSWORD(base)) { - SET_HAVE_PASSWORD() - url.password = base.password; + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; - ShortenUrlPath(&url); + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + ShortenUrlPath(url); } - url.port = base.port; + 
url->port = base->port; state = kPath; continue; } @@ -909,19 +864,19 @@ namespace url { if (ch == '/' || special_back_slash) { state = kSpecialAuthorityIgnoreSlashes; } else { - if (DOES_HAVE_USERNAME(base)) { - SET_HAVE_USERNAME() - url.username = base.username; + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - if (DOES_HAVE_PASSWORD(base)) { - SET_HAVE_PASSWORD() - url.password = base.password; + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - url.port = base.port; + url->port = base->port; state = kPath; continue; } @@ -949,21 +904,21 @@ namespace url { atflag = true; const size_t blen = buffer.size(); if (blen > 0 && buffer[0] != ':') { - SET_HAVE_USERNAME() + url->flags |= URL_FLAGS_HAS_USERNAME; } for (size_t n = 0; n < blen; n++) { const char bch = buffer[n]; if (bch == ':') { - SET_HAVE_PASSWORD() + url->flags |= URL_FLAGS_HAS_PASSWORD; if (!uflag) { uflag = true; continue; } } if (uflag) { - AppendOrEscape(&url.password, bch, UserinfoEncodeSet); + AppendOrEscape(&url->password, bch, UserinfoEncodeSet); } else { - AppendOrEscape(&url.username, bch, UserinfoEncodeSet); + AppendOrEscape(&url->username, bch, UserinfoEncodeSet); } } buffer.clear(); @@ -982,30 +937,42 @@ namespace url { case kHost: case kHostname: if (ch == ':' && !sbflag) { - if (special && buffer.size() == 0) - URL_FAILED() - SET_HAVE_HOST() - if (!ParseHost(&buffer, &url.host)) - URL_FAILED() + if (special && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } buffer.clear(); state = kPort; - if (state_override == kHostname) - 
TERMINATE() + if (state_override == kHostname) { + url->flags |= URL_FLAGS_TERMINATED; + return; + } } else if (ch == kEOL || ch == '/' || ch == '?' || ch == '#' || special_back_slash) { p--; - if (special && buffer.size() == 0) - URL_FAILED() - SET_HAVE_HOST() - if (!ParseHost(&buffer, &url.host)) - URL_FAILED() + if (special && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } buffer.clear(); state = kPathStart; - if (has_state_override) - TERMINATE() + if (has_state_override) { + url->flags |= URL_FLAGS_TERMINATED; + return; + } } else { if (ch == '[') sbflag = true; @@ -1028,37 +995,39 @@ namespace url { for (size_t i = 0; i < buffer.size(); i++) port = port * 10 + buffer[i] - '0'; if (port >= 0 && port <= 0xffff) { - url.port = NormalizePort(url.scheme, port); + url->port = NormalizePort(url->scheme, port); } else if (!has_state_override) { - URL_FAILED() + url->flags |= URL_FLAGS_FAILED; + return; } buffer.clear(); } state = kPathStart; continue; } else { - URL_FAILED(); + url->flags |= URL_FLAGS_FAILED; + return; } break; case kFile: base_is_file = ( has_base && - DOES_HAVE_SCHEME(base) && - base.scheme == "file:"); + base->flags & URL_FLAGS_HAS_SCHEME && + base->scheme == "file:"); switch (ch) { case kEOL: if (base_is_file) { - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - if (DOES_HAVE_QUERY(base)) { - SET_HAVE_QUERY() - url.query = base.query; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } break; } @@ -1070,31 +1039,31 @@ namespace url { break; 
case '?': if (base_is_file) { - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - SET_HAVE_QUERY() + url->flags |= URL_FLAGS_HAS_QUERY; state = kQuery; break; } case '#': if (base_is_file) { - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - if (DOES_HAVE_QUERY(base)) { - SET_HAVE_QUERY() - url.query = base.query; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } state = kFragment; break; @@ -1107,15 +1076,15 @@ namespace url { p[2] != '\\' && p[2] != '?' 
&& p[2] != '#'))) { - if (DOES_HAVE_HOST(base)) { - SET_HAVE_HOST() - url.host = base.host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - if (DOES_HAVE_PATH(base)) { - SET_HAVE_PATH() - url.path = base.path; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - ShortenUrlPath(&url); + ShortenUrlPath(url); } state = kPath; continue; @@ -1126,13 +1095,13 @@ namespace url { state = kFileHost; } else { if (has_base && - DOES_HAVE_SCHEME(base) && - base.scheme == "file:" && - DOES_HAVE_PATH(base) && - base.path.size() > 0 && - NORMALIZED_WINDOWS_DRIVE_LETTER(base.path[0])) { - SET_HAVE_PATH() - url.path.push_back(base.path[0]); + base->flags & URL_FLAGS_HAS_SCHEME && + base->scheme == "file:" && + base->flags & URL_FLAGS_HAS_PATH && + base->path.size() > 0 && + NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(base->path[0]); } state = kPath; continue; @@ -1151,9 +1120,11 @@ namespace url { state = kPathStart; } else { if (buffer != "localhost") { - SET_HAVE_HOST() - if (!ParseHost(&buffer, &url.host)) - URL_FAILED() + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } } buffer.clear(); state = kPathStart; @@ -1174,32 +1145,32 @@ namespace url { special_back_slash || (!has_state_override && (ch == '?' 
|| ch == '#'))) { if (IsDoubleDotSegment(buffer)) { - ShortenUrlPath(&url); + ShortenUrlPath(url); if (ch != '/' && !special_back_slash) { - SET_HAVE_PATH() - url.path.push_back(""); + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); } } else if (IsSingleDotSegment(buffer)) { if (ch != '/' && !special_back_slash) { - SET_HAVE_PATH(); - url.path.push_back(""); + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); } } else { - if (DOES_HAVE_SCHEME(url) && - url.scheme == "file:" && - url.path.empty() && + if (url->flags & URL_FLAGS_HAS_SCHEME && + url->scheme == "file:" && + url->path.empty() && buffer.size() == 2 && WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { - url.flags &= ~URL_FLAGS_HAS_HOST; + url->flags &= ~URL_FLAGS_HAS_HOST; buffer[1] = ':'; } - SET_HAVE_PATH() + url->flags |= URL_FLAGS_HAS_PATH; std::string segment(buffer.c_str(), buffer.size()); - url.path.push_back(segment); + url->path.push_back(segment); } buffer.clear(); if (ch == '?') { - SET_HAVE_QUERY() + url->flags |= URL_FLAGS_HAS_QUERY; state = kQuery; } else if (ch == '#') { state = kFragment; @@ -1217,16 +1188,16 @@ namespace url { state = kFragment; break; default: - if (url.path.size() == 0) - url.path.push_back(""); - if (url.path.size() > 0 && ch != kEOL) - AppendOrEscape(&url.path[0], ch, SimpleEncodeSet); + if (url->path.size() == 0) + url->path.push_back(""); + if (url->path.size() > 0 && ch != kEOL) + AppendOrEscape(&url->path[0], ch, SimpleEncodeSet); } break; case kQuery: if (ch == kEOL || (!has_state_override && ch == '#')) { - SET_HAVE_QUERY() - url.query = buffer; + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = buffer; buffer.clear(); if (ch == '#') state = kFragment; @@ -1237,8 +1208,8 @@ namespace url { case kFragment: switch (ch) { case kEOL: - SET_HAVE_FRAGMENT() - url.fragment = buffer; + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment = buffer; break; case 0: break; @@ -1247,14 +1218,39 @@ namespace url { } break; default: - 
INVALID_PARSE_STATE() - goto done; + url->flags |= URL_FLAGS_INVALID_PARSE_STATE; + return; } p++; } + } + + static void Parse(Environment* env, + Local recv, + const char* input, + const size_t len, + enum url_parse_state state_override, + Local base_obj, + Local context_obj, + Local cb) { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const bool has_base = base_obj->IsObject(); - done: + struct url_data base; + struct url_data url; + if (context_obj->IsObject()) + HarvestContext(env, &url, context_obj.As()); + if (has_base) + HarvestBase(env, &base, base_obj.As()); + + URL::Parse(input, len, state_override, &url, &base, has_base); + if (url.flags & URL_FLAGS_INVALID_PARSE_STATE) + return; // Define the return value placeholders const Local undef = Undefined(isolate); @@ -1271,22 +1267,22 @@ namespace url { }; argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - if (!IS_FAILED(url.flags)) { - if (DOES_HAVE_SCHEME(url)) + if (!(url.flags & URL_FLAGS_FAILED)) { + if (url.flags & URL_FLAGS_HAS_SCHEME) argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); - if (DOES_HAVE_USERNAME(url)) + if (url.flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = UTF8STRING(isolate, url.username); - if (DOES_HAVE_PASSWORD(url)) + if (url.flags & URL_FLAGS_HAS_PASSWORD) argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password); - if (DOES_HAVE_HOST(url)) + if (url.flags & URL_FLAGS_HAS_HOST) argv[ARG_HOST] = UTF8STRING(isolate, url.host); - if (DOES_HAVE_QUERY(url)) + if (url.flags & URL_FLAGS_HAS_QUERY) argv[ARG_QUERY] = UTF8STRING(isolate, url.query); - if (DOES_HAVE_FRAGMENT(url)) + if (url.flags & URL_FLAGS_HAS_FRAGMENT) argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment); if (url.port > -1) argv[ARG_PORT] = Integer::New(isolate, url.port); - if (DOES_HAVE_PATH(url)) + if (url.flags & URL_FLAGS_HAS_PATH) argv[ARG_PATH] = Copy(env, url.path); } diff 
--git a/src/node_url.h b/src/node_url.h index ba05cd6fed65d2..49f6de866da501 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -470,19 +470,19 @@ enum url_parse_state { #define XX(name) name, PARSESTATES(XX) #undef XX -} url_parse_state; +}; enum url_flags { #define XX(name, val) name = val, FLAGS(XX) #undef XX -} url_flags; +}; enum url_cb_args { #define XX(name) name, ARGS(XX) #undef XX -} url_cb_args; +}; static inline bool IsSpecial(std::string scheme) { #define XX(name, _) if (scheme == name) return true; @@ -528,6 +528,91 @@ struct url_host { url_host_value value; enum url_host_type type; }; + +class URL { + public: + static void Parse(const char* input, + const size_t len, + enum url_parse_state state_override, + struct url_data* url, + const struct url_data* base, + bool has_base); + + URL(const char* input, const size_t len) { + Parse(input, len, kUnknownState, &context_, nullptr, false); + } + + URL(const char* input, const size_t len, const URL* base) { + if (base != nullptr) + Parse(input, len, kUnknownState, &context_, &(base->context_), true); + else + Parse(input, len, kUnknownState, &context_, nullptr, false); + } + + URL(const char* input, const size_t len, + const char* base, const size_t baselen) { + if (base != nullptr && baselen > 0) { + URL _base(base, baselen); + Parse(input, len, kUnknownState, &context_, &(_base.context_), true); + } else { + Parse(input, len, kUnknownState, &context_, nullptr, false); + } + } + + explicit URL(const std::string& input) : + URL(input.c_str(), input.length()) {} + + URL(const std::string& input, const URL* base) : + URL(input.c_str(), input.length(), base) {} + + URL(const std::string& input, const std::string& base) : + URL(input.c_str(), input.length(), base.c_str(), base.length()) {} + + int32_t flags() const { + return context_.flags; + } + + int port() const { + return context_.port; + } + + const std::string& protocol() const { + return context_.scheme; + } + + const std::string& username() const { + return context_.username; + } + + const
std::string& password() const { + return context_.password; + } + + const std::string& host() const { + return context_.host; + } + + const std::string& query() const { + return context_.query; + } + + const std::string& fragment() const { + return context_.fragment; + } + + std::string path() const { + std::string ret; + for (const std::string& segment : context_.path) { + ret += '/'; + ret += segment; + } + return ret; + } + + private: + struct url_data context_; +}; + } // namespace url } // namespace node