diff --git a/lib/_http_parser.js b/lib/_http_parser.js index b723cfed74a1f6..ffc9902d798c2d 100644 --- a/lib/_http_parser.js +++ b/lib/_http_parser.js @@ -24,8 +24,6 @@ Misc differences with joyent/http-parser: joyent/http-parser keeps trailing whitespace. This parser keeps neither preceding nor trailing whitespace. - * Enforces CRLF for line endings instead of additionally allowing just LF. - * Does not allow spaces (which are invalid) in header field names. * Smaller maximum chunk/content length (2^53-1 vs 2^64-2). Obviously it's @@ -46,183 +44,32 @@ var LF = 10; var MAX_CHUNK_SIZE = Number.MAX_SAFE_INTEGER; // 9007199254740991 -var UNHEX = { - 48: 0, - 49: 1, - 50: 2, - 51: 3, - 52: 4, - 53: 5, - 54: 6, - 55: 7, - 56: 8, - 57: 9, - 65: 10, - 66: 11, - 67: 12, - 68: 13, - 69: 14, - 70: 15, - 97: 10, - 98: 11, - 99: 12, - 100: 13, - 101: 14, - 102: 15 -}; - -// RFC 7230 recommends at least 8000 max bytes for request line, but no -// recommendation for status lines +// RFC 7230 recommends HTTP implementations support at least 8000 bytes for +// the request line. We use 8190 by default, the same as Apache. +HTTPParser.MAX_REQ_LINE = 8190; +// RFC 7230 does not have any recommendations for minimum response line length +// support. Judging by the (current) longest standard status reason text, the +// typical response line will be 44 bytes or less (not including (CR)LF). Since +// the reason text field is free form though, we will roughly triple that +// amount for the default. +HTTPParser.MAX_RES_LINE = 128; + +// This total limit for the start line + all headers was copied from +// joyent/http-parser. 
var MAX_HEADER_BYTES = 80 * 1024; -var RE_CONN_CLOSE = /(?:^|[\t ,]+)close(?:$|[\t ,]+)/i; -var RE_CONN_KEEPALIVE = /(?:^|[\t ,]+)keep\-alive(?:$|[\t ,]+)/i; -var RE_CONN_UPGRADE = /(?:^|[\t ,]+)upgrade(?:$|[\t ,]+)/i; -var RE_TE_CHUNKED = /(?:^|[\t ,]+)chunked(?:$|[\t ,]+)/i; +var RE_CONN_CLOSE = /(?:^|[\t ,]+)close(?:\r?$|[\t ,]+)/i; +var RE_CONN_KEEPALIVE = /(?:^|[\t ,]+)keep\-alive(?:\r?$|[\t ,]+)/i; +var RE_CONN_UPGRADE = /(?:^|[\t ,]+)upgrade(?:\r?$|[\t ,]+)/i; +var RE_TE_CHUNKED = /(?:^|[\t ,]+)chunked(?:\r?$|[\t ,]+)/i; var CC_CONNECT = 'connect'.split('').map(getFirstCharCode); var CC_CONNECTION = 'connection'.split('').map(getFirstCharCode); var CC_TE = 'transfer-encoding'.split('').map(getFirstCharCode); var CC_UPGRADE = 'upgrade'.split('').map(getFirstCharCode); var CC_CONTLEN = 'content-length'.split('').map(getFirstCharCode); - -// URI-parsing Regular Expressions ... - -// Note: double quotes are not allowed anywhere in request URIs, but -// joyent/http-parser allowed it previously so we do too for better backwards -// compatibility ... -// Note: non-ASCII characters are not allowed anywhere in request URIs, but -// joyent/http-parser allowed it previously so we do too for better backwards -// compatibility ... -var RE_PCHAR = /(?:[A-Za-z0-9\-._~!$&'()*+,;=:@"\x80-\xFF]|%[0-9A-Fa-f]{2})/; -var RE_ABS_PATH = new RegExp('(?:/' + RE_PCHAR.source + '*)+'); -// Note: double quotes are not allowed anywhere in request URIs, but -// joyent/http-parser allowed it previously so we do too for better backwards -// compatibility ... -// Note: non-ASCII characters are not allowed anywhere in request URIs, but -// joyent/http-parser allowed it previously so we do too for better backwards -// compatibility ... -var RE_QUERY = /(?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?"\x80-\xFF]|%[0-9A-Fa-f]{2})*/; -// Note: fragments are technically not allowed in the request line, but -// joyent/http-parser allowed it previously so we do too for better backwards -// compatibility ... 
-var RE_ORIGIN_FORM = new RegExp('(?:' + RE_ABS_PATH.source + '(?:\\?' + - RE_QUERY.source + ')?(?:#' + RE_QUERY.source + - ')?)'); - -var RE_SCHEME = /[A-Za-z][A-Za-z0-9+\-.]*/; -var RE_USERINFO = /(?:[A-Za-z0-9\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})*/; -var RE_IPV4_OCTET = /(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)/; -var RE_IPV4 = new RegExp('(?:' + RE_IPV4_OCTET.source + '\\.){3}' + - RE_IPV4_OCTET.source); -var RE_H16 = /[0-9A-Fa-f]{1,4}/; -var RE_LS32 = new RegExp('(?:' + RE_H16.source + ':' + RE_H16.source + ')|(?:' + - RE_IPV4.source + ')'); -var RE_H16_COLON = new RegExp('(?:' + RE_H16.source + ':)'); -var RE_IPV6 = new RegExp('(?:' + - // Begin LS32 postfix cases - '(?:' + - [ - RE_H16_COLON.source + '{6}', - '::' + RE_H16_COLON.source + '{5}', - '(?:' + RE_H16.source + ')?::' + RE_H16_COLON.source + '{4}', - '(?:' + RE_H16_COLON.source + '{0,1}' + RE_H16.source + ')?::' + - RE_H16_COLON.source + '{3}', - '(?:' + RE_H16_COLON.source + '{0,2}' + RE_H16.source + ')?::' + - RE_H16_COLON.source + '{2}', - '(?:' + RE_H16_COLON.source + '{0,3}' + RE_H16.source + ')?::' + - RE_H16_COLON.source, - '(?:' + RE_H16_COLON.source + '{0,4}' + RE_H16.source + ')?::', - ].join(')|(?:') + - ')(?:' + RE_LS32.source + ')' + - // End LS32 postfix cases - ')' + - '|(?:(?:' + RE_H16_COLON.source + '{0,5}' + RE_H16.source + ')?::' + - RE_H16.source + ')' + - '|(?:(?:' + RE_H16_COLON.source + '{0,6}' + RE_H16.source + ')?::)'); -var RE_REGNAME = /(?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*/; -var RE_HOST = new RegExp('(?:(?:\\[' + RE_IPV6.source + '\\])|(?:' + - RE_IPV4.source + ')|' + RE_REGNAME.source + ')'); -var RE_AUTHORITY = new RegExp('(?:(?:' + RE_USERINFO.source + '@)?' 
+ - RE_HOST.source + '(?::[0-9]*)?)'); -var RE_PATH_ABEMPTY = new RegExp('(?:/' + RE_PCHAR.source + '*)*'); -var RE_PATH_ROOTLESS = new RegExp('(?:' + RE_PCHAR.source + '+' + - RE_PATH_ABEMPTY.source + ')'); -var RE_PATH_ABSOLUTE = new RegExp('(?:/' + RE_PATH_ROOTLESS.source + '?)'); -var RE_HIER_PART = new RegExp('(?:(?://' + RE_AUTHORITY.source + - RE_PATH_ABEMPTY.source + ')|' + - RE_PATH_ABSOLUTE.source + '|' + - RE_PATH_ROOTLESS.source + '|)'); -// Note: fragments are technically not allowed in the request line, but -// joyent/http-parser allowed it previously so we do too for better backwards -// compatibility ... -var RE_ABSOLUTE_FORM = new RegExp('(?:' + RE_SCHEME.source + ':' + - RE_HIER_PART.source + '(?:\\?' + - RE_QUERY.source + ')?(?:#' + - RE_QUERY.source + ')?)'); - -var RE_REQUEST_TARGET = new RegExp('(?:' + RE_ORIGIN_FORM.source + '|' + - RE_ABSOLUTE_FORM.source + '|' + - RE_AUTHORITY.source + '|\\*)'); -var RE_REQUEST_LINE = new RegExp('^([!#$%\'*+\\-.^_`|~0-9A-Za-z]+) (' + - RE_REQUEST_TARGET.source + - ')(?: HTTP\\/1\\.([01]))?$'); -/* -request-target = origin-form | absolute-form | authority-form | - asterisk-form - origin-form = absolute-path [ "?" query ] - absolute-path = 1*( "/" segment ) - segment = *pchar - pchar = unreserved | pct-encoded | sub-delims | ":" | "@" - unreserved = ALPHA | DIGIT | "-" | "." | "_" | "~" - pct-encoded = "%" HEXDIG HEXDIG - HEXDIG = DIGIT | "A" | "B" | "C" | "D" | "E" | "F" - sub-delims = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | - "," | ";" | "=" - query = *( pchar | "/" | "?" ) - absolute-form = absolute-URI - absolute-URI = scheme ":" hier-part [ "?" query ] - scheme = alpha *( alpha | digit | "+" | "-" | "." 
) - hier-part = "//" authority path-abempty | path-absolute | - path-rootless | path-empty - authority = [ userinfo "@" ] host [ ":" port ] - userinfo = *( unreserved | pct-encoded | sub-delims | ":" ) - host = IP-literal | IPv4address | reg-name - IP-literal = "[" ( IPv6address | IPvFuture ) "]" - IPv6address = 6( h16 ":" ) ls32 - | "::" 5( h16 ":" ) ls32 - | [ h16 ] "::" 4( h16 ":" ) ls32 - | [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - | [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 - | [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - | [ *4( h16 ":" ) h16 ] "::" ls32 - | [ *5( h16 ":" ) h16 ] "::" h16 - | [ *6( h16 ":" ) h16 ] "::" - h16 = 1*4HEXDIG - ls32 = ( h16 ":" h16 ) | IPv4address - IPv4address = dec-octet "." dec-octet "." dec-octet "." - dec-octet - dec-octet = DIGIT ; 0-9 - | %x31-39 DIGIT ; 10-99 - | "1" 2DIGIT ; 100-199 - | "2" %x30-34 DIGIT ; 200-249 - | "25" %x30-35 ; 250-255 - reg-name = *( unreserved | pct-encoded | sub-delims ) - port = *DIGIT - path-abempty = *( "/" segment ) - path-absolute = "/" [ segment-nz *( "/" segment ) ] - segment-nz = 1*pchar - path-rootless = segment-nz *( "/" segment ) - path-empty = 0 - authority-form = authority - asterisk-form = "*" -*/ - -// Note: AT LEAST a space is technically required after the status code, but -// joyent/http-parser allows a CRLF immediately following the status code, so we -// do also for backwards compatibility ... 
-var RE_STATUS_LINE = /^HTTP\/1\.([01]) ([0-9]{3})(?: (.*))?$/; - -var RE_HEADER = /^([!#$%'*+\-.^_`|~0-9A-Za-z]+):(.*)$/; +var REQ_HTTP_VER_BYTES = ' HTTP/1.'.split('').map(getFirstCharCode); +var RES_HTTP_VER_BYTES = REQ_HTTP_VER_BYTES.slice(1); +REQ_HTTP_VER_BYTES.reverse(); var STATE_REQ_LINE = 0; var STATE_STATUS_LINE = 1; @@ -230,9 +77,10 @@ var STATE_HEADER = 2; var STATE_BODY_LITERAL = 3; var STATE_BODY_EOF = 4; var STATE_BODY_CHUNKED_SIZE = 5; -var STATE_BODY_CHUNKED_BYTES = 6; -var STATE_BODY_CHUNKED_BYTES_CRLF = 7; -var STATE_COMPLETE = 8; +var STATE_BODY_CHUNKED_SIZE_IGNORE = 6; +var STATE_BODY_CHUNKED_BYTES = 7; +var STATE_BODY_CHUNKED_BYTES_LF = 8; +var STATE_COMPLETE = 9; var FLAG_CHUNKED = 1 << 0; var FLAG_CONNECTION_KEEP_ALIVE = 1 << 1; @@ -267,11 +115,10 @@ HTTPParser.prototype.reinitialize = function(type) { this._err = null; this._flags = 0; this._contentLen = null; - this._nbytes = 0; + this._nbytes = null; this._nhdrbytes = 0; this._nhdrpairs = 0; this._buf = ''; - this._seenCR = false; // common properties this.headers = []; @@ -309,129 +156,6 @@ HTTPParser.prototype._setError = function(msg) { this._err = err; return err; }; -HTTPParser.prototype._processHdrLine = function(line) { - switch (this._state) { - case STATE_HEADER: - if (line.length === 0) { - // We saw a double CRLF - this._headersEnd(); - return; - } - var headers = this.headers; - var headerslen = headers.length; - var fieldName; - var fieldValue; - var m = RE_HEADER.exec(line); - if (m === null) { - var firstChr = line.charCodeAt(0); - if (firstChr !== 32 & firstChr !== 9) - return this._setError('Malformed header line'); - // RFC 7230 compliant, but less backwards compatible: - var extra = ltrim(line); - if (extra.length > 0) { - if (headerslen === 0) - return this._setError('Malformed header line'); - fieldName = headers[headerslen - 2]; - fieldValue = headers[headerslen - 1] + ' ' + extra; - // Need to re-check value since matched values may now exist ... 
- if (equalsLower(fieldName, CC_CONNECTION)) { - if (fieldValue.search(RE_CONN_CLOSE) > -1) - this._flags |= FLAG_CONNECTION_CLOSE; - if (fieldValue.search(RE_CONN_KEEPALIVE) > -1) - this._flags |= FLAG_CONNECTION_KEEP_ALIVE; - if (fieldValue.search(RE_CONN_UPGRADE) > -1) - this._flags |= FLAG_CONNECTION_UPGRADE; - } else if (equalsLower(fieldName, CC_TE)) { - if (fieldValue.search(RE_TE_CHUNKED) > -1) - this._flags |= FLAG_CHUNKED; - } else if (equalsLower(fieldName, CC_UPGRADE)) { - this._flags |= FLAG_UPGRADE; - } else if (equalsLower(fieldName, CC_CONTLEN)) { - var val = parseInt(fieldValue, 10); - if (val !== val || val > MAX_CHUNK_SIZE) - return this._setError('Bad Content-Length: ' + inspect(val)); - this._contentLen = val; - } - headers[headerslen - 1] = fieldValue; - } - } else { - // Ensures that trailing whitespace after the last folded line for - // header values gets trimmed - if (headerslen > 0) - headers[headerslen - 1] = trim(headers[headerslen - 1]); - // m[1]: field name - // m[2]: field value - fieldName = m[1]; - fieldValue = m[2]; - if (equalsLower(fieldName, CC_CONNECTION)) { - if (fieldValue.search(RE_CONN_CLOSE) > -1) - this._flags |= FLAG_CONNECTION_CLOSE; - if (fieldValue.search(RE_CONN_KEEPALIVE) > -1) - this._flags |= FLAG_CONNECTION_KEEP_ALIVE; - if (fieldValue.search(RE_CONN_UPGRADE) > -1) - this._flags |= FLAG_CONNECTION_UPGRADE; - } else if (equalsLower(fieldName, CC_TE)) { - if (fieldValue.search(RE_TE_CHUNKED) > -1) - this._flags |= FLAG_CHUNKED; - } else if (equalsLower(fieldName, CC_UPGRADE)) { - this._flags |= FLAG_UPGRADE; - } else if (equalsLower(fieldName, CC_CONTLEN)) { - var val = parseInt(fieldValue, 10); - if (val !== val || val > MAX_CHUNK_SIZE) - return this._setError('Bad Content-Length: ' + inspect(val)); - this._contentLen = val; - } - var maxHeaderPairs = this.maxHeaderPairs; - if (maxHeaderPairs <= 0 || ++this._nhdrpairs < maxHeaderPairs) - headers.push(fieldName, fieldValue); - } - break; - case STATE_REQ_LINE: - 
// Original HTTP parser ignored blank lines before request/status line, - // so we do that here too ... - if (line.length === 0) - return true; - var m = RE_REQUEST_LINE.exec(line); - if (m === null) - return this._setError('Malformed request line'); - // m[1]: HTTP method - // m[2]: request target - // m[3]: HTTP minor version - this.method = m[1]; - this.url = m[2]; - var minor = m[3]; - if (minor === undefined) { - // HTTP/0.9 ugh... - if (m[1] !== 'GET') - return this._setError('Malformed request line'); - this.httpMajor = 0; - this.httpMinor = 9; - this._headersEnd(); - } else { - this.httpMinor = (minor === '1' ? 1 : 0); - this._state = STATE_HEADER; - } - break; - case STATE_STATUS_LINE: - // Original HTTP parser ignored blank lines before request/status line, - // so we do that here too ... - if (line.length === 0) - return true; - var m = RE_STATUS_LINE.exec(line); - if (m === null) - return this._setError('Malformed status line'); - // m[1]: HTTP minor version - // m[2]: HTTP status code - // m[3]: Reason text - this.httpMinor = (m[1] === '1' ? 
1 : 0); - this.statusCode = parseInt(m[2], 10); - this.statusText = m[3] || ''; - this._state = STATE_HEADER; - break; - default: - return this._setError('Unexpected HTTP parser state: ' + this._state); - } -}; HTTPParser.prototype._headersEnd = function() { var flags = this._flags; var type = this.type; @@ -447,8 +171,7 @@ HTTPParser.prototype._headersEnd = function() { var ret; this._buf = ''; - this._seenCR = false; - this._nbytes = 0; + this._nbytes = null; if ((flags & FLAG_CHUNKED) > 0) { this._state = STATE_BODY_CHUNKED_SIZE; @@ -504,8 +227,10 @@ HTTPParser.prototype._executeStartLine = function(data) { if (data.length === 0) return 0; var firstByte = data[0]; - if ((firstByte < 32 || firstByte >= 127) && firstByte !== CR) - return this._setError('Invalid byte(s) in start line'); + if ((firstByte < 32 || firstByte >= 127) && firstByte !== CR && + firstByte !== LF) { + return this._setError('Invalid byte in start line'); + } this.execute = this._executeHeader; return this.execute(data); }; @@ -516,25 +241,40 @@ HTTPParser.prototype._executeHeader = function(data) { return 0; var offset = 0; - var seenCR = this._seenCR; var buf = this._buf; var nhdrbytes = this._nhdrbytes; + var state = this._state; + var headers = this.headers; + var headerslen = headers.length; + var maxHeaderPairs = this.maxHeaderPairs; var ret; while (offset < len) { - if (seenCR) { - seenCR = false; - if (data[offset] === LF) { - // Our internal buffer contains a full line - ++offset; - ret = this._processHdrLine(buf); - buf = ''; - if (typeof ret === 'object') - return ret; - else if (ret === undefined) { - var state = this._state; - if (state !== STATE_HEADER) { - // Begin of body or end of message + ret = indexOfLF(data, len, offset); + if (ret > -1) { + // Our internal buffer contains a full line + var bytesToAdd = ret - offset; + if (bytesToAdd > 0) { + nhdrbytes += bytesToAdd; + if (state === STATE_REQ_LINE && nhdrbytes > HTTPParser.MAX_REQ_LINE) + return this._setError('Request 
line limit exceeded'); + else if (state === STATE_STATUS_LINE && + nhdrbytes > HTTPParser.MAX_RES_LINE) { + return this._setError('Response line limit exceeded'); + } else if (nhdrbytes > MAX_HEADER_BYTES) + return this._setError('Header limit exceeded'); + buf += data.toString('binary', offset, ret); + } + + offset = ret + 1; + var buflen = buf.length; + + switch (state) { + case STATE_HEADER: + if (buflen === 0 || buf.charCodeAt(0) === CR) { + // We saw a double line ending + this._headersEnd(); + state = this._state; if (state < STATE_COMPLETE && offset < len) { // Execute extra body bytes ret = this.execute(data.slice(offset)); @@ -545,71 +285,219 @@ HTTPParser.prototype._executeHeader = function(data) { this.reinitialize(this.type); return offset; } - } - } else { - // False match - buf += '\r'; - ++nhdrbytes; - if (nhdrbytes > MAX_HEADER_BYTES) { - return this._setError('Header size limit exceeded (' + - MAX_HEADER_BYTES + ')'); - } - } - } - ret = indexOfCRLF(data, len, offset); - if (ret > -1) { - // Our internal buffer contains a full line - var bytesToAdd = ret - offset; - if (bytesToAdd > 0) { - nhdrbytes += bytesToAdd; - if (nhdrbytes > MAX_HEADER_BYTES) { - return this._setError('Header size limit exceeded (' + - MAX_HEADER_BYTES + ')'); - } - buf += data.toString('binary', offset, ret); + var idx = -1; + var fieldName; + var fieldValue; + var valueStart = -1; + var validFieldName = true; + for (var i = 0; i < buflen; ++i) { + var ch = buf.charCodeAt(i); + if (idx === -1) { + if (ch === 58) { // ':' + if (i === 0 || !validFieldName) + return this._setError('Malformed header line'); + idx = i; + } else if (ch < 33 || ch > 126) + validFieldName = false; + } else if (ch !== 32 && ch !== 9) { + valueStart = i; + break; + } + } + if (idx === -1) { + var firstChr = buf.charCodeAt(0); + if (firstChr !== 32 & firstChr !== 9) + return this._setError('Malformed header line'); + // RFC 7230 compliant, but less backwards compatible: + var extra = ltrim(buf); + if 
(extra.length > 0) { + if (headerslen === 0) + return this._setError('Malformed header line'); + fieldName = headers[headerslen - 2]; + fieldValue = headers[headerslen - 1]; + if (fieldValue.length > 0) { + if (fieldValue.charCodeAt(fieldValue.length - 1) === CR) + fieldValue = fieldValue.slice(0, -1); + if (fieldValue.length > 0) + fieldValue += ' ' + extra; + else + fieldValue = extra; + } else + fieldValue = extra; + // Need to re-check value since matched values may now exist ... + if (equalsLower(fieldName, CC_CONNECTION)) { + if (fieldValue.search(RE_CONN_CLOSE) > -1) + this._flags |= FLAG_CONNECTION_CLOSE; + if (fieldValue.search(RE_CONN_KEEPALIVE) > -1) + this._flags |= FLAG_CONNECTION_KEEP_ALIVE; + if (fieldValue.search(RE_CONN_UPGRADE) > -1) + this._flags |= FLAG_CONNECTION_UPGRADE; + } else if (equalsLower(fieldName, CC_TE)) { + if (fieldValue.search(RE_TE_CHUNKED) > -1) + this._flags |= FLAG_CHUNKED; + } else if (equalsLower(fieldName, CC_UPGRADE)) { + this._flags |= FLAG_UPGRADE; + } else if (equalsLower(fieldName, CC_CONTLEN)) { + var val = parseInt(fieldValue, 10); + if (val !== val || val > MAX_CHUNK_SIZE) + return this._setError('Bad Content-Length: ' + inspect(val)); + this._contentLen = val; + } + headers[headerslen - 1] = fieldValue; + } + } else { + fieldName = buf.slice(0, idx); + fieldValue = valueStart === -1 ? 
'' : buf.slice(valueStart); + // Ensures that trailing whitespace after the last folded line for + // header values gets trimmed + if (headerslen > 0) + headers[headerslen - 1] = rtrim(headers[headerslen - 1]); + if (equalsLower(fieldName, CC_CONNECTION)) { + if (fieldValue.search(RE_CONN_CLOSE) > -1) + this._flags |= FLAG_CONNECTION_CLOSE; + if (fieldValue.search(RE_CONN_KEEPALIVE) > -1) + this._flags |= FLAG_CONNECTION_KEEP_ALIVE; + if (fieldValue.search(RE_CONN_UPGRADE) > -1) + this._flags |= FLAG_CONNECTION_UPGRADE; + } else if (equalsLower(fieldName, CC_TE)) { + if (fieldValue.search(RE_TE_CHUNKED) > -1) + this._flags |= FLAG_CHUNKED; + } else if (equalsLower(fieldName, CC_UPGRADE)) { + this._flags |= FLAG_UPGRADE; + } else if (equalsLower(fieldName, CC_CONTLEN)) { + var val = parseInt(fieldValue, 10); + if (val !== val || val > MAX_CHUNK_SIZE) + return this._setError('Bad Content-Length: ' + inspect(val)); + this._contentLen = val; + } + if (maxHeaderPairs <= 0 || ++this._nhdrpairs < maxHeaderPairs) { + headers.push(fieldName, fieldValue); + headerslen += 2; + } + } + break; + case STATE_REQ_LINE: + // Original HTTP parser ignored blank lines before request/status + // line, so we do that here too ... + if (buflen === 0 || buf.charCodeAt(0) === CR) + break; + + var firstSP; + var urlStart; + var urlEnd; + var minor; + var end = (buf.charCodeAt(buflen - 1) === CR ? 
+ buflen - 3 : buflen - 2); + // Start working backwards and both validate that the line ends in + // ` HTTP/1.[01]` and find the end of the URL (in case there are + // multiple spaces/tabs separating the URL and HTTP version + var ch = buf.charCodeAt(end + 1); + if (ch === 49) + minor = 1; + else if (ch === 48) + minor = 0; + else + return this._setError('Malformed request line'); + var h = 0; + while (end >= 0) { + var ch = buf.charCodeAt(end); + if (h < 8) { + if (ch !== REQ_HTTP_VER_BYTES[h++]) + return this._setError('Malformed request line'); + } else if (ch >= 33 && ch !== 127) { + urlEnd = end + 1; + break; + } + --end; + } + if (urlEnd === undefined) + return this._setError('Malformed request line'); + + // Now start working forwards and both validate the HTTP method and + // find the start of the URL (in case there are multiple spaces/tabs + // separating the method and the URL + for (var i = 0; i < urlEnd; ++i) { + ch = buf.charCodeAt(i); + if (firstSP !== undefined) { + if (ch >= 33 && ch !== 127) { + urlStart = i; + break; + } + } else if (ch === 32) + firstSP = i; + else if (ch < 33 || ch > 126) + return this._setError('Malformed request line'); + } + if (firstSP === undefined || + urlStart === undefined || + urlStart === urlEnd) { + return this._setError('Malformed request line'); + } + + this.httpMinor = minor; + this.method = buf.slice(0, firstSP); + this.url = buf.slice(urlStart, urlEnd); + state = STATE_HEADER; + break; + case STATE_STATUS_LINE: + // Original HTTP parser ignored blank lines before request/status + // line, so we do that here too ... 
+ if (buflen === 0 || buf.charCodeAt(0) === CR) + break; + + // Validate HTTP version + for (var h = 0; h < 7; ++h) { + if (buf.charCodeAt(h) !== RES_HTTP_VER_BYTES[h]) + return this._setError('Malformed status line'); + } + var minor; + var status = 0; + if (buf.charCodeAt(7) === 49) + minor = 1; + else if (buf.charCodeAt(7) === 48) + minor = 0; + else + return this._setError('Malformed status line'); + if (buf.charCodeAt(8) !== 32) + return this._setError('Malformed status line'); + + // Validate status code + for (var i = 9; i < 12; ++i) { + var ch = buf.charCodeAt(i); + if (ch < 48 || ch > 57) + return this._setError('Malformed status line'); + status *= 10; + status += (ch - 48); + } + + if (buf.charCodeAt(buflen - 1) === CR) + --buflen; + this.httpMinor = minor; + this.statusCode = status; + this.statusText = (buflen > 13 ? buf.slice(13, buflen) : ''); + state = STATE_HEADER; + break; + default: + return this._setError('Unexpected HTTP parser state: ' + state); } - offset = ret + 2; - ret = this._processHdrLine(buf); + buf = ''; - if (typeof ret === 'object') - return ret; - else if (ret === undefined) { - var state = this._state; - if (state !== STATE_HEADER) { - // Begin of body or end of message - if (state < STATE_COMPLETE && offset < len) { - // Execute extra body bytes - ret = this.execute(data.slice(offset)); - if (typeof ret !== 'number') - return ret; - return offset + ret; - } else if (state === STATE_COMPLETE) - this.reinitialize(this.type); - return offset; - } - } } else { - // Check for possible cross-chunk CRLF split - var end; - if (data[len - 1] === CR) { - seenCR = true; - end = len - 1; - } else - end = len; - - nhdrbytes += end - offset; - - if (nhdrbytes > MAX_HEADER_BYTES) { - return this._setError('Header size limit exceeded (' + - MAX_HEADER_BYTES + ')'); - } - buf += data.toString('binary', offset, end); + nhdrbytes += len - offset; + if (state === STATE_REQ_LINE && nhdrbytes > HTTPParser.MAX_REQ_LINE) + return this._setError('Request line 
limit exceeded'); + else if (state === STATE_STATUS_LINE && + nhdrbytes > HTTPParser.MAX_RES_LINE) { + return this._setError('Response line limit exceeded'); + } else if (nhdrbytes > MAX_HEADER_BYTES) + return this._setError('Header limit exceeded'); + buf += data.toString('binary', offset); break; } } + this._state = state; this._buf = buf; - this._seenCR = seenCR; this._nhdrbytes = nhdrbytes; return len; @@ -621,91 +509,28 @@ HTTPParser.prototype._executeBodyChunked = function(data) { return 0; var offset = 0; - var seenCR = this._seenCR; - var buf = this._buf; var nbytes = this._nbytes; - var ret; - var bytesToAdd; + var state = this._state; + var dec; while (offset < len) { - switch (this._state) { + switch (state) { case STATE_BODY_CHUNKED_SIZE: - if (seenCR) { - seenCR = false; - if (data[offset] === LF) { - // Our internal buffer contains a full line - ++offset; - ret = readChunkSize(buf); - buf = ''; - if (typeof ret !== 'number') { - this.execute = this._executeError; - this._err = ret; - return ret; - } else if (ret === 0) { - this._seenCR = false; - this._buf = ''; - this._flags |= FLAG_TRAILING; - this._state = STATE_HEADER; - this.execute = this._executeHeader; - if (offset < len) { - ret = this.execute(data.slice(offset)); - if (typeof ret !== 'number') - return ret; - return offset + ret; - } - return offset; - } else { - nbytes = ret; - this._state = STATE_BODY_CHUNKED_BYTES; - continue; - } - } else { - // False match - buf += '\r'; - } - } - ret = indexOfCRLF(data, len, offset); - if (ret > -1) { - // Our internal buffer contains a full line - bytesToAdd = ret - offset; - if (bytesToAdd > 0) - buf += data.toString('ascii', offset, ret); - - offset = ret + 2; - ret = readChunkSize(buf); - buf = ''; - - if (typeof ret !== 'number') { - this.execute = this._executeError; - this._err = ret; - return ret; - } else if (ret === 0) { - this._seenCR = false; - this._buf = ''; - this._flags |= FLAG_TRAILING; - this._state = STATE_HEADER; - this.execute = 
this._executeHeader; - if (offset < len) { - ret = this.execute(data.slice(offset)); - if (typeof ret !== 'number') - return ret; - return offset + ret; - } - return offset; - } else { - nbytes = ret; - this._state = STATE_BODY_CHUNKED_BYTES; - continue; - } - } else { - // Check for possible cross-chunk CRLF split - var end; - if (data[len - 1] === CR) { - seenCR = true; - end = len - 1; + while (offset < len) { + var ch = data[offset]; + dec = hexValue(ch); + if (dec === undefined) { + state = STATE_BODY_CHUNKED_SIZE_IGNORE; + break; + } else if (nbytes === null) + nbytes = dec; + else { + nbytes *= 16; + nbytes += dec; } - buf += data.toString('ascii', offset, end); - offset = len; // break out of while loop + if (nbytes > MAX_CHUNK_SIZE) + return this._setError('Chunk size limit exceeded'); + ++offset; } break; case STATE_BODY_CHUNKED_BYTES: @@ -714,28 +539,60 @@ HTTPParser.prototype._executeBodyChunked = function(data) { this.onBody(data, offset, nbytes); offset += nbytes; nbytes = 0; - this._state = STATE_BODY_CHUNKED_BYTES_CRLF; + state = STATE_BODY_CHUNKED_BYTES_LF; } else { nbytes -= dataleft; this.onBody(data, offset, dataleft); offset = len; } break; - case STATE_BODY_CHUNKED_BYTES_CRLF: - if (nbytes === 0 && data[offset++] === CR) - ++nbytes; - else if (nbytes === 1 && data[offset++] === LF) - this._state = STATE_BODY_CHUNKED_SIZE; - else - return this._setError('Malformed chunk (missing CRLF)'); + case STATE_BODY_CHUNKED_BYTES_LF: + while (offset < len) { + var curByte = data[offset++]; + if (nbytes === 0) { + if (curByte === LF) { + state = STATE_BODY_CHUNKED_SIZE; + nbytes = null; + break; + } else if (curByte === CR) + ++nbytes; + } else if (nbytes === 1 && curByte === LF) { + state = STATE_BODY_CHUNKED_SIZE; + nbytes = null; + break; + } else + return this._setError('Malformed chunk (malformed line ending)'); + } + break; + case STATE_BODY_CHUNKED_SIZE_IGNORE: + // We only reach this state once we receive a non-hex character + while (offset < len) 
{ + if (data[offset++] === LF) { + if (nbytes === 0) { + this._flags |= FLAG_TRAILING; + this._state = STATE_HEADER; + this._nbytes = null; + this.execute = this._executeHeader; + if (offset < len) { + var ret = this.execute(data.slice(offset)); + if (typeof ret !== 'number') + return ret; + return offset + ret; + } + return offset; + } else { + state = STATE_BODY_CHUNKED_BYTES; + break; + } + } + } break; default: return this._setError('Unexpected parser state while reading chunks'); } } - this._buf = buf; - this._seenCR = seenCR; + this._state = state; this._nbytes = nbytes; return len; @@ -800,7 +657,7 @@ HTTPParser.prototype._needsEOF = function(flags, type, status) { // See RFC 2616 section 4.4 if (status === 204 || // No Content status === 304 || // Not Modified - parseInt(status / 100, 10) === 1 || // 1xx e.g. Continue + (status >= 100 && status < 200) || // 1xx e.g. Continue (flags & FLAG_SKIPBODY) > 0) { // response to a HEAD request return false; } @@ -818,65 +675,61 @@ var REQUEST = HTTPParser.REQUEST = 0; HTTPParser.RESPONSE = 1; module.exports = HTTPParser; -function indexOfCRLF(buf, buflen, offset) { - var bo1; +function indexOfLF(buf, buflen, offset) { while (offset < buflen) { - bo1 = buf[offset + 1]; - if (buf[offset] === CR && bo1 === LF) + if (buf[offset] === LF) return offset; - else if (bo1 === CR) - ++offset; - else - offset += 2; + ++offset; } return -1; } -function readChunkSize(str) { - var val, dec; - for (var i = 0; i < str.length; ++i) { - dec = UNHEX[str.charCodeAt(i)]; - if (dec === undefined) - break; - else if (val === undefined) - val = dec; - else { - val *= 16; - val += dec; - } - } - if (val === undefined) - val = new Error('Invalid chunk size'); - else if (val > MAX_CHUNK_SIZE) - val = new Error('Chunk size too big'); - return val; +function hexValue(ch) { + if (ch > 47 && ch < 58) + return ch - 48; + ch |= 0x20; + if (ch > 96 && ch < 103) + return 10 + (ch - 97); } function ltrim(value) { var length = value.length, start; for 
(start = 0; start < length && - (value.charCodeAt(start) === 32 || value.charCodeAt(start) === 9); + (value.charCodeAt(start) === 32 || value.charCodeAt(start) === 9 || + value.charCodeAt(start) === CR); ++start); - return start > 0 ? value.slice(start) : value; + return (start > 0 ? value.slice(start) : value); } + +function rtrim(value) { + var length = value.length, end; + for (end = length; + end > 0 && + (value.charCodeAt(end - 1) === 32 || value.charCodeAt(end - 1) === 9 || + value.charCodeAt(end - 1) === CR); + --end); + return (end < length ? value.slice(0, end) : value); +} + function trim(value) { var length = value.length, start, end; for (start = 0; start < length && - (value.charCodeAt(start) === 32 || value.charCodeAt(start) === 9); + (value.charCodeAt(start) === 32 || value.charCodeAt(start) === 9 || + value.charCodeAt(start) === CR); ++start); for (end = length; end > start && - (value.charCodeAt(end - 1) === 32 || value.charCodeAt(end - 1) === 9); + (value.charCodeAt(end - 1) === 32 || value.charCodeAt(end - 1) === 9 || + value.charCodeAt(end - 1) === CR); --end); - return start > 0 || end < length ? value.slice(start, end) : value; + return (start > 0 || end < length ? value.slice(start, end) : value); } function equalsLower(input, ref) { var inlen = input.length; - var reflen = ref.length; - if (inlen !== reflen) + if (inlen !== ref.length) return false; for (var i = 0; i < inlen; ++i) { if ((input.charCodeAt(i) | 0x20) !== ref[i])