From a007fe976814e50ae9a9b75b9366bc670c812dd5 Mon Sep 17 00:00:00 2001 From: Brian White Date: Mon, 19 Aug 2019 23:25:28 -0400 Subject: [PATCH] buffer: consolidate encoding parsing PR-URL: https://github.com/nodejs/node/pull/29217 Reviewed-By: Ben Noordhuis Reviewed-By: Anna Henningsen --- lib/buffer.js | 359 ++++++++++++++++++++++----------------------- src/node_buffer.cc | 15 +- 2 files changed, 186 insertions(+), 188 deletions(-) diff --git a/lib/buffer.js b/lib/buffer.js index 7753e3239feeb0..85c2df49c8ca80 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -60,6 +60,8 @@ const { const { inspect: utilInspect } = require('internal/util/inspect'); +const { encodings } = internalBinding('string_decoder'); + const { codes: { @@ -109,6 +111,10 @@ let poolSize, poolOffset, allocPool; // do not own the ArrayBuffer allocator. Zero fill is always on in that case. const zeroFill = bindingZeroFill || [0]; +const encodingsMap = Object.create(null); +for (let i = 0; i < encodings.length; ++i) + encodingsMap[encodings[i]] = i; + function createUnsafeBuffer(size) { zeroFill[0] = 0; try { @@ -376,28 +382,16 @@ function allocate(size) { return createUnsafeBuffer(size); } -function fromString(string, encoding) { - let length; - if (typeof encoding !== 'string' || encoding.length === 0) { - if (string.length === 0) - return new FastBuffer(); - encoding = 'utf8'; - length = byteLengthUtf8(string); - } else { - length = byteLength(string, encoding, true); - if (length === -1) - throw new ERR_UNKNOWN_ENCODING(encoding); - if (string.length === 0) - return new FastBuffer(); - } +function fromStringFast(string, ops) { + const length = ops.byteLength(string); if (length >= (Buffer.poolSize >>> 1)) - return createFromString(string, encoding); + return createFromString(string, ops.encodingVal); if (length > (poolSize - poolOffset)) createPool(); let b = new FastBuffer(allocPool, poolOffset, length); - const actual = b.write(string, encoding); + const actual = ops.write(b, string, 0, 
length); if (actual !== length) { // byteLength() may overestimate. That's a rare case, though. b = new FastBuffer(allocPool, poolOffset, actual); @@ -407,6 +401,23 @@ function fromString(string, encoding) { return b; } +function fromString(string, encoding) { + let ops; + if (typeof encoding !== 'string' || encoding.length === 0) { + if (string.length === 0) + return new FastBuffer(); + ops = encodingOps.utf8; + encoding = undefined; + } else { + ops = getEncodingOps(encoding); + if (ops === undefined) + throw new ERR_UNKNOWN_ENCODING(encoding); + if (string.length === 0) + return new FastBuffer(); + } + return fromStringFast(string, ops); +} + function fromArrayLike(obj) { const length = obj.length; const b = allocate(length); @@ -553,64 +564,149 @@ function base64ByteLength(str, bytes) { return (bytes * 3) >>> 2; } -function byteLength(string, encoding) { - if (typeof string !== 'string') { - if (isArrayBufferView(string) || isAnyArrayBuffer(string)) { - return string.byteLength; - } - - throw new ERR_INVALID_ARG_TYPE( - 'string', ['string', 'Buffer', 'ArrayBuffer'], string - ); +const encodingOps = { + utf8: { + encoding: 'utf8', + encodingVal: encodingsMap.utf8, + byteLength: byteLengthUtf8, + write: (buf, string, offset, len) => buf.utf8Write(string, offset, len), + slice: (buf, start, end) => buf.utf8Slice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir) + }, + ucs2: { + encoding: 'ucs2', + encodingVal: encodingsMap.utf16le, + byteLength: (string) => string.length * 2, + write: (buf, string, offset, len) => buf.ucs2Write(string, offset, len), + slice: (buf, start, end) => buf.ucs2Slice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir) + }, + utf16le: { + encoding: 'utf16le', + encodingVal: encodingsMap.utf16le, + byteLength: (string) => string.length * 2, + write: (buf, string, offset, len) => buf.ucs2Write(string, offset, 
len), + slice: (buf, start, end) => buf.ucs2Slice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir) + }, + latin1: { + encoding: 'latin1', + encodingVal: encodingsMap.latin1, + byteLength: (string) => string.length, + write: (buf, string, offset, len) => buf.latin1Write(string, offset, len), + slice: (buf, start, end) => buf.latin1Slice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir) + }, + ascii: { + encoding: 'ascii', + encodingVal: encodingsMap.ascii, + byteLength: (string) => string.length, + write: (buf, string, offset, len) => buf.asciiWrite(string, offset, len), + slice: (buf, start, end) => buf.asciiSlice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfBuffer(buf, + fromStringFast(val, encodingOps.ascii), + byteOffset, + encodingsMap.ascii, + dir) + }, + base64: { + encoding: 'base64', + encodingVal: encodingsMap.base64, + byteLength: (string) => base64ByteLength(string, string.length), + write: (buf, string, offset, len) => buf.base64Write(string, offset, len), + slice: (buf, start, end) => buf.base64Slice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfBuffer(buf, + fromStringFast(val, encodingOps.base64), + byteOffset, + encodingsMap.base64, + dir) + }, + hex: { + encoding: 'hex', + encodingVal: encodingsMap.hex, + byteLength: (string) => string.length >>> 1, + write: (buf, string, offset, len) => buf.hexWrite(string, offset, len), + slice: (buf, start, end) => buf.hexSlice(start, end), + indexOf: (buf, val, byteOffset, dir) => + indexOfBuffer(buf, + fromStringFast(val, encodingOps.hex), + byteOffset, + encodingsMap.hex, + dir) } - - const len = string.length; - const mustMatch = (arguments.length > 2 && arguments[2] === true); - if (!mustMatch && len === 0) - return 0; - - if (!encoding) - return (mustMatch ? 
-1 : byteLengthUtf8(string)); - +}; +function getEncodingOps(encoding) { encoding += ''; switch (encoding.length) { case 4: - if (encoding === 'utf8') return byteLengthUtf8(string); - if (encoding === 'ucs2') return len * 2; + if (encoding === 'utf8') return encodingOps.utf8; + if (encoding === 'ucs2') return encodingOps.ucs2; encoding = encoding.toLowerCase(); - if (encoding === 'utf8') return byteLengthUtf8(string); - if (encoding === 'ucs2') return len * 2; + if (encoding === 'utf8') return encodingOps.utf8; + if (encoding === 'ucs2') return encodingOps.ucs2; break; case 5: - if (encoding === 'utf-8') return byteLengthUtf8(string); - if (encoding === 'ascii') return len; - if (encoding === 'ucs-2') return len * 2; + if (encoding === 'utf-8') return encodingOps.utf8; + if (encoding === 'ascii') return encodingOps.ascii; + if (encoding === 'ucs-2') return encodingOps.ucs2; encoding = encoding.toLowerCase(); - if (encoding === 'utf-8') return byteLengthUtf8(string); - if (encoding === 'ascii') return len; - if (encoding === 'ucs-2') return len * 2; + if (encoding === 'utf-8') return encodingOps.utf8; + if (encoding === 'ascii') return encodingOps.ascii; + if (encoding === 'ucs-2') return encodingOps.ucs2; break; case 7: if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le') - return len * 2; + return encodingOps.utf16le; break; case 8: if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le') - return len * 2; + return encodingOps.utf16le; break; case 6: - if (encoding === 'latin1' || encoding === 'binary') return len; - if (encoding === 'base64') return base64ByteLength(string, len); + if (encoding === 'latin1' || encoding === 'binary') + return encodingOps.latin1; + if (encoding === 'base64') return encodingOps.base64; encoding = encoding.toLowerCase(); - if (encoding === 'latin1' || encoding === 'binary') return len; - if (encoding === 'base64') return base64ByteLength(string, len); + if (encoding === 'latin1' || encoding === 'binary') + 
return encodingOps.latin1; + if (encoding === 'base64') return encodingOps.base64; break; case 3: if (encoding === 'hex' || encoding.toLowerCase() === 'hex') - return len >>> 1; + return encodingOps.hex; break; } - return (mustMatch ? -1 : byteLengthUtf8(string)); +} + +function byteLength(string, encoding) { + if (typeof string !== 'string') { + if (isArrayBufferView(string) || isAnyArrayBuffer(string)) { + return string.byteLength; + } + + throw new ERR_INVALID_ARG_TYPE( + 'string', ['string', 'Buffer', 'ArrayBuffer'], string + ); + } + + const len = string.length; + const mustMatch = (arguments.length > 2 && arguments[2] === true); + if (!mustMatch && len === 0) + return 0; + + if (!encoding) + return (mustMatch ? -1 : byteLengthUtf8(string)); + + const ops = getEncodingOps(encoding); + if (ops === undefined) + return (mustMatch ? -1 : byteLengthUtf8(string)); + return ops.byteLength(string); } Buffer.byteLength = byteLength; @@ -633,51 +729,6 @@ Object.defineProperty(Buffer.prototype, 'offset', { } }); -function stringSlice(buf, encoding, start, end) { - if (encoding === undefined) return buf.utf8Slice(start, end); - encoding += ''; - switch (encoding.length) { - case 4: - if (encoding === 'utf8') return buf.utf8Slice(start, end); - if (encoding === 'ucs2') return buf.ucs2Slice(start, end); - encoding = encoding.toLowerCase(); - if (encoding === 'utf8') return buf.utf8Slice(start, end); - if (encoding === 'ucs2') return buf.ucs2Slice(start, end); - break; - case 5: - if (encoding === 'utf-8') return buf.utf8Slice(start, end); - if (encoding === 'ascii') return buf.asciiSlice(start, end); - if (encoding === 'ucs-2') return buf.ucs2Slice(start, end); - encoding = encoding.toLowerCase(); - if (encoding === 'utf-8') return buf.utf8Slice(start, end); - if (encoding === 'ascii') return buf.asciiSlice(start, end); - if (encoding === 'ucs-2') return buf.ucs2Slice(start, end); - break; - case 6: - if (encoding === 'latin1' || encoding === 'binary') - return 
buf.latin1Slice(start, end); - if (encoding === 'base64') return buf.base64Slice(start, end); - encoding = encoding.toLowerCase(); - if (encoding === 'latin1' || encoding === 'binary') - return buf.latin1Slice(start, end); - if (encoding === 'base64') return buf.base64Slice(start, end); - break; - case 3: - if (encoding === 'hex' || encoding.toLowerCase() === 'hex') - return buf.hexSlice(start, end); - break; - case 7: - if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le') - return buf.ucs2Slice(start, end); - break; - case 8: - if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le') - return buf.ucs2Slice(start, end); - break; - } - throw new ERR_UNKNOWN_ENCODING(encoding); -} - Buffer.prototype.copy = function copy(target, targetStart, sourceStart, sourceEnd) { return _copy(this, target, targetStart, sourceStart, sourceEnd); @@ -708,7 +759,15 @@ Buffer.prototype.toString = function toString(encoding, start, end) { if (end <= start) return ''; - return stringSlice(this, encoding, start, end); + + if (encoding === undefined) + return this.utf8Slice(start, end); + + const ops = getEncodingOps(encoding); + if (ops === undefined) + throw new ERR_UNKNOWN_ENCODING(encoding); + + return ops.slice(this, start, end); }; Buffer.prototype.equals = function equals(otherBuffer) { @@ -826,15 +885,25 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) { } dir = !!dir; // Cast to bool. 
- if (typeof val === 'string') { - if (encoding === undefined) { - return indexOfString(buffer, val, byteOffset, encoding, dir); - } - return slowIndexOf(buffer, val, byteOffset, encoding, dir); - } else if (isUint8Array(val)) { - return indexOfBuffer(buffer, val, byteOffset, encoding, dir); - } else if (typeof val === 'number') { + if (typeof val === 'number') return indexOfNumber(buffer, val >>> 0, byteOffset, dir); + + let ops; + if (encoding === undefined) + ops = encodingOps.utf8; + else + ops = getEncodingOps(encoding); + + if (typeof val === 'string') { + if (ops === undefined) + throw new ERR_UNKNOWN_ENCODING(encoding); + return ops.indexOf(buffer, val, byteOffset, dir); + } + + if (isUint8Array(val)) { + const encodingVal = + (ops === undefined ? encodingsMap.utf8 : ops.encodingVal); + return indexOfBuffer(buffer, val, byteOffset, encodingVal, dir); } throw new ERR_INVALID_ARG_TYPE( @@ -842,37 +911,6 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) { ); } -function slowIndexOf(buffer, val, byteOffset, encoding, dir) { - let loweredCase = false; - for (;;) { - switch (encoding) { - case 'utf8': - case 'utf-8': - case 'ucs2': - case 'ucs-2': - case 'utf16le': - case 'utf-16le': - case 'latin1': - case 'binary': - return indexOfString(buffer, val, byteOffset, encoding, dir); - - case 'base64': - case 'ascii': - case 'hex': - return indexOfBuffer( - buffer, Buffer.from(val, encoding), byteOffset, encoding, dir); - - default: - if (loweredCase) { - throw new ERR_UNKNOWN_ENCODING(encoding); - } - - encoding = ('' + encoding).toLowerCase(); - loweredCase = true; - } - } -} - Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) { return bidirectionalIndexOf(this, val, byteOffset, encoding, true); }; @@ -985,49 +1023,10 @@ Buffer.prototype.write = function write(string, offset, length, encoding) { if (!encoding) return this.utf8Write(string, offset, length); - encoding += ''; - switch (encoding.length) { - case 4: - if 
(encoding === 'utf8') return this.utf8Write(string, offset, length); - if (encoding === 'ucs2') return this.ucs2Write(string, offset, length); - encoding = encoding.toLowerCase(); - if (encoding === 'utf8') return this.utf8Write(string, offset, length); - if (encoding === 'ucs2') return this.ucs2Write(string, offset, length); - break; - case 5: - if (encoding === 'utf-8') return this.utf8Write(string, offset, length); - if (encoding === 'ascii') return this.asciiWrite(string, offset, length); - if (encoding === 'ucs-2') return this.ucs2Write(string, offset, length); - encoding = encoding.toLowerCase(); - if (encoding === 'utf-8') return this.utf8Write(string, offset, length); - if (encoding === 'ascii') return this.asciiWrite(string, offset, length); - if (encoding === 'ucs-2') return this.ucs2Write(string, offset, length); - break; - case 7: - if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le') - return this.ucs2Write(string, offset, length); - break; - case 8: - if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le') - return this.ucs2Write(string, offset, length); - break; - case 6: - if (encoding === 'latin1' || encoding === 'binary') - return this.latin1Write(string, offset, length); - if (encoding === 'base64') - return this.base64Write(string, offset, length); - encoding = encoding.toLowerCase(); - if (encoding === 'latin1' || encoding === 'binary') - return this.latin1Write(string, offset, length); - if (encoding === 'base64') - return this.base64Write(string, offset, length); - break; - case 3: - if (encoding === 'hex' || encoding.toLowerCase() === 'hex') - return this.hexWrite(string, offset, length); - break; - } - throw new ERR_UNKNOWN_ENCODING(encoding); + const ops = getEncodingOps(encoding); + if (ops === undefined) + throw new ERR_UNKNOWN_ENCODING(encoding); + return ops.write(this, string, offset, length); }; Buffer.prototype.toJSON = function toJSON() { diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 
e6a88f649895e8..7332b0b34b035e 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -63,6 +63,7 @@ using v8::Context; using v8::EscapableHandleScope; using v8::FunctionCallbackInfo; using v8::Global; +using v8::Int32; using v8::Integer; using v8::Isolate; using v8::Just; @@ -446,11 +447,9 @@ namespace { void CreateFromString(const FunctionCallbackInfo<Value>& args) { CHECK(args[0]->IsString()); - CHECK(args[1]->IsString()); + CHECK(args[1]->IsInt32()); - enum encoding enc = ParseEncoding(args.GetIsolate(), - args[1].As<String>(), - UTF8); + enum encoding enc = static_cast<enum encoding>(args[1].As<Int32>()->Value()); Local<Object> buf; if (New(args.GetIsolate(), args[0].As<String>(), enc).ToLocal(&buf)) args.GetReturnValue().Set(buf); @@ -786,9 +785,10 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { CHECK(args[1]->IsString()); CHECK(args[2]->IsNumber()); + CHECK(args[3]->IsInt32()); CHECK(args[4]->IsBoolean()); - enum encoding enc = ParseEncoding(isolate, args[3], UTF8); + enum encoding enc = static_cast<enum encoding>(args[3].As<Int32>()->Value()); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); ArrayBufferViewContents<char> buffer(args[0]); @@ -900,11 +900,10 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { CHECK(args[1]->IsObject()); CHECK(args[2]->IsNumber()); + CHECK(args[3]->IsInt32()); CHECK(args[4]->IsBoolean()); - enum encoding enc = ParseEncoding(args.GetIsolate(), - args[3], - UTF8); + enum encoding enc = static_cast<enum encoding>(args[3].As<Int32>()->Value()); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);