diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 23bd2ad66546ce..65d91bc63d5cde 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -28,51 +28,121 @@ const {
   copy, latin1Slice, asciiSlice, hexSlice, utf8Slice, ucs2Slice, base64Slice
 } = process.binding('buffer');
 
-// Do not cache `Buffer.isEncoding` when checking encoding names as some
-// modules monkey-patch it to support additional encodings
-function normalizeEncoding(enc) {
-  const nenc = internalUtil.normalizeEncoding(enc);
-  if (typeof nenc !== 'string' &&
-      (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
-    throw new Error(`Unknown encoding: ${enc}`);
-  return nenc || enc;
+// Table of supported encodings, indexed by the value that translateEncoding()
+// returns. Each entry holds the normalized name, the size of the `lastChar`
+// buffer to allocate (0 means the constructor allocates none) and a function
+// that installs the encoding-specific methods on a decoder.
+// NOTE(review): latin1, ascii and hex used to be installed as `write`; they
+// are now installed as `text`, still without lastNeed/lastTotal/lastChar.
+// Confirm that `write` behaves correctly for them.
+const encodings = [
+  // 0
+  [
+    'utf8', // normalized encoding name string
+    4, // buffer size
+    (self) => { self.fillLast = utf8FillLast; } // StringDecoder initialization
+  ],
+  // 1
+  [
+    'utf16le',
+    4,
+    (self) => { self.text = utf16Text; self.end = utf16End; }
+  ],
+  // 2
+  [
+    'latin1',
+    0,
+    (self) => { self.text = latin1Text; self.end = simpleEnd; }
+  ],
+  // 3
+  [
+    'base64',
+    3,
+    (self) => { self.text = base64Text; self.end = base64End; }
+  ],
+  // 4
+  [
+    'ascii',
+    0,
+    (self) => { self.text = asciiText; self.end = simpleEnd; }
+  ],
+  // 5
+  [
+    'hex',
+    0,
+    (self) => { self.text = hexText; self.end = simpleEnd; }
+  ]
+];
+
+// Maps an encoding name to its index in `encodings`, or -1 when the name is
+// not recognized. A falsy `enc` selects utf8 (index 0); any other value is
+// coerced to a string. Each name is compared as given first, so toLowerCase()
+// only runs for names that did not match as given.
+function translateEncoding(enc) {
+  if (!enc) return 0;
+  enc += '';
+  switch (enc.length) {
+    case 4:
+      if (enc === 'utf8') return 0;
+      if (enc === 'ucs2') return 1;
+      enc = enc.toLowerCase();
+      if (enc === 'utf8') return 0;
+      if (enc === 'ucs2') return 1;
+      break;
+    case 5:
+      if (enc === 'utf-8') return 0;
+      if (enc === 'ascii') return 4;
+      if (enc === 'ucs-2') return 1;
+      enc = enc.toLowerCase();
+      if (enc === 'utf-8') return 0;
+      if (enc === 'ascii') return 4;
+      if (enc === 'ucs-2') return 1;
+      break;
+    case 7:
+      return (enc === 'utf16le' || enc.toLowerCase() === 'utf16le' ? 1 : -1);
+    case 8:
+      return (enc === 'utf-16le' || enc.toLowerCase() === 'utf-16le' ? 1 : -1);
+    case 6:
+      if (enc === 'latin1') return 2;
+      if (enc === 'binary') return 2;
+      if (enc === 'base64') return 3;
+      enc = enc.toLowerCase();
+      if (enc === 'latin1') return 2;
+      if (enc === 'binary') return 2;
+      if (enc === 'base64') return 3;
+      break;
+    case 3:
+      return (enc === 'hex' || enc.toLowerCase() === 'hex' ? 5 : -1);
+  }
+  return -1;
 }
 
 // StringDecoder provides an interface for efficiently splitting a series of
 // buffers into a series of JS strings without breaking apart multi-byte
 // characters.
 exports.StringDecoder = StringDecoder;
-function StringDecoder(encoding) {
-  this.encoding = normalizeEncoding(encoding);
-  var nb;
-  switch (this.encoding) {
-    case 'utf16le':
-      this.text = utf16Text;
-      this.end = utf16End;
-      nb = 4;
-      break;
-    case 'utf8':
-      this.fillLast = utf8FillLast;
-      nb = 4;
-      break;
-    case 'base64':
-      this.text = base64Text;
-      this.end = base64End;
-      nb = 3;
-      break;
-    case 'hex':
-      this.write = hexText;
-      this.end = simpleEnd;
-      return;
-    case 'latin1':
-      this.write = latin1Text;
-      this.end = simpleEnd;
-      return;
-    case 'ascii':
-      this.write = asciiText;
-      this.end = simpleEnd;
-      return;
+// A falsy `enc` selects utf8. Names that translateEncoding() does not know are
+// only accepted when `Buffer.isEncoding` differs from `isEncoding` and returns
+// a truthy value for them; otherwise an 'Unknown encoding' Error is thrown.
+function StringDecoder(enc) {
+  const encIdx = translateEncoding(enc);
+  if (encIdx === -1) {
+    // Do not cache `Buffer.isEncoding` when checking encoding names as some
+    // modules monkey-patch it to support additional encodings
+    if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
+      throw new Error(`Unknown encoding: ${enc}`);
+    // NOTE(review): an encoding accepted only by a patched `Buffer.isEncoding`
+    // gets no methods or state installed here; confirm that is intended.
+    this.encoding = enc;
+    return;
   }
+  const info = encodings[encIdx];
+  this.encoding = info[0];
+  const nb = info[1];
+  info[2](this);
+  // A size of 0 means there is no `lastChar` buffer to set up.
+  if (nb === 0)
+    return;
   this.lastNeed = 0;
   this.lastTotal = 0;
   this.lastChar = Buffer.allocUnsafe(nb);