string_decoder: refactor encoding normalization

nodejs · May 5, 2017 · 525fabd · 525fabd
1 parent f4e7b55
commit 525fabd
Showing 1 changed file with 93 additions and 38 deletions.
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
@@ -28,51 +28,106 @@ const {
   copy, latin1Slice, asciiSlice, hexSlice, utf8Slice, ucs2Slice, base64Slice
 } = process.binding('buffer');
 
-// Do not cache `Buffer.isEncoding` when checking encoding names as some
-// modules monkey-patch it to support additional encodings
-function normalizeEncoding(enc) {
-  const nenc = internalUtil.normalizeEncoding(enc);
-  if (typeof nenc !== 'string' &&
-      (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
-    throw new Error(`Unknown encoding: ${enc}`);
-  return nenc || enc;
+const encodings = [ // indexed by the number translateEncoding() returns
+  // 0: utf8 (also what translateEncoding() returns for a falsy encoding)
+  [
+    'utf8', // [0] normalized encoding name, stored as `this.encoding`
+    4, // [1] size of the decoder's `lastChar` buffer; 0 = ctor returns early
+    (self) => { self.fillLast = utf8FillLast; } // StringDecoder initialization
+  ],
+  // 1: utf16le (aliases: ucs2, ucs-2, utf-16le)
+  [
+    'utf16le',
+    4,
+    (self) => { self.text = utf16Text; self.end = utf16End; }
+  ],
+  // 2: latin1/binary. NOTE(review): old code set `write`, not `text`; confirm
+  [
+    'latin1',
+    0,
+    (self) => { self.text = latin1Text; self.end = simpleEnd; }
+  ],
+  // 3: base64
+  [
+    'base64',
+    3,
+    (self) => { self.text = base64Text; self.end = base64End; }
+  ],
+  // 4: ascii. NOTE(review): old code set `write`, not `text`; confirm
+  [
+    'ascii',
+    0,
+    (self) => { self.text = asciiText; self.end = simpleEnd; }
+  ],
+  // 5: hex. NOTE(review): old code set `write`, not `text`; confirm
+  [
+    'hex',
+    0,
+    (self) => { self.text = hexText; self.end = simpleEnd; }
+  ]
+];
+
+function translateEncoding(enc) { // -> index into `encodings`, or -1
+  if (!enc) return 0; // any falsy value (undefined, null, '') selects utf8
+  enc += ''; // coerce to a string so .length / .toLowerCase() are usable
+  switch (enc.length) { // only names of a matching length are compared
+    case 4:
+      if (enc === 'utf8') return 0; // exact match is tried first ...
+      if (enc === 'ucs2') return 1; // ucs2 is an alias of utf16le
+      enc = enc.toLowerCase(); // ... then retried case-insensitively
+      if (enc === 'utf8') return 0;
+      if (enc === 'ucs2') return 1;
+      break; // unknown 4-character name: reaches the final `return -1`
+    case 5:
+      if (enc === 'utf-8') return 0;
+      if (enc === 'ascii') return 4;
+      if (enc === 'ucs-2') return 1; // alias of utf16le
+      enc = enc.toLowerCase(); // same names again, ignoring case
+      if (enc === 'utf-8') return 0;
+      if (enc === 'ascii') return 4;
+      if (enc === 'ucs-2') return 1;
+      break;
+    case 7: // one candidate only, so both comparisons fit in one expression
+      return (enc === 'utf16le' || enc.toLowerCase() === 'utf16le' ? 1 : -1);
+    case 8: // 'utf-16le' is an alias of utf16le
+      return (enc === 'utf-16le' || enc.toLowerCase() === 'utf-16le' ? 1 : -1);
+    case 6:
+      if (enc === 'latin1') return 2;
+      if (enc === 'binary') return 2; // alias of latin1
+      if (enc === 'base64') return 3;
+      enc = enc.toLowerCase(); // same names again, ignoring case
+      if (enc === 'latin1') return 2;
+      if (enc === 'binary') return 2;
+      if (enc === 'base64') return 3;
+      break;
+    case 3:
+      return (enc === 'hex' || enc.toLowerCase() === 'hex' ? 5 : -1);
+  }
+  return -1; // not a name listed above; the caller handles -1
 }
 
 // StringDecoder provides an interface for efficiently splitting a series of
 // buffers into a series of JS strings without breaking apart multi-byte
 // characters.
+// Note: do not cache `Buffer.isEncoding` when checking encoding names, as
+// some modules monkey-patch it to support additional encodings.
 exports.StringDecoder = StringDecoder;
-function StringDecoder(encoding) {
-  this.encoding = normalizeEncoding(encoding);
-  var nb;
-  switch (this.encoding) {
-    case 'utf16le':
-      this.text = utf16Text;
-      this.end = utf16End;
-      nb = 4;
-      break;
-    case 'utf8':
-      this.fillLast = utf8FillLast;
-      nb = 4;
-      break;
-    case 'base64':
-      this.text = base64Text;
-      this.end = base64End;
-      nb = 3;
-      break;
-    case 'hex':
-      this.write = hexText;
-      this.end = simpleEnd;
-      return;
-    case 'latin1':
-      this.write = latin1Text;
-      this.end = simpleEnd;
-      return;
-    case 'ascii':
-      this.write = asciiText;
-      this.end = simpleEnd;
-      return;
+function StringDecoder(enc) {
+  var info;
+  const encIdx = translateEncoding(enc);
+  if (encIdx === -1) {
+    if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
+      throw new Error(`Unknown encoding: ${enc}`);
+    this.encoding = enc;
+    return;
+  } else {
+    info = encodings[encIdx];
   }
+  this.encoding = info[0];
+  const nb = info[1];
+  info[2](this);
+  if (nb === 0)
+    return;
   this.lastNeed = 0;
   this.lastTotal = 0;
   this.lastChar = Buffer.allocUnsafe(nb);