From 8b79a170f34db9e6d653573115949954665ebeb6 Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Tue, 28 Feb 2017 17:57:58 +0100
Subject: [PATCH 1/3] buffer: make `*Slice` methods available on binding

Makes the string slice methods of buffers available
on the binding object in addition to the `Buffer` prototype.

This enables subsequent `string_decoder` changes to use these
methods directly without performance loss, since all parameters
are known by the string decoder in those cases.
---
 src/node_buffer.cc | 54 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 15 deletions(-)
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 367af6592ff39b..40504abfc0fbc2 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -453,17 +453,20 @@ void CreateFromString(const FunctionCallbackInfo<Value>& args) {
 
 
 template <encoding encoding>
-void StringSlice(const FunctionCallbackInfo<Value>& args) {
+inline void StringSlice(const FunctionCallbackInfo<Value>& args,
+                        Local<Value> buffer_arg,
+                        Local<Value> start_arg,
+                        Local<Value> end_arg) {
   Environment* env = Environment::GetCurrent(args);
   Isolate* isolate = env->isolate();
 
-  THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
-  SPREAD_BUFFER_ARG(args.This(), ts_obj);
+  THROW_AND_RETURN_UNLESS_BUFFER(env, buffer_arg);
+  SPREAD_BUFFER_ARG(buffer_arg, ts_obj);
 
   if (ts_obj_length == 0)
     return args.GetReturnValue().SetEmptyString();
 
-  SLICE_START_END(args[0], args[1], ts_obj_length)
+  SLICE_START_END(start_arg, end_arg, ts_obj_length)
 
   Local<Value> error;
   MaybeLocal<Value> ret =
@@ -482,17 +485,20 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
 
 
 template <>
-void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
+inline void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args,
+                              Local<Value> buffer_arg,
+                              Local<Value> start_arg,
+                              Local<Value> end_arg) {
   Isolate* isolate = args.GetIsolate();
-  Environment* env = Environment::GetCurrent(isolate);
+  Environment* env = Environment::GetCurrent(args);
 
-  THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
-  SPREAD_BUFFER_ARG(args.This(), ts_obj);
+  THROW_AND_RETURN_UNLESS_BUFFER(env, buffer_arg);
+  SPREAD_BUFFER_ARG(buffer_arg, ts_obj);
 
   if (ts_obj_length == 0)
     return args.GetReturnValue().SetEmptyString();
 
-  SLICE_START_END(args[0], args[1], ts_obj_length)
+  SLICE_START_END(start_arg, end_arg, ts_obj_length)
   length /= 2;
 
   const char* data = ts_obj_data + start;
@@ -540,6 +546,17 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
 }
 
 
+template <encoding encoding>
+void StringSliceProto(const FunctionCallbackInfo<Value>& args) {
+  return StringSlice<encoding>(args, args.This(), args[0], args[1]);
+}
+
+template <encoding encoding>
+void StringSliceStandalone(const FunctionCallbackInfo<Value>& args) {
+  return StringSlice<encoding>(args, args[0], args[1], args[2]);
+}
+
+
 // bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
 void Copy(const FunctionCallbackInfo<Value> &args) {
   Environment* env = Environment::GetCurrent(args);
@@ -1186,12 +1203,12 @@ void SetupBufferJS(const FunctionCallbackInfo<Value>& args) {
   Local<Object> proto = args[0].As<Object>();
   env->set_buffer_prototype_object(proto);
 
-  env->SetMethod(proto, "asciiSlice", StringSlice<ASCII>);
-  env->SetMethod(proto, "base64Slice", StringSlice<BASE64>);
-  env->SetMethod(proto, "latin1Slice", StringSlice<LATIN1>);
-  env->SetMethod(proto, "hexSlice", StringSlice<HEX>);
-  env->SetMethod(proto, "ucs2Slice", StringSlice<UCS2>);
-  env->SetMethod(proto, "utf8Slice", StringSlice<UTF8>);
+  env->SetMethod(proto, "asciiSlice", StringSliceProto<ASCII>);
+  env->SetMethod(proto, "base64Slice", StringSliceProto<BASE64>);
+  env->SetMethod(proto, "latin1Slice", StringSliceProto<LATIN1>);
+  env->SetMethod(proto, "hexSlice", StringSliceProto<HEX>);
+  env->SetMethod(proto, "ucs2Slice", StringSliceProto<UCS2>);
+  env->SetMethod(proto, "utf8Slice", StringSliceProto<UTF8>);
 
   env->SetMethod(proto, "asciiWrite", StringWrite<ASCII>);
   env->SetMethod(proto, "base64Write", StringWrite<BASE64>);
@@ -1244,6 +1261,13 @@ void Initialize(Local<Object> target,
   env->SetMethod(target, "swap32", Swap32);
   env->SetMethod(target, "swap64", Swap64);
 
+  env->SetMethod(target, "asciiSlice", StringSliceStandalone<ASCII>);
+  env->SetMethod(target, "base64Slice", StringSliceStandalone<BASE64>);
+  env->SetMethod(target, "latin1Slice", StringSliceStandalone<LATIN1>);
+  env->SetMethod(target, "hexSlice", StringSliceStandalone<HEX>);
+  env->SetMethod(target, "ucs2Slice", StringSliceStandalone<UCS2>);
+  env->SetMethod(target, "utf8Slice", StringSliceStandalone<UTF8>);
+
   target->Set(env->context(),
               FIXED_ONE_BYTE_STRING(env->isolate(), "kMaxLength"),
               Integer::NewFromUnsigned(env->isolate(), kMaxLength)).FromJust();

From f4e7b557a8219751b7810b9e16e98ec482dfebb3 Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Fri, 23 Dec 2016 13:50:57 +0100
Subject: [PATCH 2/3] string_decoder: support Uint8Array input to methods

This is a bit odd since `string_decoder` does currently not
perform any type checking. Also, this adds an explicit check
for `string` input, which does not really make sense but is relied upon
by our test suite.
---
 doc/api/string_decoder.md            | 19 ++++++---
 lib/string_decoder.js                | 58 +++++++++++++++++++---------
 test/parallel/test-string-decoder.js | 39 +++++++++++--------
 3 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md
index a92f62ebcdfea9..75506dc564cf30 100644
--- a/doc/api/string_decoder.md
+++ b/doc/api/string_decoder.md
@@ -2,9 +2,9 @@
 
 > Stability: 2 - Stable
 
-The `string_decoder` module provides an API for decoding `Buffer` objects into
-strings in a manner that preserves encoded multi-byte UTF-8 and UTF-16
-characters. It can be accessed using:
+The `string_decoder` module provides an API for decoding `Buffer` and
+`Uint8Array` objects into strings in a manner that preserves encoded multi-byte
+UTF-8 and UTF-16 characters. It can be accessed using:
 
 ```js
 const StringDecoder = require('string_decoder').StringDecoder;
@@ -53,9 +53,14 @@ Creates a new `StringDecoder` instance.
 ### stringDecoder.end([buffer])
 <!-- YAML
 added: v0.9.3
+changes:
+  - version: REPLACEME
+    pr-url: https://github.com/nodejs/node/pull/11613
+    description: The `buffer` argument can now be a `Uint8Array` instance.
 -->
 
-* `buffer` {Buffer} A `Buffer` containing the bytes to decode.
+* `buffer` {Buffer|Uint8Array} A `Buffer` or `Uint8Array` containing the bytes
+  to decode.
 
 Returns any remaining input stored in the internal buffer as a string. Bytes
 representing incomplete UTF-8 and UTF-16 characters will be replaced with
@@ -68,13 +73,17 @@ is performed before returning the remaining input.
 <!-- YAML
 added: v0.1.99
 changes:
+  - version: REPLACEME
+    pr-url: https://github.com/nodejs/node/pull/11613
+    description: The `buffer` argument can now be a `Uint8Array` instance.
   - version: REPLACEME
     pr-url: https://github.com/nodejs/node/pull/9618
     description: Each invalid character is now replaced by a single replacement
                  character instead of one for each individual byte.
 -->
 
-* `buffer` {Buffer} A `Buffer` containing the bytes to decode.
+* `buffer` {Buffer|Uint8Array} A `Buffer` or `Uint8Array` containing the bytes
+  to decode.
 
 Returns a decoded string, ensuring that any incomplete multibyte characters at
 the end of the `Buffer` are omitted from the returned string and stored in an
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index eee9a7d9273ebf..23bd2ad66546ce 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -24,6 +24,9 @@
 const Buffer = require('buffer').Buffer;
 const internalUtil = require('internal/util');
 const isEncoding = Buffer[internalUtil.kIsEncodingSymbol];
+const {
+  copy, latin1Slice, asciiSlice, hexSlice, utf8Slice, ucs2Slice, base64Slice
+} = process.binding('buffer');
 
 // Do not cache `Buffer.isEncoding` when checking encoding names as some
 // modules monkey-patch it to support additional encodings
@@ -57,8 +60,16 @@ function StringDecoder(encoding) {
       this.end = base64End;
       nb = 3;
       break;
-    default:
-      this.write = simpleWrite;
+    case 'hex':
+      this.write = hexText;
+      this.end = simpleEnd;
+      return;
+    case 'latin1':
+      this.write = latin1Text;
+      this.end = simpleEnd;
+      return;
+    case 'ascii':
+      this.write = asciiText;
       this.end = simpleEnd;
       return;
   }
@@ -67,9 +78,12 @@ function StringDecoder(encoding) {
   this.lastChar = Buffer.allocUnsafe(nb);
 }
 
+// TODO(addaleax): This method should not accept strings as input.
 StringDecoder.prototype.write = function(buf) {
   if (buf.length === 0)
     return '';
+  if (typeof buf === 'string')
+    return buf;
   var r;
   var i;
   if (this.lastNeed) {
@@ -94,10 +108,10 @@ StringDecoder.prototype.text = utf8Text;
 // Attempts to complete a partial non-UTF-8 character using bytes from a Buffer
 StringDecoder.prototype.fillLast = function(buf) {
   if (this.lastNeed <= buf.length) {
-    buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, this.lastNeed);
+    copy(buf, this.lastChar, this.lastTotal - this.lastNeed, 0, this.lastNeed);
     return this.lastChar.toString(this.encoding, 0, this.lastTotal);
   }
-  buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, buf.length);
+  copy(buf, this.lastChar, this.lastTotal - this.lastNeed, 0, buf.length);
   this.lastNeed -= buf.length;
 };
 
@@ -185,10 +199,10 @@ function utf8FillLast(buf) {
   if (r !== undefined)
     return r;
   if (this.lastNeed <= buf.length) {
-    buf.copy(this.lastChar, p, 0, this.lastNeed);
-    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
+    copy(buf, this.lastChar, p, 0, this.lastNeed);
+    return utf8Slice(this.lastChar, 0, this.lastTotal);
   }
-  buf.copy(this.lastChar, p, 0, buf.length);
+  copy(buf, this.lastChar, p, 0, buf.length);
   this.lastNeed -= buf.length;
 }
 
@@ -198,11 +212,11 @@ function utf8FillLast(buf) {
 function utf8Text(buf, i) {
   const total = utf8CheckIncomplete(this, buf, i);
   if (!this.lastNeed)
-    return buf.toString('utf8', i);
+    return utf8Slice(buf, i, buf.length);
   this.lastTotal = total;
   const end = buf.length - (total - this.lastNeed);
-  buf.copy(this.lastChar, 0, end);
-  return buf.toString('utf8', i, end);
+  copy(buf, this.lastChar, 0, end);
+  return utf8Slice(buf, i, end);
 }
 
 // For UTF-8, a replacement character is added when ending on a partial
@@ -220,7 +234,7 @@ function utf8End(buf) {
 // decode the last character properly.
 function utf16Text(buf, i) {
   if ((buf.length - i) % 2 === 0) {
-    const r = buf.toString('utf16le', i);
+    const r = ucs2Slice(buf, i, buf.length);
     if (r) {
       const c = r.charCodeAt(r.length - 1);
       if (c >= 0xD800 && c <= 0xDBFF) {
@@ -236,7 +250,7 @@ function utf16Text(buf, i) {
   this.lastNeed = 1;
   this.lastTotal = 2;
   this.lastChar[0] = buf[buf.length - 1];
-  return buf.toString('utf16le', i, buf.length - 1);
+  return ucs2Slice(buf, i, buf.length - 1);
 }
 
 // For UTF-16LE we do not explicitly append special replacement characters if we
@@ -245,7 +259,7 @@ function utf16End(buf) {
   const r = (buf && buf.length ? this.write(buf) : '');
   if (this.lastNeed) {
     const end = this.lastTotal - this.lastNeed;
-    return r + this.lastChar.toString('utf16le', 0, end);
+    return r + ucs2Slice(this.lastChar, 0, end);
   }
   return r;
 }
@@ -253,7 +267,7 @@ function utf16End(buf) {
 function base64Text(buf, i) {
   const n = (buf.length - i) % 3;
   if (n === 0)
-    return buf.toString('base64', i);
+    return base64Slice(buf, i, buf.length);
   this.lastNeed = 3 - n;
   this.lastTotal = 3;
   if (n === 1) {
@@ -262,20 +276,28 @@ function base64Text(buf, i) {
     this.lastChar[0] = buf[buf.length - 2];
     this.lastChar[1] = buf[buf.length - 1];
   }
-  return buf.toString('base64', i, buf.length - n);
+  return base64Slice(buf, i, buf.length - n);
 }
 
 
 function base64End(buf) {
   const r = (buf && buf.length ? this.write(buf) : '');
   if (this.lastNeed)
-    return r + this.lastChar.toString('base64', 0, 3 - this.lastNeed);
+    return r + base64Slice(this.lastChar, 0, 3 - this.lastNeed);
   return r;
 }
 
 // Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex)
-function simpleWrite(buf) {
-  return buf.toString(this.encoding);
+function latin1Text(buf) {
+  return latin1Slice(buf, 0, buf.length);
+}
+
+function asciiText(buf) {
+  return asciiSlice(buf, 0, buf.length);
+}
+
+function hexText(buf) {
+  return hexSlice(buf, 0, buf.length);
 }
 
 function simpleEnd(buf) {
diff --git a/test/parallel/test-string-decoder.js b/test/parallel/test-string-decoder.js
index af88b809b89640..aec92cc78a76e9 100644
--- a/test/parallel/test-string-decoder.js
+++ b/test/parallel/test-string-decoder.js
@@ -106,7 +106,7 @@ assert.strictEqual(decoder.end(), '\ufffd');
 
 // Additional utf8Text test
 decoder = new StringDecoder('utf8');
-assert.strictEqual(decoder.text(Buffer.from([0x41]), 2), '');
+assert.strictEqual(decoder.text(Buffer.from([0x41]), 1), '');
 
 // Additional UTF-16LE surrogate pair tests
 decoder = new StringDecoder('utf16le');
@@ -144,23 +144,28 @@ function test(encoding, input, expected, singleSequence) {
   } else {
     sequences = [singleSequence];
   }
-  sequences.forEach((sequence) => {
-    const decoder = new StringDecoder(encoding);
-    let output = '';
-    sequence.forEach((write) => {
-      output += decoder.write(input.slice(write[0], write[1]));
+  for (const useUint8array of [ false, true ]) {
+    sequences.forEach((sequence) => {
+      const decoder = new StringDecoder(encoding);
+      let output = '';
+      sequence.forEach((write) => {
+        let slice = input.slice(write[0], write[1]);
+        if (useUint8array)
+          slice = new Uint8Array(slice);
+        output += decoder.write(slice);
+      });
+      output += decoder.end();
+      if (output !== expected) {
+        const message =
+          'Expected "' + unicodeEscape(expected) + '", ' +
+          'but got "' + unicodeEscape(output) + '"\n' +
+          'input: ' + input.toString('hex').match(/.{2}/g) + '\n' +
+          'Write sequence: ' + JSON.stringify(sequence) + '\n' +
+          'Full Decoder State: ' + inspect(decoder);
+        assert.fail(output, expected, message);
+      }
     });
-    output += decoder.end();
-    if (output !== expected) {
-      const message =
-        'Expected "' + unicodeEscape(expected) + '", ' +
-        'but got "' + unicodeEscape(output) + '"\n' +
-        'input: ' + input.toString('hex').match(/.{2}/g) + '\n' +
-        'Write sequence: ' + JSON.stringify(sequence) + '\n' +
-        'Full Decoder State: ' + inspect(decoder);
-      assert.fail(output, expected, message);
-    }
-  });
+  }
 }
 
 // unicodeEscape prints the str contents as unicode escape codes.

From 525fabd7ed6d610d09a00e7271796be7276b6217 Mon Sep 17 00:00:00 2001
From: Brian White <mscdex@mscdex.net>
Date: Wed, 12 Apr 2017 21:04:52 -0400
Subject: [PATCH 3/3] string_decoder: refactor encoding normalization

---
 lib/string_decoder.js | 131 ++++++++++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 38 deletions(-)

diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 23bd2ad66546ce..65d91bc63d5cde 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -28,51 +28,106 @@ const {
   copy, latin1Slice, asciiSlice, hexSlice, utf8Slice, ucs2Slice, base64Slice
 } = process.binding('buffer');
 
-// Do not cache `Buffer.isEncoding` when checking encoding names as some
-// modules monkey-patch it to support additional encodings
-function normalizeEncoding(enc) {
-  const nenc = internalUtil.normalizeEncoding(enc);
-  if (typeof nenc !== 'string' &&
-      (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
-    throw new Error(`Unknown encoding: ${enc}`);
-  return nenc || enc;
+const encodings = [
+  // 0
+  [
+    'utf8', // normalized encoding name string
+    4, // buffer size
+    (self) => { self.fillLast = utf8FillLast; } // StringDecoder initialization
+  ],
+  // 1
+  [
+    'utf16le',
+    4,
+    (self) => { self.text = utf16Text; self.end = utf16End; }
+  ],
+  // 2
+  [
+    'latin1',
+    0,
+    (self) => { self.text = latin1Text; self.end = simpleEnd; }
+  ],
+  // 3
+  [
+    'base64',
+    3,
+    (self) => { self.text = base64Text; self.end = base64End; }
+  ],
+  // 4
+  [
+    'ascii',
+    0,
+    (self) => { self.text = asciiText; self.end = simpleEnd; }
+  ],
+  // 5
+  [
+    'hex',
+    0,
+    (self) => { self.text = hexText; self.end = simpleEnd; }
+  ]
+];
+
+function translateEncoding(enc) {
+  if (!enc) return 0;
+  enc += '';
+  switch (enc.length) {
+    case 4:
+      if (enc === 'utf8') return 0;
+      if (enc === 'ucs2') return 1;
+      enc = enc.toLowerCase();
+      if (enc === 'utf8') return 0;
+      if (enc === 'ucs2') return 1;
+      break;
+    case 5:
+      if (enc === 'utf-8') return 0;
+      if (enc === 'ascii') return 4;
+      if (enc === 'ucs-2') return 1;
+      enc = enc.toLowerCase();
+      if (enc === 'utf-8') return 0;
+      if (enc === 'ascii') return 4;
+      if (enc === 'ucs-2') return 1;
+      break;
+    case 7:
+      return (enc === 'utf16le' || enc.toLowerCase() === 'utf16le' ? 1 : -1);
+    case 8:
+      return (enc === 'utf-16le' || enc.toLowerCase() === 'utf-16le' ? 1 : -1);
+    case 6:
+      if (enc === 'latin1') return 2;
+      if (enc === 'binary') return 2;
+      if (enc === 'base64') return 3;
+      enc = enc.toLowerCase();
+      if (enc === 'latin1') return 2;
+      if (enc === 'binary') return 2;
+      if (enc === 'base64') return 3;
+      break;
+    case 3:
+      return (enc === 'hex' || enc.toLowerCase() === 'hex' ? 5 : -1);
+  }
+  return -1;
 }
 
 // StringDecoder provides an interface for efficiently splitting a series of
 // buffers into a series of JS strings without breaking apart multi-byte
 // characters.
+// Do not cache `Buffer.isEncoding` when checking encoding names as some
+// modules monkey-patch it to support additional encodings
 exports.StringDecoder = StringDecoder;
-function StringDecoder(encoding) {
-  this.encoding = normalizeEncoding(encoding);
-  var nb;
-  switch (this.encoding) {
-    case 'utf16le':
-      this.text = utf16Text;
-      this.end = utf16End;
-      nb = 4;
-      break;
-    case 'utf8':
-      this.fillLast = utf8FillLast;
-      nb = 4;
-      break;
-    case 'base64':
-      this.text = base64Text;
-      this.end = base64End;
-      nb = 3;
-      break;
-    case 'hex':
-      this.write = hexText;
-      this.end = simpleEnd;
-      return;
-    case 'latin1':
-      this.write = latin1Text;
-      this.end = simpleEnd;
-      return;
-    case 'ascii':
-      this.write = asciiText;
-      this.end = simpleEnd;
-      return;
+function StringDecoder(enc) {
+  var info;
+  const encIdx = translateEncoding(enc);
+  if (encIdx === -1) {
+    if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
+      throw new Error(`Unknown encoding: ${enc}`);
+    this.encoding = enc;
+    return;
+  } else {
+    info = encodings[encIdx];
   }
+  this.encoding = info[0];
+  const nb = info[1];
+  info[2](this);
+  if (nb === 0)
+    return;
   this.lastNeed = 0;
   this.lastTotal = 0;
   this.lastChar = Buffer.allocUnsafe(nb);