From e2bd2e4bbbc71b36c5a6bf1ee3b35fe5d2300997 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli
Date: Sun, 6 Nov 2022 15:27:41 -0500
Subject: [PATCH] util: improve textdecoder decode performance

PR-URL: https://github.com/nodejs/node/pull/45294
Reviewed-By: Anna Henningsen
Reviewed-By: Matteo Collina
Reviewed-By: James M Snell
Reviewed-By: Minwoo Jung
Reviewed-By: Rich Trott
---
 benchmark/util/text-decoder.js | 19 ++++++++++++++++
 lib/internal/encoding.js       |  2 +-
 src/node_i18n.cc               | 40 +++++++++++++++++++++++-----------
 3 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 benchmark/util/text-decoder.js

diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js
new file mode 100644
index 00000000000000..ad845f7c92d0c7
--- /dev/null
+++ b/benchmark/util/text-decoder.js
@@ -0,0 +1,19 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+  encoding: ['utf-8', 'latin1', 'iso-8859-3'],
+  ignoreBOM: [0, 1],
+  len: [256, 1024 * 16, 1024 * 512],
+  n: [1e6]
+});
+
+function main({ encoding, len, n, ignoreBOM }) {
+  const buf = Buffer.allocUnsafe(len);
+  const decoder = new TextDecoder(encoding, { ignoreBOM });
+
+  bench.start();
+  decoder.decode(buf);
+  bench.end(n);
+}
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 3a3d558361e118..2ab85d9d9acb06 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -438,7 +438,7 @@ function makeTextDecoderICU() {
       if (typeof ret === 'number') {
         throw new ERR_ENCODING_INVALID_ENCODED_DATA(this.encoding, ret);
       }
-      return ret.toString('ucs2');
+      return ret;
     }
   }
 
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 581d52a7d05738..ed7b72c31f975e 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -50,6 +50,7 @@
 #include "node_buffer.h"
 #include "node_errors.h"
 #include "node_internals.h"
+#include "string_bytes.h"
 #include "util-inl.h"
 #include "v8.h"
 
@@ -96,7 +97,6 @@ using v8::NewStringType;
 using v8::Object;
 using v8::ObjectTemplate;
 using v8::String;
-using v8::Uint8Array;
 using v8::Value;
 
 namespace i18n {
@@ -445,7 +445,6 @@ void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) {
 
   UErrorCode status = U_ZERO_ERROR;
   MaybeStackBuffer<UChar> result;
-  MaybeLocal<Object> ret;
 
   UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
 
@@ -501,19 +500,34 @@ void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) {
         converter->set_bom_seen(true);
       }
     }
-    ret = ToBufferEndian(env, &result);
-    if (omit_initial_bom && !ret.IsEmpty()) {
+
+    Local<Value> error;
+    UChar* output = result.out();
+    size_t beginning = 0;
+    size_t length = result.length() * sizeof(UChar);
+
+    if (omit_initial_bom) {
       // Perform `ret = ret.slice(2)`.
-      CHECK(ret.ToLocalChecked()->IsUint8Array());
-      Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
-      ret = Buffer::New(env,
-                        orig_ret->Buffer(),
-                        orig_ret->ByteOffset() + 2,
-                        orig_ret->ByteLength() - 2)
-                            .FromMaybe(Local<Uint8Array>());
+      beginning += 2;
+      length -= 2;
     }
-    if (!ret.IsEmpty())
-      args.GetReturnValue().Set(ret.ToLocalChecked());
+
+    char* value = reinterpret_cast<char*>(output) + beginning;
+
+    if (IsBigEndian()) {
+      SwapBytes16(value, length);
+    }
+
+    MaybeLocal<Value> encoded =
+        StringBytes::Encode(env->isolate(), value, length, UCS2, &error);
+
+    Local<Value> ret;
+    if (encoded.ToLocal(&ret)) {
+      args.GetReturnValue().Set(ret);
+    } else {
+      args.GetReturnValue().Set(error);
+    }
+
     return;
   }
 