Skip to content

Commit

Permalink
add buffer.transcode for nodejs_compat
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Jul 30, 2024
1 parent 3f94280 commit 935fe01
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 34 deletions.
3 changes: 3 additions & 0 deletions src/node/buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
SlowBuffer,
isAscii,
isUtf8,
transcode,
} from 'node-internal:internal_buffer';

// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
Expand All @@ -30,6 +31,7 @@ export {
SlowBuffer,
isAscii,
isUtf8,
transcode,
};

export default {
Expand All @@ -46,4 +48,5 @@ export default {
SlowBuffer,
isAscii,
isUtf8,
transcode,
};
1 change: 1 addition & 0 deletions src/node/internal/buffer.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ export function decode(buffer: Uint8Array, state: Uint8Array): string;
// NOTE(review): native StringDecoder helper taking the decoder `state`; the
// exact flushing semantics live in the C++ BufferUtil and are not shown here.
export function flush(state: Uint8Array): string;
// NOTE(review): presumably true when every byte of `value` is 7-bit ASCII —
// confirm against BufferUtil::isAscii.
export function isAscii(value: ArrayBufferView): boolean;
// True when the bytes of `value` are valid UTF-8 (the native BufferUtil::isUtf8
// validates with simdutf).
export function isUtf8(value: ArrayBufferView): boolean;
/**
 * Native binding that converts the bytes of `source` from `fromEncoding` to
 * `toEncoding`. The native side fails with "Unable to transcode Buffer" when
 * either encoding cannot be transcoded or the conversion itself fails.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string): ArrayBuffer;
12 changes: 12 additions & 0 deletions src/node/internal/internal_buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2294,6 +2294,18 @@ export function isUtf8(value: ArrayBufferView) {
return bufferUtil.isUtf8(value);
}

/**
 * Re-encodes the bytes of `source` from one character encoding to another.
 *
 * Both encoding names are normalized and validated here, and the *normalized*
 * names are what is handed to the native layer, so aliases and case variants
 * accepted by `normalizeEncoding` behave like the canonical spelling.
 *
 * @param source Bytes to convert.
 * @param fromEncoding Encoding the bytes in `source` are currently in.
 * @param toEncoding Encoding to convert the bytes to.
 * @returns The result of `bufferUtil.transcode` for the converted bytes.
 * @throws ERR_UNKNOWN_ENCODING if either name is not a recognized encoding.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string) {
  const normalizedFromEncoding = normalizeEncoding(fromEncoding);
  if (!Buffer.isEncoding(normalizedFromEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(fromEncoding);
  }
  const normalizedToEncoding = normalizeEncoding(toEncoding);
  if (!Buffer.isEncoding(normalizedToEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(toEncoding);
  }
  // Forward the normalized names, not the caller's raw spelling: the native
  // lookup appears to match canonical names exactly, so a raw alias such as
  // "UTF-8" would pass the checks above and then fail natively.
  return bufferUtil.transcode(source, normalizedFromEncoding, normalizedToEncoding);
}

export default {
Buffer,
constants,
Expand Down
56 changes: 22 additions & 34 deletions src/workerd/api/node/buffer.c++
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
#include "buffer-string-search.h"
#include <workerd/jsg/buffersource.h>
#include <kj/encoding.h>
#include <algorithm>
#include <kj/array.h>
#include "simdutf.h"
#include "i18n.h"

#include <algorithm>

// These are defined by <sys/byteorder.h> or <netinet/in.h> on some systems.
// To avoid warnings, undefine them before redefining them.
Expand Down Expand Up @@ -85,36 +88,6 @@ void SwapBytes(kj::ArrayPtr<kj::byte> bytes) {
}
}

// String encodings recognized by the Buffer helpers in this file.
// getEncoding() maps a canonical encoding name onto one of these values.
enum class Encoding {
ASCII,
LATIN1,
UTF8,
UTF16LE,
BASE64,
BASE64URL,
HEX,
};

// Translates a canonical encoding name into its Encoding value.
// The name must be one of the exact spellings listed below; anything else is
// treated as a programming error and reaches KJ_UNREACHABLE.
Encoding getEncoding(kj::StringPtr encoding) {
  if (encoding == "utf8"_kj) return Encoding::UTF8;
  if (encoding == "ascii") return Encoding::ASCII;
  if (encoding == "latin1") return Encoding::LATIN1;
  if (encoding == "utf16le") return Encoding::UTF16LE;
  if (encoding == "base64") return Encoding::BASE64;
  if (encoding == "base64url") return Encoding::BASE64URL;
  if (encoding == "hex") return Encoding::HEX;

  KJ_UNREACHABLE;
}

kj::Maybe<uint> tryFromHexDigit(char c) {
if ('0' <= c && c <= '9') {
return c - '0';
Expand Down Expand Up @@ -216,8 +189,9 @@ uint32_t writeInto(
dest.first(amountToCopy).copyFrom(bytes.first(amountToCopy));
return amountToCopy;
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}

kj::Array<kj::byte> decodeStringImpl(
Expand Down Expand Up @@ -272,8 +246,9 @@ kj::Array<kj::byte> decodeStringImpl(
string.writeInto(js, buf, options);
return decodeHexTruncated(buf, strict);
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}
} // namespace

Expand Down Expand Up @@ -561,8 +536,9 @@ jsg::JsString toStringImpl(
case Encoding::HEX: {
return js.str(kj::encodeHex(slice));
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}

} // namespace
Expand Down Expand Up @@ -876,5 +852,17 @@ bool BufferUtil::isUtf8(kj::Array<kj::byte> buffer) {
return simdutf::validate_utf8(buffer.asChars().begin(), buffer.size());
}

// Converts `source` from one encoding to another on behalf of the JS
// `transcode` wrapper. The encoding names are resolved with getEncoding();
// a pair that i18n cannot handle is reported as a JS Error.
kj::Array<kj::byte> BufferUtil::transcode(
    kj::Array<kj::byte> source, kj::String rawFromEncoding, kj::String rawToEncoding) {
  auto fromEncoding = getEncoding(rawFromEncoding.asPtr());
  auto toEncoding = getEncoding(rawToEncoding.asPtr());

  // TODO: Throw appropriate error here.
  const bool supported =
      i18n::canBeTranscoded(fromEncoding) && i18n::canBeTranscoded(toEncoding);
  if (!supported) {
    JSG_FAIL_REQUIRE(Error, "Unable to transcode Buffer");
  }

  return i18n::transcode(source.asPtr(), fromEncoding, toEncoding);
}

} // namespace workerd::api::node {

4 changes: 4 additions & 0 deletions src/workerd/api/node/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class BufferUtil final: public jsg::Object {
jsg::JsString flush(jsg::Lock& js, kj::Array<kj::byte> state);
bool isAscii(kj::Array<kj::byte> bytes);
bool isUtf8(kj::Array<kj::byte> bytes);
kj::Array<kj::byte> transcode(kj::Array<kj::byte> source,
kj::String fromEncoding,
kj::String toEncoding);

JSG_RESOURCE_TYPE(BufferUtil) {
JSG_METHOD(byteLength);
Expand All @@ -94,6 +97,7 @@ class BufferUtil final: public jsg::Object {
JSG_METHOD(write);
JSG_METHOD(isAscii);
JSG_METHOD(isUtf8);
// JSG_METHOD(transcode);

// For StringDecoder
JSG_METHOD(decode);
Expand Down
159 changes: 159 additions & 0 deletions src/workerd/api/node/i18n.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#include "i18n.h"

#include <kj/common.h>
#include <kj/debug.h>
#include <kj/one-of.h>

#include <jsg/exception.h>
#include <workerd/api/trace.h>

#include <string>

namespace workerd::api::node {

namespace i18n {

namespace {

// Returns the converter name passed to ucnv_open() for `input`.
// Only the four encodings listed here have a name; any other value reaches
// KJ_UNREACHABLE.
const char* getEncodingName(Encoding input) {
  switch (input) {
    case Encoding::ASCII: return "us-ascii";
    case Encoding::LATIN1: return "iso8859-1";
    case Encoding::UCS2: return "utf16le";
    case Encoding::UTF8: return "utf-8";
    default: KJ_UNREACHABLE;
  }
}

typedef TranscodeResult (*TranscodeImpl)(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
Encoding toEncoding);

// Generic conversion between two encodings using ICU's ucnv_convertEx.
// Characters that cannot be represented in the target encoding are replaced
// with '?'.
//
// Returns the converted bytes on success, or the ICU error code on failure.
TranscodeResult TranscodeDefault(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                 Encoding toEncoding) {
  Converter to(toEncoding);
  std::string substitude(to.minSize(), '?');
  to.setSubstitudeChars(substitude);

  Converter from(fromEncoding);

  // Worst case: every source byte expands to one maximum-width target
  // character, so the bound is a product, not a sum.
  // NOTE(review): assumes Converter::maxSize() is the converter's maximum bytes
  // per character (ucnv_getMaxCharSize) — confirm in i18n.h.
  const size_t limit = source.size() * to.maxSize();

  // The result outlives this call, so the output must live on the heap; a
  // stack array would leave the returned Array pointing at dead memory.
  auto out = kj::heapArray<kj::byte>(limit);
  char* const targetStart = out.asChars().begin();
  char* target = targetStart;
  const char* source_ = source.asChars().begin();
  UErrorCode status = U_ZERO_ERROR;
  ucnv_convertEx(to.conv(), from.conv(), &target, targetStart + limit, &source_,
                 source_ + source.size(), nullptr, nullptr, nullptr, nullptr, true, true,
                 &status);

  if (U_SUCCESS(status)) {
    // Return only the bytes ICU wrote, keeping the heap buffer alive by
    // attaching it to the returned Array.
    auto written = out.slice(0, static_cast<size_t>(target - targetStart));
    return written.attach(kj::mv(out));
  }

  return status;
}

// Converts `source` (in `fromEncoding`) to UTF-16 code units using
// ucnv_toUChars. `toEncoding` is unused; the signature matches TranscodeImpl.
//
// Returns the UTF-16 bytes on success, or the ICU error code on failure.
TranscodeResult TranscodeToUCS2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                Encoding toEncoding) {
  Converter from(fromEncoding);

  // One UChar per source byte covers the single-byte encodings and UTF-8.
  // The capacity passed to ICU is exactly the number of UChars allocated, so
  // an unexpectedly long result becomes U_BUFFER_OVERFLOW_ERROR instead of a
  // write past the end of the buffer.
  const auto capacity = static_cast<int32_t>(source.size());

  // Heap allocation: the result is returned to the caller, so it must not
  // live in this function's stack frame.
  auto out = kj::heapArray<UChar>(source.size());
  UErrorCode status = U_ZERO_ERROR;
  const int32_t written = ucnv_toUChars(from.conv(), out.begin(), capacity,
                                        source.asChars().begin(), capacity, &status);

  if (U_SUCCESS(status)) {
    // Expose only the code units ICU produced, not the whole buffer.
    auto used = out.slice(0, static_cast<size_t>(written)).asBytes();
    return used.attach(kj::mv(out));
  }
  return status;
}

// Converts UTF-16 code units in `source` to `toEncoding` using
// ucnv_fromUChars. Characters that cannot be represented in the target
// encoding are replaced with '?'. `fromEncoding` is unused; the signature
// matches TranscodeImpl.
//
// Returns the converted bytes on success, or the ICU error code on failure.
TranscodeResult TranscodeFromUCS2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                  Encoding toEncoding) {
  Converter to(toEncoding);
  std::string substitude(to.minSize(), '?');
  to.setSubstitudeChars(substitude);

  // Number of complete UTF-16 code units in the input; a trailing odd byte is
  // ignored.
  const size_t length_in_chars = source.size() / sizeof(UChar);

  // Copy into a UChar array so ICU reads properly aligned code units; the
  // incoming byte buffer carries no alignment guarantee.
  auto sourcebuf = kj::heapArray<UChar>(length_in_chars);
  sourcebuf.asPtr().asBytes().copyFrom(source.first(length_in_chars * sizeof(UChar)));

  // ICU's own upper bound for the encoded size of `length_in_chars` UChars.
  const int32_t capacity =
      UCNV_GET_MAX_BYTES_FOR_STRING(length_in_chars, ucnv_getMaxCharSize(to.conv()));
  auto out = kj::heapArray<kj::byte>(static_cast<size_t>(capacity));

  UErrorCode status = U_ZERO_ERROR;
  const int32_t written =
      ucnv_fromUChars(to.conv(), out.asChars().begin(), capacity, sourcebuf.begin(),
                      static_cast<int32_t>(length_in_chars), &status);

  if (U_SUCCESS(status)) {
    // Return only the bytes ICU wrote, keeping the heap buffer alive by
    // attaching it to the returned Array.
    auto used = out.slice(0, static_cast<size_t>(written));
    return used.attach(kj::mv(out));
  }
  return status;
}

// UTF-8 -> UTF-16 conversion.
// Delegates to the generic "anything -> UTF-16" path so that the function
// always returns a value; an empty body in a non-void function is undefined
// behaviour when the dispatcher selects it. A dedicated fast path can replace
// this later without changing callers.
TranscodeResult TranscodeUcs2FromUtf8(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                      Encoding toEncoding) {
  return TranscodeToUCS2(source, fromEncoding, toEncoding);
}

// UTF-16 -> UTF-8 conversion.
// Delegates to the generic converter-to-converter path so that the function
// always returns a value; an empty body in a non-void function is undefined
// behaviour when the dispatcher selects it. A dedicated fast path can replace
// this later without changing callers.
TranscodeResult TranscodeUtf8FromUcs2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                      Encoding toEncoding) {
  return TranscodeDefault(source, fromEncoding, toEncoding);
}

} // namespace

// Opens an ICU converter for `encoding` and installs `substitude` as its
// substitution characters. Asserts if ICU cannot open the converter.
Converter::Converter(Encoding encoding, std::string_view substitude) {
  UErrorCode status = U_ZERO_ERROR;
  UConverter* opened = ucnv_open(getEncodingName(encoding), &status);
  KJ_ASSERT(U_SUCCESS(status));
  conv_.reset(opened);
  setSubstitudeChars(substitude);
}

// Wraps an already-opened ICU converter and installs `substitude` as its
// substitution characters.
Converter::Converter(UConverter* converter, std::string_view substitude)
    : conv_(converter) {
  this->setSubstitudeChars(substitude);
}

// Converts `source` from `fromEncoding` to `toEncoding`.
//
// Picks a conversion strategy for the encoding pair (falling back to the
// generic converter-to-converter path) and runs it. Throws a JS Error with
// the message "Unable to transcode Buffer" if ICU reports a failure.
// A `fromEncoding` other than ASCII, LATIN1, UTF8 or UCS2 is a programming
// error and reaches KJ_UNREACHABLE.
kj::Array<kj::byte> transcode(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                              Encoding toEncoding) {
  TranscodeImpl transcode_function = &TranscodeDefault;
  switch (fromEncoding) {
    case Encoding::ASCII:
    case Encoding::LATIN1:
      if (toEncoding == Encoding::UCS2) {
        transcode_function = &TranscodeToUCS2;
      }
      break;
    case Encoding::UTF8:
      if (toEncoding == Encoding::UCS2) {
        transcode_function = &TranscodeUcs2FromUtf8;
      }
      break;
    case Encoding::UCS2:
      switch (toEncoding) {
        case Encoding::UCS2:
          transcode_function = &TranscodeDefault;
          break;
        case Encoding::UTF8:
          transcode_function = &TranscodeUtf8FromUcs2;
          break;
        default:
          transcode_function = &TranscodeFromUCS2;
          break;
      }
      // Without this break every UCS2 source fell through into the
      // KJ_UNREACHABLE below.
      break;
    default:
      KJ_UNREACHABLE;
  }

  auto result = transcode_function(source, fromEncoding, toEncoding);
  KJ_SWITCH_ONEOF(result) {
    KJ_CASE_ONEOF(value, UErrorCode) {
      JSG_FAIL_REQUIRE(Error, "Unable to transcode Buffer");
    }
    KJ_CASE_ONEOF(v, kj::Array<kj::byte>) {
      return kj::mv(v);
    }
  }
  KJ_UNREACHABLE;
}

} // namespace i18n

} // namespace workerd::api::node
Loading

0 comments on commit 935fe01

Please sign in to comment.