-
Notifications
You must be signed in to change notification settings - Fork 29.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
src: enable avx512 optimization for base64 encoding #43717
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,32 @@ | ||
/* | ||
* Copyright 2019 Daniel Lemire, Wojciech Muła | ||
|
||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, this | ||
* list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of the copyright holder nor the names of its contributors | ||
* may be used to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
#ifndef SRC_BASE64_INL_H_ | ||
#define SRC_BASE64_INL_H_ | ||
|
||
|
@@ -120,7 +149,7 @@ size_t base64_decode(char* const dst, const size_t dstlen, | |
} | ||
|
||
|
||
inline size_t base64_encode(const char* src, | ||
inline size_t base64_encode_scalar(const char* src, | ||
size_t slen, | ||
char* dst, | ||
size_t dlen, | ||
|
@@ -182,6 +211,96 @@ inline size_t base64_encode(const char* src, | |
return dlen; | ||
} | ||
|
||
|
||
#if (defined(__x86_64) || defined(__x86_64__)) && \ | ||
(defined(__linux) || defined(__linux__)) | ||
#pragma GCC target("avx512vl", "avx512vbmi") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it be a bit cleaner to use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I chose #pragma so as to explicitly reset the pragma and avoid polluting other code. |
||
#include <immintrin.h> | ||
inline size_t base64_encode_avx512vl(const char* src, | ||
size_t slen, | ||
char* dst, | ||
size_t dlen, | ||
Base64Mode mode) { | ||
// Do the exact check as scalar algorithm. | ||
CHECK(dlen >= base64_encoded_size(slen, mode) && | ||
"not enough space provided for base64 encode"); | ||
size_t dlen_remain = dlen; | ||
const char* lookup_tbl = base64_select_table(mode); | ||
// 32-bit input | ||
// [ 0 0 0 0 0 0 0 0|c1 c0 d5 d4 d3 d2 d1 d0| | ||
// b3 b2 b1 b0 c5 c4 c3 c2|a5 a4 a3 a2 a1 a0 b5 b4] | ||
// output order [1, 2, 0, 1] | ||
// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0| | ||
// a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0] | ||
|
||
const __m512i shuffle_input = _mm512_setr_epi32( | ||
0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, | ||
0x0d0e0c0d, 0x10110f10, 0x13141213, 0x16171516, | ||
0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, | ||
0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); | ||
const __m512i lookup = _mm512_loadu_si512((const __m512i*)(lookup_tbl)); | ||
|
||
while (slen >= 64) { | ||
const __m512i v = _mm512_loadu_si512((const __m512i*)src); | ||
|
||
// Reorder bytes | ||
// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0| | ||
// a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0] | ||
const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); | ||
|
||
// After multishift a single 32-bit lane has following layout | ||
// [c1 c0 d5 d4 d3 d2 d1 d0|b1 b0 c5 c4 c3 c2 c1 c0| | ||
// a1 a0 b5 b4 b3 b2 b1 b0|d1 d0 a5 a4 a3 a2 a1 a0] | ||
// (a = [10:17], b = [4:11], c = [22:27], d = [16:21]) | ||
|
||
// 48, 54, 36, 42, 16, 22, 4, 10 | ||
const __m512i shifts = _mm512_set1_epi64(0x3036242a1016040alu); | ||
const __m512i indices = _mm512_multishift_epi64_epi8(shifts, in); | ||
|
||
// Note: the two higher bits of each indices' byte have garbage | ||
// but the following permutexvar instruction masks them out | ||
|
||
// Translation 6-bit values to ASCII. | ||
const __m512i result = _mm512_permutexvar_epi8(indices, lookup); | ||
|
||
_mm512_storeu_si512(reinterpret_cast<__m512i*>(dst), result); | ||
|
||
dlen_remain -= 64; | ||
dst += 64; | ||
src += 48; | ||
slen -= 48; | ||
} | ||
|
||
if (slen > 0) | ||
base64_encode_scalar(src, slen, dst, dlen_remain, mode); | ||
|
||
return dlen; | ||
} | ||
|
||
|
||
inline size_t base64_encode(const char* src, | ||
size_t slen, | ||
char* dst, | ||
size_t dlen, | ||
Base64Mode mode) { | ||
if (__builtin_cpu_supports("avx512vl") && \ | ||
__builtin_cpu_supports("avx512vbmi")) { | ||
return base64_encode_avx512vl(src, slen, dst, dlen, mode); | ||
} else { | ||
return base64_encode_scalar(src, slen, dst, dlen, mode); | ||
} | ||
} | ||
#pragma GCC reset_options | ||
#else | ||
inline size_t base64_encode(const char* src, | ||
size_t slen, | ||
char* dst, | ||
size_t dlen, | ||
Base64Mode mode) { | ||
return base64_encode_scalar(src, slen, dst, dlen, mode); | ||
} | ||
#endif | ||
|
||
} // namespace node | ||
|
||
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see why this operation wouldn't apply outside of Linux? Should this be a check for gcc/clang instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The original code is for Linux GCC, and I do not have a Windows machine to test it on. I will try removing line 216, adding a gcc/clang check, and letting the bot test it. Thanks!