diff --git a/src/System.Memory/src/System/Buffers/Text/Base64.cs b/src/System.Memory/src/System/Buffers/Text/Base64.cs index 1ed3db8d2232..69493f64b065 100644 --- a/src/System.Memory/src/System/Buffers/Text/Base64.cs +++ b/src/System.Memory/src/System/Buffers/Text/Base64.cs @@ -141,38 +141,30 @@ static Base64() } [Conditional("DEBUG")] - private static unsafe void AssertRead(ref byte src, ref byte srcStart, int srcLength) + private static unsafe void AssertRead(byte* src, byte* srcStart, int srcLength) { - fixed (byte* pSrc = &src) - fixed (byte* pSrcStart = &srcStart) + int vectorElements = Unsafe.SizeOf(); + byte* readEnd = src + vectorElements; + byte* srcEnd = srcStart + srcLength; + + if (readEnd > srcEnd) { - int vectorElements = Unsafe.SizeOf(); - byte* readEnd = pSrc + vectorElements; - byte* srcEnd = pSrcStart + srcLength; - - if (readEnd > srcEnd) - { - int srcIndex = (int)(pSrc - pSrcStart); - throw new InvalidOperationException($"Read for {typeof(TVector)} is not within safe bounds. srcIndex: {srcIndex}, srcLength: {srcLength}"); - } + int srcIndex = (int)(src - srcStart); + throw new InvalidOperationException($"Read for {typeof(TVector)} is not within safe bounds. srcIndex: {srcIndex}, srcLength: {srcLength}"); } } [Conditional("DEBUG")] - private static unsafe void AssertWrite(ref byte dest, ref byte destStart, int destLength) + private static unsafe void AssertWrite(byte* dest, byte* destStart, int destLength) { - fixed (byte* pDest = &dest) - fixed (byte* pDestStart = &destStart) + int vectorElements = Unsafe.SizeOf(); + byte* writeEnd = dest + vectorElements; + byte* destEnd = destStart + destLength; + + if (writeEnd > destEnd) { - int vectorElements = Unsafe.SizeOf(); - byte* writeEnd = pDest + vectorElements; - byte* destEnd = pDestStart + destLength; - - if (writeEnd > destEnd) - { - int destIndex = (int)(pDest - pDestStart); - throw new InvalidOperationException($"Write for {typeof(TVector)} is not within safe bounds. 
destIndex: {destIndex}, destLength: {destLength}"); - } + int destIndex = (int)(dest - destStart); + throw new InvalidOperationException($"Write for {typeof(TVector)} is not within safe bounds. destIndex: {destIndex}, destLength: {destLength}"); } } } diff --git a/src/System.Memory/src/System/Buffers/Text/Base64Decoder.cs b/src/System.Memory/src/System/Buffers/Text/Base64Decoder.cs index 112796577d94..52af26d77531 100644 --- a/src/System.Memory/src/System/Buffers/Text/Base64Decoder.cs +++ b/src/System.Memory/src/System/Buffers/Text/Base64Decoder.cs @@ -3,10 +3,14 @@ // See the LICENSE file in the project root for more information. using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -using Internal.Runtime.CompilerServices; + +#if BIT64 +using nuint = System.UInt64; +#else +using nuint = System.UInt32; +#endif namespace System.Buffers.Text { @@ -23,7 +27,7 @@ public static partial class Base64 /// The output span which contains the result of the operation, i.e. the decoded binary data. /// The number of input bytes consumed during the operation. This can be used to slice the input for subsequent calls, if necessary. /// The number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. - /// True (default) when the input span contains the entire data to decode. + /// True (default) when the input span contains the entire data to decode. /// Set to false only if it is known that the input span contains partial data with more data to follow. 
/// It returns the OperationStatus enum values: /// - Done - on successful processing of the entire input span @@ -31,182 +35,192 @@ public static partial class Base64 /// - NeedMoreData - only if isFinalBlock is false and the input is not a multiple of 4, otherwise the partial input would be considered as InvalidData /// - InvalidData - if the input contains bytes outside of the expected base 64 range, or if it contains invalid/more than two padding characters, /// or if the input is incomplete (i.e. not a multiple of 4) and isFinalBlock is true. - /// - public static OperationStatus DecodeFromUtf8(ReadOnlySpan utf8, Span bytes, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) + /// + public static unsafe OperationStatus DecodeFromUtf8(ReadOnlySpan utf8, Span bytes, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) { - // PERF: use uint to avoid the sign-extensions - uint sourceIndex = 0; - uint destIndex = 0; - if (utf8.IsEmpty) - goto DoneExit; - - ref byte srcBytes = ref MemoryMarshal.GetReference(utf8); - ref byte destBytes = ref MemoryMarshal.GetReference(bytes); - - int srcLength = utf8.Length & ~0x3; // only decode input up to the closest multiple of 4. - int destLength = bytes.Length; - int maxSrcLength = srcLength; - int decodedLength = GetMaxDecodedFromUtf8Length(srcLength); - - // max. 2 padding chars - if (destLength < decodedLength - 2) { - // For overflow see comment below - maxSrcLength = destLength / 3 * 4; + bytesConsumed = 0; + bytesWritten = 0; + return OperationStatus.Done; } - if (Avx2.IsSupported && maxSrcLength >= 45) + fixed (byte* srcBytes = utf8) + fixed (byte* destBytes = bytes) + fixed (sbyte* decodingMap = s_decodingMap) { - Avx2Decode(ref srcBytes, ref destBytes, maxSrcLength, destLength, ref sourceIndex, ref destIndex); + int srcLength = utf8.Length & ~0x3; // only decode input up to the closest multiple of 4. 
+ int destLength = bytes.Length; + int maxSrcLength = srcLength; + int decodedLength = GetMaxDecodedFromUtf8Length(srcLength); - if (sourceIndex == srcLength) - goto DoneExit; - } - else if (Ssse3.IsSupported && maxSrcLength >= 24) - { - Ssse3Decode(ref srcBytes, ref destBytes, maxSrcLength, destLength, ref sourceIndex, ref destIndex); + // max. 2 padding chars + if (destLength < decodedLength - 2) + { + // For overflow see comment below + maxSrcLength = destLength / 3 * 4; + } - if (sourceIndex == srcLength) - goto DoneExit; - } + byte* src = srcBytes; + byte* dest = destBytes; + byte* srcEnd = srcBytes + (nuint)srcLength; + byte* srcMax = srcBytes + (nuint)maxSrcLength; - // Last bytes could have padding characters, so process them separately and treat them as valid only if isFinalBlock is true - // if isFinalBlock is false, padding characters are considered invalid - int skipLastChunk = isFinalBlock ? 4 : 0; + if (maxSrcLength >= 24) + { + byte* end = srcMax - 45; + if (Avx2.IsSupported && (end >= src)) + { + Avx2Decode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); + + if (src == srcEnd) + goto DoneExit; + } + + end = srcMax - 24; + if (Ssse3.IsSupported && (end >= src)) + { + Ssse3Decode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); + + if (src == srcEnd) + goto DoneExit; + } + } - if (destLength >= decodedLength) - { - maxSrcLength = srcLength - skipLastChunk; - } - else - { - // This should never overflow since destLength here is less than int.MaxValue / 4 * 3 (i.e. 
1610612733) - // Therefore, (destLength / 3) * 4 will always be less than 2147483641 - maxSrcLength = (destLength / 3) * 4; - } + // Last bytes could have padding characters, so process them separately and treat them as valid only if isFinalBlock is true + // if isFinalBlock is false, padding characters are considered invalid + bool isDestinationTooSmall; - ref sbyte decodingMap = ref s_decodingMap[0]; + if (destLength >= decodedLength) + { + isDestinationTooSmall = false; + maxSrcLength = isFinalBlock ? srcLength - 4 : srcLength; + } + else + { + // This should never overflow since destLength here is less than int.MaxValue / 4 * 3 (i.e. 1610612733) + // Therefore, (destLength / 3) * 4 will always be less than 2147483641 + maxSrcLength = (destLength / 3) * 4; + isDestinationTooSmall = true; + } - // In order to elide the movsxd in the loop - if (sourceIndex < maxSrcLength) - { - do + srcMax = srcBytes + (nuint)maxSrcLength; + while (src < srcMax) { - int result = Decode(ref Unsafe.Add(ref srcBytes, (IntPtr)sourceIndex), ref decodingMap); + int result = Decode(src, decodingMap); if (result < 0) goto InvalidDataExit; - WriteThreeLowOrderBytes(ref Unsafe.Add(ref destBytes, (IntPtr)destIndex), result); - destIndex += 3; - sourceIndex += 4; + WriteThreeLowOrderBytes(dest, result); + src += 4; + dest += 3; } - while (sourceIndex < (uint)maxSrcLength); - } - if (maxSrcLength != srcLength - skipLastChunk) - goto DestinationTooSmallExit; + if (isDestinationTooSmall) + goto DestinationTooSmallExit; - // If input is less than 4 bytes, srcLength == sourceIndex == 0 - // If input is not a multiple of 4, sourceIndex == srcLength != 0 - if (sourceIndex == srcLength) - { - if (isFinalBlock) - goto InvalidDataExit; - goto NeedMoreDataExit; - } + // If input is less than 4 bytes, srcLength == 0, so src == srcBytes == srcEnd + // If isFinalBlock is false, the loop above stops only at srcEnd, so src == srcEnd here as well + if (src == srcEnd) + { + if (isFinalBlock) + goto InvalidDataExit; + goto NeedMoreDataExit; + } - // if 
isFinalBlock is false, we will never reach this point + // if isFinalBlock is false, we will never reach this point - // Handle last four bytes. There are 0, 1, 2 padding chars. - uint t0, t1, t2, t3; - t0 = Unsafe.Add(ref srcBytes, (IntPtr)(uint)(srcLength - 4)); - t1 = Unsafe.Add(ref srcBytes, (IntPtr)(uint)(srcLength - 3)); - t2 = Unsafe.Add(ref srcBytes, (IntPtr)(uint)(srcLength - 2)); - t3 = Unsafe.Add(ref srcBytes, (IntPtr)(uint)(srcLength - 1)); + // Handle last four bytes. There are 0, 1, 2 padding chars. + uint t0 = srcEnd[-4]; + uint t1 = srcEnd[-3]; + uint t2 = srcEnd[-2]; + uint t3 = srcEnd[-1]; - int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); - int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); + int i0 = decodingMap[t0]; + int i1 = decodingMap[t1]; - i0 <<= 18; - i1 <<= 12; + i0 <<= 18; + i1 <<= 12; - i0 |= i1; + i0 |= i1; - if (t3 != EncodingPad) - { - int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); - int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); + byte* destMax = destBytes + (nuint)destLength; - i2 <<= 6; + if (t3 != EncodingPad) + { + int i2 = decodingMap[t2]; + int i3 = decodingMap[t3]; - i0 |= i3; - i0 |= i2; + i2 <<= 6; - if (i0 < 0) - goto InvalidDataExit; - if (destIndex > destLength - 3) - goto DestinationTooSmallExit; + i0 |= i3; + i0 |= i2; - WriteThreeLowOrderBytes(ref Unsafe.Add(ref destBytes, (IntPtr)destIndex), i0); - destIndex += 3; - } - else if (t2 != EncodingPad) - { - int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); + if (i0 < 0) + goto InvalidDataExit; + if (dest + 3 > destMax) + goto DestinationTooSmallExit; - i2 <<= 6; + WriteThreeLowOrderBytes(dest, i0); + dest += 3; + } + else if (t2 != EncodingPad) + { + int i2 = decodingMap[t2]; - i0 |= i2; + i2 <<= 6; - if (i0 < 0) - goto InvalidDataExit; - if (destIndex > destLength - 2) - goto DestinationTooSmallExit; + i0 |= i2; - Unsafe.Add(ref destBytes, (IntPtr)destIndex) = (byte)(i0 >> 16); - Unsafe.Add(ref destBytes, (IntPtr)(destIndex + 1)) = (byte)(i0 >> 8); - destIndex 
+= 2; - } - else - { - if (i0 < 0) - goto InvalidDataExit; - if (destIndex > destLength - 1) - goto DestinationTooSmallExit; + if (i0 < 0) + goto InvalidDataExit; + if (dest + 2 > destMax) + goto DestinationTooSmallExit; - Unsafe.Add(ref destBytes, (IntPtr)destIndex) = (byte)(i0 >> 16); - destIndex += 1; - } + dest[0] = (byte)(i0 >> 16); + dest[1] = (byte)(i0 >> 8); + dest += 2; + } + else + { + if (i0 < 0) + goto InvalidDataExit; + if (dest + 1 > destMax) + goto DestinationTooSmallExit; - sourceIndex += 4; + dest[0] = (byte)(i0 >> 16); + dest += 1; + } - if (srcLength != utf8.Length) - goto InvalidDataExit; + src += 4; - DoneExit: - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.Done; + if (srcLength != utf8.Length) + goto InvalidDataExit; + + DoneExit: + bytesConsumed = (int)(src - srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.Done; - DestinationTooSmallExit: - if (srcLength != utf8.Length && isFinalBlock) - goto InvalidDataExit; // if input is not a multiple of 4, and there is no more data, return invalid data instead + DestinationTooSmallExit: + if (srcLength != utf8.Length && isFinalBlock) + goto InvalidDataExit; // if input is not a multiple of 4, and there is no more data, return invalid data instead - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.DestinationTooSmall; + bytesConsumed = (int)(src - srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.DestinationTooSmall; - NeedMoreDataExit: - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.NeedMoreData; + NeedMoreDataExit: + bytesConsumed = (int)(src - srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.NeedMoreData; - InvalidDataExit: - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.InvalidData; + InvalidDataExit: + bytesConsumed = (int)(src - 
srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.InvalidData; + } } /// @@ -233,109 +247,103 @@ public static int GetMaxDecodedFromUtf8Length(int length) /// The number of bytes written into the buffer. /// It returns the OperationStatus enum values: /// - Done - on successful processing of the entire input span - /// - InvalidData - if the input contains bytes outside of the expected base 64 range, or if it contains invalid/more than two padding characters, + /// - InvalidData - if the input contains bytes outside of the expected base 64 range, or if it contains invalid/more than two padding characters, /// or if the input is incomplete (i.e. not a multiple of 4). /// It does not return DestinationTooSmall since that is not possible for base 64 decoding. - /// It does not return NeedMoreData since this method tramples the data in the buffer and + /// It does not return NeedMoreData since this method tramples the data in the buffer and /// hence can only be called once with all the data in the buffer. 
- /// - public static OperationStatus DecodeFromUtf8InPlace(Span buffer, out int bytesWritten) + /// + public static unsafe OperationStatus DecodeFromUtf8InPlace(Span buffer, out int bytesWritten) { - int bufferLength = buffer.Length; - uint sourceIndex = 0; - uint destIndex = 0; + fixed (byte* bufferBytes = buffer) + fixed (sbyte* decodingMap = s_decodingMap) + { + int bufferLength = buffer.Length; + uint sourceIndex = 0; + uint destIndex = 0; - // only decode input if it is a multiple of 4 - if (bufferLength != ((bufferLength >> 2) * 4)) - goto InvalidExit; - if (bufferLength == 0) - goto DoneExit; + // only decode input if it is a multiple of 4 + if (bufferLength != ((bufferLength >> 2) * 4)) + goto InvalidExit; + if (bufferLength == 0) + goto DoneExit; - ref byte bufferBytes = ref MemoryMarshal.GetReference(buffer); + while (sourceIndex < bufferLength - 4) + { + int result = Decode(bufferBytes + sourceIndex, decodingMap); + if (result < 0) + goto InvalidExit; + WriteThreeLowOrderBytes(bufferBytes + destIndex, result); + destIndex += 3; + sourceIndex += 4; + } - ref sbyte decodingMap = ref s_decodingMap[0]; + uint t0 = bufferBytes[bufferLength - 4]; + uint t1 = bufferBytes[bufferLength - 3]; + uint t2 = bufferBytes[bufferLength - 2]; + uint t3 = bufferBytes[bufferLength - 1]; - while (sourceIndex < bufferLength - 4) - { - int result = Decode(ref Unsafe.Add(ref bufferBytes, (IntPtr)sourceIndex), ref decodingMap); - if (result < 0) - goto InvalidExit; - WriteThreeLowOrderBytes(ref Unsafe.Add(ref bufferBytes, (IntPtr)destIndex), result); - destIndex += 3; - sourceIndex += 4; - } + int i0 = decodingMap[t0]; + int i1 = decodingMap[t1]; - uint t0, t1, t2, t3; - uint n = (uint)(bufferLength - 4); - t0 = Unsafe.Add(ref bufferBytes, (IntPtr)n); - t1 = Unsafe.Add(ref bufferBytes, (IntPtr)(n + 1)); - t2 = Unsafe.Add(ref bufferBytes, (IntPtr)(n + 2)); - t3 = Unsafe.Add(ref bufferBytes, (IntPtr)(n + 3)); + i0 <<= 18; + i1 <<= 12; - int i0 = Unsafe.Add(ref decodingMap, 
(IntPtr)t0); - int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); + i0 |= i1; - i0 <<= 18; - i1 <<= 12; + if (t3 != EncodingPad) + { + int i2 = decodingMap[t2]; + int i3 = decodingMap[t3]; - i0 |= i1; + i2 <<= 6; - if (t3 != EncodingPad) - { - int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); - int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); + i0 |= i3; + i0 |= i2; - i2 <<= 6; + if (i0 < 0) + goto InvalidExit; - i0 |= i3; - i0 |= i2; + WriteThreeLowOrderBytes(bufferBytes + destIndex, i0); + destIndex += 3; + } + else if (t2 != EncodingPad) + { + int i2 = decodingMap[t2]; - if (i0 < 0) - goto InvalidExit; + i2 <<= 6; - WriteThreeLowOrderBytes(ref Unsafe.Add(ref bufferBytes, (IntPtr)destIndex), i0); - destIndex += 3; - } - else if (t2 != EncodingPad) - { - int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); + i0 |= i2; - i2 <<= 6; + if (i0 < 0) + goto InvalidExit; - i0 |= i2; + bufferBytes[destIndex] = (byte)(i0 >> 16); + bufferBytes[destIndex + 1] = (byte)(i0 >> 8); + destIndex += 2; + } + else + { + if (i0 < 0) + goto InvalidExit; - if (i0 < 0) - goto InvalidExit; + bufferBytes[destIndex] = (byte)(i0 >> 16); + destIndex += 1; + } - Unsafe.Add(ref bufferBytes, (IntPtr)destIndex) = (byte)(i0 >> 16); - Unsafe.Add(ref bufferBytes, (IntPtr)(destIndex + 1)) = (byte)(i0 >> 8); - destIndex += 2; - } - else - { - if (i0 < 0) - goto InvalidExit; + DoneExit: + bytesWritten = (int)destIndex; + return OperationStatus.Done; - Unsafe.Add(ref bufferBytes, (IntPtr)destIndex) = (byte)(i0 >> 16); - destIndex += 1; + InvalidExit: + bytesWritten = (int)destIndex; + return OperationStatus.InvalidData; } - - DoneExit: - bytesWritten = (int)destIndex; - return OperationStatus.Done; - - InvalidExit: - bytesWritten = (int)destIndex; - return OperationStatus.InvalidData; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void Avx2Decode(ref byte src, ref byte destBytes, int sourceLength, int destLength, ref uint sourceIndex, ref uint destIndex) + private static unsafe void 
Avx2Decode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) { - ref byte srcStart = ref src; - ref byte destStart = ref destBytes; - ref byte simdSrcEnd = ref Unsafe.Add(ref src, (IntPtr)((uint)sourceLength - 45 + 1)); - // The JIT won't hoist these "constants", so help it Vector256 lutHi = s_avxDecodeLutHi; Vector256 lutLo = s_avxDecodeLutLo; @@ -346,11 +354,14 @@ private static void Avx2Decode(ref byte src, ref byte destBytes, int sourceLengt Vector256 shuffleVec = s_avxDecodeShuffleVec; Vector256 permuteVec = s_avxDecodePermuteVec; + byte* src = srcBytes; + byte* dest = destBytes; + //while (remaining >= 45) do { - AssertRead>(ref src, ref srcStart, sourceLength); - Vector256 str = Unsafe.As>(ref src); + AssertRead>(src, srcStart, sourceLength); + Vector256 str = Avx.LoadVector256(src).AsSByte(); Vector256 hiNibbles = Avx2.And(Avx2.ShiftRightLogical(str.AsInt32(), 4).AsSByte(), mask2F); Vector256 loNibbles = Avx2.And(str, mask2F); @@ -371,31 +382,21 @@ private static void Avx2Decode(ref byte src, ref byte destBytes, int sourceLengt output = Avx2.Shuffle(output.AsSByte(), shuffleVec).AsInt32(); str = Avx2.PermuteVar8x32(output, permuteVec).AsSByte(); - AssertWrite>(ref destBytes, ref destStart, destLength); - // As has better CQ than WriteUnaligned - // https://github.com/dotnet/coreclr/issues/21132 - Unsafe.As>(ref destBytes) = str; + AssertWrite>(dest, destStart, destLength); + Avx.Store(dest, str.AsByte()); - src = ref Unsafe.Add(ref src, 32); - destBytes = ref Unsafe.Add(ref destBytes, 24); + src += 32; + dest += 24; } - while (Unsafe.IsAddressLessThan(ref src, ref simdSrcEnd)); - - // Cast to ulong to avoid the overflow-check. Codegen for x86 is still good. 
- sourceIndex = (uint)(ulong)Unsafe.ByteOffset(ref srcStart, ref src); - destIndex = (uint)(ulong)Unsafe.ByteOffset(ref destStart, ref destBytes); + while (src <= srcEnd); - src = ref srcStart; - destBytes = ref destStart; + srcBytes = src; + destBytes = dest; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void Ssse3Decode(ref byte src, ref byte destBytes, int sourceLength, int destLength, ref uint sourceIndex, ref uint destIndex) + private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) { - ref byte srcStart = ref src; - ref byte destStart = ref destBytes; - ref byte simdSrcEnd = ref Unsafe.Add(ref src, (IntPtr)((uint)sourceLength - 24 + 1)); - // The JIT won't hoist these "constants", so help it Vector128 lutHi = s_sseDecodeLutHi; Vector128 lutLo = s_sseDecodeLutLo; @@ -405,11 +406,14 @@ private static void Ssse3Decode(ref byte src, ref byte destBytes, int sourceLeng Vector128 shuffleConstant1 = Vector128.Create(0x00011000).AsInt16(); Vector128 shuffleVec = s_sseDecodeShuffleVec; + byte* src = srcBytes; + byte* dest = destBytes; + //while (remaining >= 24) do { - AssertRead>(ref src, ref srcStart, sourceLength); - Vector128 str = Unsafe.As>(ref src); + AssertRead>(src, srcStart, sourceLength); + Vector128 str = Sse2.LoadVector128(src).AsSByte(); Vector128 hiNibbles = Sse2.And(Sse2.ShiftRightLogical(str.AsInt32(), 4).AsSByte(), mask2F); Vector128 loNibbles = Sse2.And(str, mask2F); @@ -428,38 +432,30 @@ private static void Ssse3Decode(ref byte src, ref byte destBytes, int sourceLeng Vector128 output = Sse2.MultiplyAddAdjacent(merge_ab_and_bc, shuffleConstant1); str = Ssse3.Shuffle(output.AsSByte(), shuffleVec); - AssertWrite>(ref destBytes, ref destStart, destLength); - // As has better CQ than WriteUnaligned - // https://github.com/dotnet/coreclr/issues/21132 - Unsafe.As>(ref destBytes) = str; + AssertWrite>(dest, destStart, destLength); 
+ Sse2.Store(dest, str.AsByte()); - src = ref Unsafe.Add(ref src, 16); - destBytes = ref Unsafe.Add(ref destBytes, 12); + src += 16; + dest += 12; } - while (Unsafe.IsAddressLessThan(ref src, ref simdSrcEnd)); - - // Cast to ulong to avoid the overflow-check. Codegen for x86 is still good. - sourceIndex = (uint)(ulong)Unsafe.ByteOffset(ref srcStart, ref src); - destIndex = (uint)(ulong)Unsafe.ByteOffset(ref destStart, ref destBytes); + while (src <= srcEnd); - src = ref srcStart; - destBytes = ref destStart; + srcBytes = src; + destBytes = dest; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int Decode(ref byte encodedBytes, ref sbyte decodingMap) + private static unsafe int Decode(byte* encodedBytes, sbyte* decodingMap) { - uint t0, t1, t2, t3; - - t0 = encodedBytes; - t1 = Unsafe.Add(ref encodedBytes, 1); - t2 = Unsafe.Add(ref encodedBytes, 2); - t3 = Unsafe.Add(ref encodedBytes, 3); + nuint t0 = encodedBytes[0]; + nuint t1 = encodedBytes[1]; + nuint t2 = encodedBytes[2]; + nuint t3 = encodedBytes[3]; - int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); - int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); - int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); - int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); + int i0 = decodingMap[t0]; + int i1 = decodingMap[t1]; + int i2 = decodingMap[t2]; + int i3 = decodingMap[t3]; i0 <<= 18; i1 <<= 12; @@ -473,11 +469,11 @@ private static int Decode(ref byte encodedBytes, ref sbyte decodingMap) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void WriteThreeLowOrderBytes(ref byte destination, int value) + private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) { - destination = (byte)(value >> 16); - Unsafe.Add(ref destination, 1) = (byte)(value >> 8); - Unsafe.Add(ref destination, 2) = (byte)value; + destination[0] = (byte)(value >> 16); + destination[1] = (byte)(value >> 8); + destination[2] = (byte)(value); } // Pre-computing this table using a custom 
string(s_characters) and GenerateDecodingMapAndVerify (found in tests) diff --git a/src/System.Memory/src/System/Buffers/Text/Base64Encoder.cs b/src/System.Memory/src/System/Buffers/Text/Base64Encoder.cs index fceef6665fd0..54a355deac63 100644 --- a/src/System.Memory/src/System/Buffers/Text/Base64Encoder.cs +++ b/src/System.Memory/src/System/Buffers/Text/Base64Encoder.cs @@ -3,11 +3,16 @@ // See the LICENSE file in the project root for more information. using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using Internal.Runtime.CompilerServices; +#if BIT64 +using nuint = System.UInt64; +#else +using nuint = System.UInt32; +#endif + namespace System.Buffers.Text { // AVX2 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/avx2 @@ -25,112 +30,113 @@ public static partial class Base64 /// The output span which contains the result of the operation, i.e. the UTF-8 encoded text in base 64. /// The number of input bytes consumed during the operation. This can be used to slice the input for subsequent calls, if necessary. /// The number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. - /// True (default) when the input span contains the entire data to encode. + /// True (default) when the input span contains the entire data to encode. /// Set to false only if it is known that the input span contains partial data with more data to follow. /// It returns the OperationStatus enum values: /// - Done - on successful processing of the entire input span /// - DestinationTooSmall - if there is not enough space in the output span to fit the encoded input /// - NeedMoreData - only if isFinalBlock is false, otherwise the output is padded if the input is not a multiple of 3 /// It does not return InvalidData since that is not possible for base 64 encoding. 
- /// - public static OperationStatus EncodeToUtf8(ReadOnlySpan bytes, Span utf8, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) + /// + public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan bytes, Span utf8, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) { - // PERF: use uint to avoid the sign-extensions - uint sourceIndex = 0; - uint destIndex = 0; - if (bytes.IsEmpty) - goto DoneExit; - - ref byte srcBytes = ref MemoryMarshal.GetReference(bytes); - ref byte destBytes = ref MemoryMarshal.GetReference(utf8); - - int srcLength = bytes.Length; - int destLength = utf8.Length; - int maxSrcLength = srcLength; - - if (srcLength <= MaximumEncodeLength && destLength >= GetMaxEncodedToUtf8Length(srcLength)) - { - maxSrcLength = srcLength; - } - else { - maxSrcLength = (destLength >> 2) * 3; + bytesConsumed = 0; + bytesWritten = 0; + return OperationStatus.Done; } - if (srcLength < 16) - goto Scalar; - - if (Avx2.IsSupported && maxSrcLength >= 32) + fixed (byte* srcBytes = bytes) + fixed (byte* destBytes = utf8) + fixed (byte* encodingMap = s_encodingMap) { - Avx2Encode(ref srcBytes, ref destBytes, maxSrcLength, destLength, ref sourceIndex, ref destIndex); - - if (sourceIndex == srcLength) - goto DoneExit; - } + int srcLength = bytes.Length; + int destLength = utf8.Length; + int maxSrcLength; - if (Ssse3.IsSupported && (maxSrcLength >= (int)sourceIndex + 16)) - { - Ssse3Encode(ref srcBytes, ref destBytes, maxSrcLength, destLength, ref sourceIndex, ref destIndex); + if (srcLength <= MaximumEncodeLength && destLength >= GetMaxEncodedToUtf8Length(srcLength)) + { + maxSrcLength = srcLength; + } + else + { + maxSrcLength = (destLength >> 2) * 3; + } - if (sourceIndex == srcLength) - goto DoneExit; - } + byte* src = srcBytes; + byte* dest = destBytes; + byte* srcEnd = srcBytes + (nuint)srcLength; + byte* srcMax = srcBytes + (nuint)maxSrcLength; - Scalar: - maxSrcLength -= 2; - uint result = 0; + if (maxSrcLength >= 16) + { 
+ byte* end = srcMax - 32; + if (Avx2.IsSupported && (end >= src)) + { + Avx2Encode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); + + if (src == srcEnd) + goto DoneExit; + } + + end = srcMax - 16; + if (Ssse3.IsSupported && (end >= src)) + { + Ssse3Encode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); + + if (src == srcEnd) + goto DoneExit; + } + } - ref byte encodingMap = ref s_encodingMap[0]; + uint result = 0; - // In order to elide the movsxd in the loop - if (sourceIndex < maxSrcLength) - { - do + srcMax -= 2; + while (src < srcMax) { - result = Encode(ref Unsafe.Add(ref srcBytes, (IntPtr)sourceIndex), ref encodingMap); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref destBytes, (IntPtr)destIndex), result); - destIndex += 4; - sourceIndex += 3; + result = Encode(src, encodingMap); + Unsafe.WriteUnaligned(dest, result); + src += 3; + dest += 4; } - while (sourceIndex < (uint)maxSrcLength); - } - if (maxSrcLength != srcLength - 2) - goto DestinationTooSmallExit; + if (srcMax + 2 != srcEnd) + goto DestinationTooSmallExit; - if (!isFinalBlock) - goto NeedMoreDataExit; + if (!isFinalBlock) + goto NeedMoreData; - if (sourceIndex == srcLength - 1) - { - result = EncodeAndPadTwo(ref Unsafe.Add(ref srcBytes, (IntPtr)sourceIndex), ref encodingMap); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref destBytes, (IntPtr)destIndex), result); - destIndex += 4; - sourceIndex += 1; - } - else if (sourceIndex == srcLength - 2) - { - result = EncodeAndPadOne(ref Unsafe.Add(ref srcBytes, (IntPtr)sourceIndex), ref encodingMap); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref destBytes, (IntPtr)destIndex), result); - destIndex += 4; - sourceIndex += 2; - } + if (src + 1 == srcEnd) + { + result = EncodeAndPadTwo(src, encodingMap); + Unsafe.WriteUnaligned(dest, result); + src += 1; + dest += 4; + } + else if (src + 2 == srcEnd) + { + result = EncodeAndPadOne(src, encodingMap); + Unsafe.WriteUnaligned(dest, result); + src += 2; + dest += 4; + } - 
DoneExit: - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.Done; + DoneExit: + bytesConsumed = (int)(src - srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.Done; - NeedMoreDataExit: - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.NeedMoreData; + DestinationTooSmallExit: + bytesConsumed = (int)(src - srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.DestinationTooSmall; - DestinationTooSmallExit: - bytesConsumed = (int)sourceIndex; - bytesWritten = (int)destIndex; - return OperationStatus.DestinationTooSmall; + NeedMoreData: + bytesConsumed = (int)(src - srcBytes); + bytesWritten = (int)(dest - destBytes); + return OperationStatus.NeedMoreData; + } } /// @@ -149,12 +155,12 @@ public static int GetMaxEncodedToUtf8Length(int length) } /// - /// Encode the span of binary data (in-place) into UTF-8 encoded text represented as base 64. + /// Encode the span of binary data (in-place) into UTF-8 encoded text represented as base 64. /// The encoded text output is larger than the binary data contained in the input (the operation inflates the data). /// - /// The input span which contains binary data that needs to be encoded. + /// The input span which contains binary data that needs to be encoded. /// It needs to be large enough to fit the result of the operation. - /// The amount of binary data contained within the buffer that needs to be encoded + /// The amount of binary data contained within the buffer that needs to be encoded /// (and needs to be smaller than the buffer length). /// The number of bytes written into the buffer. 
/// It returns the OperationStatus enum values: @@ -162,63 +168,60 @@ public static int GetMaxEncodedToUtf8Length(int length) /// - DestinationTooSmall - if there is not enough space in the buffer beyond dataLength to fit the result of encoding the input /// It does not return NeedMoreData since this method tramples the data in the buffer and hence can only be called once with all the data in the buffer. /// It does not return InvalidData since that is not possible for base 64 encoding. - /// - public static OperationStatus EncodeToUtf8InPlace(Span buffer, int dataLength, out int bytesWritten) + /// + public static unsafe OperationStatus EncodeToUtf8InPlace(Span buffer, int dataLength, out int bytesWritten) { - int encodedLength = GetMaxEncodedToUtf8Length(dataLength); - if (buffer.Length < encodedLength) - goto FalseExit; - - int leftover = dataLength - (dataLength / 3) * 3; // how many bytes after packs of 3 + fixed (byte* bufferBytes = buffer) + fixed (byte* encodingMap = s_encodingMap) + { + int encodedLength = GetMaxEncodedToUtf8Length(dataLength); + if (buffer.Length < encodedLength) + goto FalseExit; - // PERF: use uint to avoid the sign-extensions - uint destinationIndex = (uint)(encodedLength - 4); - uint sourceIndex = (uint)(dataLength - leftover); - uint result = 0; + int leftover = dataLength - (dataLength / 3) * 3; // how many bytes after packs of 3 - ref byte encodingMap = ref s_encodingMap[0]; - ref byte bufferBytes = ref MemoryMarshal.GetReference(buffer); + // PERF: use nuint to avoid the sign-extensions + nuint destinationIndex = (nuint)(encodedLength - 4); + nuint sourceIndex = (nuint)(dataLength - leftover); + uint result = 0; - // encode last pack to avoid conditional in the main loop - if (leftover != 0) - { - if (leftover == 1) + // encode last pack to avoid conditional in the main loop + if (leftover != 0) { - result = EncodeAndPadTwo(ref Unsafe.Add(ref bufferBytes, (IntPtr)sourceIndex), ref encodingMap); + if (leftover == 1) + { + result = 
EncodeAndPadTwo(bufferBytes + sourceIndex, encodingMap); + } + else + { + result = EncodeAndPadOne(bufferBytes + sourceIndex, encodingMap); + } + + Unsafe.WriteUnaligned(bufferBytes + destinationIndex, result); + destinationIndex -= 4; } - else + + sourceIndex -= 3; + while ((int)sourceIndex >= 0) { - result = EncodeAndPadOne(ref Unsafe.Add(ref bufferBytes, (IntPtr)sourceIndex), ref encodingMap); + result = Encode(bufferBytes + sourceIndex, encodingMap); + Unsafe.WriteUnaligned(bufferBytes + destinationIndex, result); + destinationIndex -= 4; + sourceIndex -= 3; } - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bufferBytes, (IntPtr)destinationIndex), result); - destinationIndex -= 4; - } + bytesWritten = encodedLength; + return OperationStatus.Done; - sourceIndex -= 3; - while ((int)sourceIndex >= 0) - { - result = Encode(ref Unsafe.Add(ref bufferBytes, (IntPtr)sourceIndex), ref encodingMap); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bufferBytes, (IntPtr)destinationIndex), result); - destinationIndex -= 4; - sourceIndex -= 3; + FalseExit: + bytesWritten = 0; + return OperationStatus.DestinationTooSmall; } - - bytesWritten = encodedLength; - return OperationStatus.Done; - - FalseExit: - bytesWritten = 0; - return OperationStatus.DestinationTooSmall; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void Avx2Encode(ref byte src, ref byte dest, int sourceLength, int destLength, ref uint sourceIndex, ref uint destIndex) + private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) { - ref byte srcStart = ref src; - ref byte destStart = ref dest; - ref byte simdSrcEnd = ref Unsafe.Add(ref src, (IntPtr)((uint)sourceLength - 28)); // 28 = 32 - 4 - // The JIT won't hoist these "constants", so help it Vector256 shuffleVec = s_avxEncodeShuffleVec; Vector256 shuffleConstant0 = Vector256.Create(0x0fc0fc00).AsSByte(); @@ -229,13 +232,19 @@ private static void 
Avx2Encode(ref byte src, ref byte dest, int sourceLength, in Vector256 translationContant1 = Vector256.Create((sbyte)25); Vector256 lut = s_avxEncodeLut; + byte* src = srcBytes; + byte* dest = destBytes; + // first load is done at c-0 not to get a segfault - AssertRead>(ref src, ref srcStart, sourceLength); - Vector256 str = Unsafe.As>(ref src); + AssertRead>(src, srcStart, sourceLength); + Vector256 str = Avx.LoadVector256(src).AsSByte(); - // shift by 4 bytes, as required by enc_reshuffle + // shift by 4 bytes, as required by Reshuffle str = Avx2.PermuteVar8x32(str.AsInt32(), s_avxEncodePermuteVec).AsSByte(); + // Next loads are done at src-4, as required by Reshuffle, so shift it once + src -= 4; + while (true) { // Reshuffle @@ -252,41 +261,27 @@ private static void Avx2Encode(ref byte src, ref byte dest, int sourceLength, in Vector256 tmp = Avx2.Subtract(indices.AsSByte(), mask); str = Avx2.Add(str, Avx2.Shuffle(lut, tmp)); - AssertWrite>(ref dest, ref destStart, destLength); - // As has better CQ than WriteUnaligned - // https://github.com/dotnet/coreclr/issues/21132 - Unsafe.As>(ref dest) = str; + AssertWrite>(dest, destStart, destLength); + Avx.Store(dest, str.AsByte()); - src = ref Unsafe.Add(ref src, 24); - dest = ref Unsafe.Add(ref dest, 32); + src += 24; + dest += 32; - if (Unsafe.IsAddressGreaterThan(ref src, ref simdSrcEnd)) + if (src > srcEnd) break; - // Load at c-4, as required by enc_reshuffle - AssertRead>(ref Unsafe.Add(ref src, -4), ref srcStart, sourceLength); - str = Unsafe.As>(ref Unsafe.Add(ref src, -4)); + // Load at src-4, as required by Reshuffle (already shifted by -4) + AssertRead>(src, srcStart, sourceLength); + str = Avx.LoadVector256(src).AsSByte(); } - // Cast to ulong to avoid the overflow-check. Codegen for x86 is still good. 
- sourceIndex = (uint)(ulong)Unsafe.ByteOffset(ref srcStart, ref src); - destIndex = (uint)(ulong)Unsafe.ByteOffset(ref destStart, ref dest); - - src = ref srcStart; - dest = ref destStart; + srcBytes = src + 4; + destBytes = dest; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void Ssse3Encode(ref byte src, ref byte dest, int sourceLength, int destLength, ref uint sourceIndex, ref uint destIndex) + private static unsafe void Ssse3Encode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) { - ref byte srcStart = ref src; - ref byte destStart = ref dest; - ref byte simdSrcEnd = ref Unsafe.Add(ref src, (IntPtr)((uint)sourceLength - 16 + 1)); - - // Shift to workspace - src = ref Unsafe.Add(ref src, (IntPtr)sourceIndex); - dest = ref Unsafe.Add(ref dest, (IntPtr)destIndex); - // The JIT won't hoist these "constants", so help it Vector128 shuffleVec = s_sseEncodeShuffleVec; Vector128 shuffleConstant0 = Vector128.Create(0x0fc0fc00).AsSByte(); @@ -297,11 +292,14 @@ private static void Ssse3Encode(ref byte src, ref byte dest, int sourceLength, i Vector128 translationContant1 = Vector128.Create((sbyte)25); Vector128 lut = s_sseEncodeLut; + byte* src = srcBytes; + byte* dest = destBytes; + //while (remaining >= 16) - while (Unsafe.IsAddressLessThan(ref src, ref simdSrcEnd)) + do { - AssertRead>(ref src, ref srcStart, sourceLength); - Vector128 str = Unsafe.As>(ref src); + AssertRead>(src, srcStart, sourceLength); + Vector128 str = Sse2.LoadVector128(src).AsSByte(); // Reshuffle str = Ssse3.Shuffle(str, shuffleVec); @@ -317,57 +315,61 @@ private static void Ssse3Encode(ref byte src, ref byte dest, int sourceLength, i Vector128 tmp = Sse2.Subtract(indices.AsSByte(), mask); str = Sse2.Add(str, Ssse3.Shuffle(lut, tmp)); - AssertWrite>(ref dest, ref destStart, destLength); - // As has better CQ than WriteUnaligned - // https://github.com/dotnet/coreclr/issues/21132 - Unsafe.As>(ref dest) 
= str; + AssertWrite>(dest, destStart, destLength); + Sse2.Store(dest, str.AsByte()); - src = ref Unsafe.Add(ref src, 12); - dest = ref Unsafe.Add(ref dest, 16); + src += 12; + dest += 16; } + while (src <= srcEnd); - // Cast to ulong to avoid the overflow-check. Codegen for x86 is still good. - sourceIndex = (uint)(ulong)Unsafe.ByteOffset(ref srcStart, ref src); - destIndex = (uint)(ulong)Unsafe.ByteOffset(ref destStart, ref dest); - - src = ref srcStart; - dest = ref destStart; + srcBytes = src; + destBytes = dest; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint Encode(ref byte threeBytes, ref byte encodingMap) + private static unsafe uint Encode(byte* threeBytes, byte* encodingMap) { - uint i = (uint)((threeBytes << 16) | (Unsafe.Add(ref threeBytes, 1) << 8) | Unsafe.Add(ref threeBytes, 2)); + nuint t0 = threeBytes[0]; + nuint t1 = threeBytes[1]; + nuint t2 = threeBytes[2]; - uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); - uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); - uint i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); - uint i3 = Unsafe.Add(ref encodingMap, (IntPtr)(i & 0x3F)); + nuint i = (t0 << 16) | (t1 << 8) | t2; - return i0 | (i1 << 8) | (i2 << 16) | (i3 << 24); + nuint i0 = encodingMap[i >> 18]; + nuint i1 = encodingMap[(i >> 12) & 0x3F]; + nuint i2 = encodingMap[(i >> 6) & 0x3F]; + nuint i3 = encodingMap[i & 0x3F]; + + return (uint)(i0 | (i1 << 8) | (i2 << 16) | (i3 << 24)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint EncodeAndPadOne(ref byte twoBytes, ref byte encodingMap) + private static unsafe uint EncodeAndPadOne(byte* twoBytes, byte* encodingMap) { - uint i = (uint)((twoBytes << 16) | (Unsafe.Add(ref twoBytes, 1) << 8)); + nuint t0 = twoBytes[0]; + nuint t1 = twoBytes[1]; + + nuint i = (t0 << 16) | (t1 << 8); - uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); - uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); - uint 
i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); + nuint i0 = encodingMap[i >> 18]; + nuint i1 = encodingMap[(i >> 12) & 0x3F]; + nuint i2 = encodingMap[(i >> 6) & 0x3F]; - return i0 | (i1 << 8) | (i2 << 16) | (EncodingPad << 24); + return (uint)(i0 | (i1 << 8) | (i2 << 16) | (EncodingPad << 24)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint EncodeAndPadTwo(ref byte oneByte, ref byte encodingMap) + private static unsafe uint EncodeAndPadTwo(byte* oneByte, byte* encodingMap) { - uint i = (uint)(oneByte << 8); + nuint t0 = oneByte[0]; + + nuint i = t0 << 8; - uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 10)); - uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 4) & 0x3F)); + nuint i0 = encodingMap[i >> 10]; + nuint i1 = encodingMap[(i >> 4) & 0x3F]; - return i0 | (i1 << 8) | (EncodingPad << 16) | (EncodingPad << 24); + return (uint)(i0 | (i1 << 8) | (EncodingPad << 16) | (EncodingPad << 24)); } // Pre-computing this table using a custom string(s_characters) and GenerateEncodingMapAndVerify (found in tests)