Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release/5.0-preview8] ARM64 HWIntrinsic usage in System.Text.Unicode #39738

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using System.Numerics;

Expand Down Expand Up @@ -78,7 +79,7 @@ static Utf16Utility()
long tempUtf8CodeUnitCountAdjustment = 0;
int tempScalarCountAdjustment = 0;

if (Sse2.IsSupported)
if ((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported)
{
if (inputLength >= Vector128<ushort>.Count)
{
Expand All @@ -87,17 +88,34 @@ static Utf16Utility()
Vector128<short> vector8800 = Vector128.Create(unchecked((short)0x8800));
Vector128<ushort> vectorZero = Vector128<ushort>.Zero;

Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
Vector128.Create(0x80402010_08040201).AsByte() :
Vector128.Create(0x01020408_10204080).AsByte();

do
{
Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
uint mask;
Vector128<ushort> utf16Data;
if (AdvSimd.Arm64.IsSupported)
{
utf16Data = AdvSimd.LoadVector128((ushort*)pInputBuffer); // unaligned
}
else
{
utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
}

Vector128<ushort> charIsNonAscii;
if (Sse41.IsSupported)

if (AdvSimd.Arm64.IsSupported)
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
charIsNonAscii = AdvSimd.Min(utf16Data, vector0080);
}
else if (Sse41.IsSupported)
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)

charIsNonAscii = Sse41.Min(utf16Data, vector0080);
}
else
Expand All @@ -111,16 +129,34 @@ static Utf16Utility()

#if DEBUG
// Quick check to ensure we didn't accidentally set the 0x8000 bit of any element.
uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
uint debugMask;
if (AdvSimd.Arm64.IsSupported)
{
debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte(), bitMask128);
}
else
{
debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
}
Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'.");
#endif // DEBUG

// Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding
// input was 0x0800 <= [value]. This also handles the missing range a few lines above.

Vector128<ushort> charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
Vector128<ushort> charIsThreeByteUtf8Encoded;
uint mask;

mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
if (AdvSimd.IsSupported)
{
charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11));
mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte(), bitMask128);
}
else
{
charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
}

// Each even bit of mask will be 1 only if the char was >= 0x0080,
// and each odd bit of mask will be 1 only if the char was >= 0x0800.
Expand Down Expand Up @@ -151,9 +187,16 @@ static Utf16Utility()
// Surrogates need to be special-cased for two reasons: (a) we need
// to account for the fact that we over-counted in the addition above;
// and (b) they require separate validation.

utf16Data = Sse2.Add(utf16Data, vectorA800);
mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
if (AdvSimd.Arm64.IsSupported)
{
utf16Data = AdvSimd.Add(utf16Data, vectorA800);
mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte(), bitMask128);
}
else
{
utf16Data = Sse2.Add(utf16Data, vectorA800);
mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
}

if (mask != 0)
{
Expand All @@ -178,7 +221,15 @@ static Utf16Utility()
// Since 'mask' already has 00 in these positions (since the corresponding char
// wasn't a surrogate), "mask AND mask2 == 00" holds for these positions.

uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
uint mask2;
if (AdvSimd.Arm64.IsSupported)
{
mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte(), bitMask128);
}
else
{
mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
}

// 'lowSurrogatesMask' has its bits occur in pairs:
// - 01 if the corresponding char was a low surrogate char,
Expand Down Expand Up @@ -433,5 +484,20 @@ static Utf16Utility()
scalarCountAdjustment = tempScalarCountAdjustment;
return pInputBuffer;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bitMask128)
{
Debug.Assert(AdvSimd.Arm64.IsSupported);

Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128);

// self-pairwise add until all flags have moved to the first two bytes of the vector
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
return extractedBits.AsUInt16().ToScalar();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

#if SYSTEM_PRIVATE_CORELIB
Expand Down Expand Up @@ -882,7 +883,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
// is not enabled.

Unsafe.SkipInit(out Vector128<short> nonAsciiUtf16DataMask);
if (Sse41.X64.IsSupported)
if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
{
nonAsciiUtf16DataMask = Vector128.Create(unchecked((short)0xFF80)); // mask of non-ASCII bits in a UTF-16 char
}
Expand Down Expand Up @@ -940,10 +941,8 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2;
uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining);

if (Sse41.X64.IsSupported)
if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
{
Debug.Assert(BitConverter.IsLittleEndian, "SSE41 requires little-endian.");

// Try reading and writing 8 elements per iteration.
uint maxIters = minElementsRemaining / 8;
ulong possibleNonAsciiQWord;
Expand All @@ -952,14 +951,30 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
for (i = 0; (uint)i < maxIters; i++)
{
utf16Data = Unsafe.ReadUnaligned<Vector128<short>>(pInputBuffer);
if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask))

if (AdvSimd.IsSupported)
{
goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
}
Vector128<short> isUtf16DataNonAscii = AdvSimd.CompareTest(utf16Data, nonAsciiUtf16DataMask);
bool hasNonAsciiDataInVector = AdvSimd.Arm64.MinPairwise(isUtf16DataNonAscii, isUtf16DataNonAscii).AsUInt64().ToScalar() != 0;

// narrow and write
if (hasNonAsciiDataInVector)
{
goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
}

Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64());
Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
AdvSimd.Store(pOutputBuffer, lower);
}
else
{
if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask))
{
goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
}

// narrow and write
Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64());
}

pInputBuffer += 8;
pOutputBuffer += 8;
Expand All @@ -978,7 +993,16 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
}

utf16Data = Vector128.CreateScalarUnsafe(possibleNonAsciiQWord).AsInt16();
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));

if (AdvSimd.IsSupported)
{
Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0);
}
else
{
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
}

pInputBuffer += 4;
pOutputBuffer += 4;
Expand All @@ -990,7 +1014,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
LoopTerminatedDueToNonAsciiDataInVectorLocal:

outputBytesRemaining -= 8 * i;
possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64());

if (Sse2.X64.IsSupported)
{
possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64());
}
else
{
possibleNonAsciiQWord = utf16Data.AsUInt64().ToScalar();
}

// Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector,
// then check whether it's all-ASCII. If so, narrow and write to the destination
Expand All @@ -1000,7 +1032,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt

if (Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)) // all chars in first QWORD are ASCII
{
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
if (AdvSimd.IsSupported)
{
Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0);
}
else
{
Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
}
pInputBuffer += 4;
pOutputBuffer += 4;
outputBytesRemaining -= 4;
Expand Down
Loading