Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable R2R for NarrowUtf16ToAscii / WidenAsciiToUtf16 #90361

Merged
merged 2 commits into from
Aug 11, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 1 addition & 128 deletions src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -347,54 +347,6 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n

byte* pOriginalBuffer = pBuffer;

// Before we drain off byte-by-byte, try a generic vectorized loop.
// Only run the loop if we have at least two vectors we can pull out.
// Note use of SBYTE instead of BYTE below; we're using the two's-complement
// representation of negative integers to act as a surrogate for "is ASCII?".

if (Vector.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector<sbyte>.Count)
{
uint SizeOfVectorInBytes = (uint)Vector<sbyte>.Count; // JIT will make this a const

if (Vector.GreaterThanOrEqualAll(Unsafe.ReadUnaligned<Vector<sbyte>>(pBuffer), Vector<sbyte>.Zero))
{
// The first several elements of the input buffer were ASCII. Bump up the pointer to the
// next aligned boundary, then perform aligned reads from here on out until we find non-ASCII
// data or we approach the end of the buffer. It's possible we'll reread data; this is ok.

byte* pFinalVectorReadPos = pBuffer + bufferLength - SizeOfVectorInBytes;
pBuffer = (byte*)(((nuint)pBuffer + SizeOfVectorInBytes) & ~(nuint)(SizeOfVectorInBytes - 1));

#if DEBUG
long numBytesRead = pBuffer - pOriginalBuffer;
Debug.Assert(0 < numBytesRead && numBytesRead <= SizeOfVectorInBytes, "We should've made forward progress of at least one byte.");
Debug.Assert((nuint)numBytesRead <= bufferLength, "We shouldn't have read past the end of the input buffer.");
#endif

Debug.Assert(pBuffer <= pFinalVectorReadPos, "Should be able to read at least one vector.");

do
{
Debug.Assert((nuint)pBuffer % SizeOfVectorInBytes == 0, "Vector read should be aligned.");
if (Vector.LessThanAny(Unsafe.Read<Vector<sbyte>>(pBuffer), Vector<sbyte>.Zero))
{
break; // found non-ASCII data
}

pBuffer += SizeOfVectorInBytes;
} while (pBuffer <= pFinalVectorReadPos);

// Adjust the remaining buffer length for the number of elements we just consumed.

bufferLength -= (nuint)pBuffer;
bufferLength += (nuint)pOriginalBuffer;
}
}

// At this point, the buffer length wasn't enough to perform a vectorized search, or we did perform
// a vectorized search and encountered non-ASCII data. In either case go down a non-vectorized code
// path to drain any remaining ASCII bytes.
//
// We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads.
// This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII bytes.

Expand Down Expand Up @@ -1616,7 +1568,7 @@ internal static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAscii
uint utf16Data32BitsHigh = 0, utf16Data32BitsLow = 0;
ulong utf16Data64Bits = 0;

if (Vector128.IsHardwareAccelerated && BitConverter.IsLittleEndian && elementCount >= 2 * (uint)Vector128<byte>.Count)
if (BitConverter.IsLittleEndian && Vector128.IsHardwareAccelerated && elementCount >= 2 * (uint)Vector128<byte>.Count)
{
// Since there's overhead to setting up the vectorized code path, we only want to
// call into it after a quick probe to ensure the next immediate characters really are ASCII.
Expand Down Expand Up @@ -1652,56 +1604,6 @@ internal static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAscii
currentOffset = NarrowUtf16ToAscii_Intrinsified(pUtf16Buffer, pAsciiBuffer, elementCount);
}
}
else if (Vector.IsHardwareAccelerated)
{
uint SizeOfVector = (uint)sizeof(Vector<byte>); // JIT will make this a const

// Only bother vectorizing if we have enough data to do so.
if (elementCount >= 2 * SizeOfVector)
{
// Since there's overhead to setting up the vectorized code path, we only want to
// call into it after a quick probe to ensure the next immediate characters really are ASCII.
// If we see non-ASCII data, we'll jump immediately to the draining logic at the end of the method.

if (IntPtr.Size >= 8)
{
utf16Data64Bits = Unsafe.ReadUnaligned<ulong>(pUtf16Buffer);
if (!AllCharsInUInt64AreAscii(utf16Data64Bits))
{
goto FoundNonAsciiDataIn64BitRead;
}
}
else
{
utf16Data32BitsHigh = Unsafe.ReadUnaligned<uint>(pUtf16Buffer);
utf16Data32BitsLow = Unsafe.ReadUnaligned<uint>(pUtf16Buffer + 4 / sizeof(char));
if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh | utf16Data32BitsLow))
{
goto FoundNonAsciiDataIn64BitRead;
}
}

Vector<ushort> maxAscii = new Vector<ushort>(0x007F);

nuint finalOffsetWhereCanLoop = elementCount - 2 * SizeOfVector;
do
{
Vector<ushort> utf16VectorHigh = Unsafe.ReadUnaligned<Vector<ushort>>(pUtf16Buffer + currentOffset);
Vector<ushort> utf16VectorLow = Unsafe.ReadUnaligned<Vector<ushort>>(pUtf16Buffer + currentOffset + Vector<ushort>.Count);

if (Vector.GreaterThanAny(Vector.BitwiseOr(utf16VectorHigh, utf16VectorLow), maxAscii))
{
break; // found non-ASCII data
}

// TODO: Is the below logic also valid for big-endian platforms?
Vector<byte> asciiVector = Vector.Narrow(utf16VectorHigh, utf16VectorLow);
Unsafe.WriteUnaligned(pAsciiBuffer + currentOffset, asciiVector);

currentOffset += SizeOfVector;
} while (currentOffset <= finalOffsetWhereCanLoop);
}
}

Debug.Assert(currentOffset <= elementCount);
nuint remainingElementCount = elementCount - currentOffset;
Expand Down Expand Up @@ -2455,35 +2357,6 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
} while (currentOffset <= finalOffsetWhereCanRunLoop);
}
}
else if (Vector.IsHardwareAccelerated)
{
uint SizeOfVector = (uint)sizeof(Vector<byte>); // JIT will make this a const

// Only bother vectorizing if we have enough data to do so.
if (elementCount >= SizeOfVector)
{
// Note use of SBYTE instead of BYTE below; we're using the two's-complement
// representation of negative integers to act as a surrogate for "is ASCII?".

nuint finalOffsetWhereCanLoop = elementCount - SizeOfVector;
do
{
Vector<sbyte> asciiVector = Unsafe.ReadUnaligned<Vector<sbyte>>(pAsciiBuffer + currentOffset);
if (Vector.LessThanAny(asciiVector, Vector<sbyte>.Zero))
{
break; // found non-ASCII data
}

Vector.Widen(Vector.AsVectorByte(asciiVector), out Vector<ushort> utf16LowVector, out Vector<ushort> utf16HighVector);

// TODO: Is the below logic also valid for big-endian platforms?
Unsafe.WriteUnaligned(pUtf16Buffer + currentOffset, utf16LowVector);
Unsafe.WriteUnaligned(pUtf16Buffer + currentOffset + Vector<ushort>.Count, utf16HighVector);

currentOffset += SizeOfVector;
} while (currentOffset <= finalOffsetWhereCanLoop);
}
}

Debug.Assert(currentOffset <= elementCount);
nuint remainingElementCount = elementCount - currentOffset;
Expand Down