diff --git a/src/libraries/Common/src/System/HexConverter.cs b/src/libraries/Common/src/System/HexConverter.cs index ccce1cb691f10..5d4071b4f525b 100644 --- a/src/libraries/Common/src/System/HexConverter.cs +++ b/src/libraries/Common/src/System/HexConverter.cs @@ -281,7 +281,7 @@ public static bool TryDecodeFromUtf16_Vector128(ReadOnlySpan chars, Span nibbles = Vector128.Min(t2 - Vector128.Create((byte)0xF0), t4); // Any high bit is a sign that input is not a valid hex data - if (!Utf16Utility.AllCharsInVector128AreAscii(vec1 | vec2) || + if (!Utf16Utility.AllCharsInVectorAreAscii(vec1 | vec2) || Vector128.AddSaturate(nibbles, Vector128.Create((byte)(127 - 15))).ExtractMostSignificantBits() != 0) { // Input is either non-ASCII or invalid hex data diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs index 36854cd07bab7..b7de19aab570d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs @@ -78,39 +78,73 @@ internal static int CompareStringIgnoreCaseNonAscii(ref char strA, int lengthA, return OrdinalCasing.CompareStringIgnoreCase(ref strA, lengthA, ref strB, lengthB); } - private static bool EqualsIgnoreCase_Vector128(ref char charA, ref char charB, int length) + private static bool EqualsIgnoreCase_Vector(ref char charA, ref char charB, int length) + where TVector : struct, ISimdVector { - Debug.Assert(length >= Vector128.Count); - Debug.Assert(Vector128.IsHardwareAccelerated); + Debug.Assert(length >= TVector.Count); nuint lengthU = (nuint)length; - nuint lengthToExamine = lengthU - (nuint)Vector128.Count; + nuint lengthToExamine = lengthU - (nuint)TVector.Count; nuint i = 0; - Vector128 vec1; - Vector128 vec2; + TVector vec1; + TVector vec2; + TVector loweringMask = TVector.Create(0x20); + TVector vecA = TVector.Create('a'); + TVector vecZMinusA = TVector.Create('z' - 'a'); do { - vec1 = Vector128.LoadUnsafe(ref charA, i); - vec2 = Vector128.LoadUnsafe(ref charB, i); + vec1 = TVector.LoadUnsafe(ref Unsafe.As(ref charA), i); + vec2 = TVector.LoadUnsafe(ref Unsafe.As(ref charB), i); - if (!Utf16Utility.AllCharsInVector128AreAscii(vec1 | vec2)) + if (!Utf16Utility.AllCharsInVectorAreAscii(vec1 | vec2)) { goto NON_ASCII; } - if (!Utf16Utility.Vector128OrdinalIgnoreCaseAscii(vec1, vec2)) + TVector notEquals = ~TVector.Equals(vec1, vec2); + if (!notEquals.Equals(TVector.Zero)) { - return false; - } + // not exact match - i += (nuint)Vector128.Count; + vec1 |= loweringMask; + vec2 |= loweringMask; + if (TVector.GreaterThanAny((vec1 - vecA) & notEquals, vecZMinusA) || !vec1.Equals(vec2)) + { + return false; // first input isn't in [A-Za-z], and not exact match of lowered + } + } + i += (nuint)TVector.Count; } while (i <= lengthToExamine); - // Use scalar path for trailing elements - return i == lengthU || EqualsIgnoreCase(ref Unsafe.Add(ref charA, i), ref Unsafe.Add(ref charB, i), (int)(lengthU - i)); + // Handle trailing elements + if (i != lengthU) + { + i = lengthU - (nuint)TVector.Count; + vec1 = TVector.LoadUnsafe(ref Unsafe.As(ref charA), i); + vec2 = TVector.LoadUnsafe(ref Unsafe.As(ref charB), i); + + if (!Utf16Utility.AllCharsInVectorAreAscii(vec1 | vec2)) + { + goto NON_ASCII; + } + + TVector notEquals = ~TVector.Equals(vec1, vec2); + if (!notEquals.Equals(TVector.Zero)) + { + // not exact match + + vec1 |= loweringMask; + vec2 |= loweringMask; + if (TVector.GreaterThanAny((vec1 - vecA) & notEquals, vecZMinusA) || !vec1.Equals(vec2)) + { + return false; // first input isn't in [A-Za-z], and not exact match of lowered + } + } + } + return true; NON_ASCII: - if (Utf16Utility.AllCharsInVector128AreAscii(vec1) || Utf16Utility.AllCharsInVector128AreAscii(vec2)) + if (Utf16Utility.AllCharsInVectorAreAscii(vec1) || Utf16Utility.AllCharsInVectorAreAscii(vec2)) { // No need to use the fallback if one of the inputs is full-ASCII return false; @@ -129,8 +163,15 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length { return EqualsIgnoreCase_Scalar(ref charA, ref charB, length); } - - return EqualsIgnoreCase_Vector128(ref charA, ref charB, length); + if (Vector512.IsHardwareAccelerated && length >= Vector512.Count) + { + return EqualsIgnoreCase_Vector>(ref charA, ref charB, length); + } + if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) + { + return EqualsIgnoreCase_Vector>(ref charA, ref charB, length); + } + return EqualsIgnoreCase_Vector>(ref charA, ref charB, length); } internal static bool EqualsIgnoreCase_Scalar(ref char charA, ref char charB, int length) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs index 6e956bbbcbe3b..2057d02c1570f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs @@ -278,41 +278,13 @@ internal static bool UInt64OrdinalIgnoreCaseAscii(ulong valueA, ulong valueB) } /// - /// Returns true iff the Vector128 represents 8 ASCII UTF-16 characters in machine endianness. + /// Returns true iff the TVector represents ASCII UTF-16 characters in machine endianness. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool AllCharsInVector128AreAscii(Vector128 vec) + internal static bool AllCharsInVectorAreAscii(TVector vec) + where TVector : struct, ISimdVector { - return (vec & Vector128.Create(unchecked((ushort)~0x007F))) == Vector128.Zero; - } - - /// - /// Given two Vector128 that represent 8 ASCII UTF-16 characters each, returns true iff - /// the two inputs are equal using an ordinal case-insensitive comparison. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool Vector128OrdinalIgnoreCaseAscii(Vector128 vec1, Vector128 vec2) - { - // ASSUMPTION: Caller has validated that input values are ASCII. - - // the 0x80 bit of each word of 'lowerIndicator' will be set iff the word has value >= 'A' - Vector128 lowIndicator1 = Vector128.Create((sbyte)(0x80 - 'A')) + vec1.AsSByte(); - Vector128 lowIndicator2 = Vector128.Create((sbyte)(0x80 - 'A')) + vec2.AsSByte(); - - // the 0x80 bit of each word of 'combinedIndicator' will be set iff the word has value >= 'A' and <= 'Z' - Vector128 combIndicator1 = - Vector128.LessThan(Vector128.Create(unchecked((sbyte)(('Z' - 'A') - 0x80))), lowIndicator1); - Vector128 combIndicator2 = - Vector128.LessThan(Vector128.Create(unchecked((sbyte)(('Z' - 'A') - 0x80))), lowIndicator2); - - // Convert both vectors to lower case by adding 0x20 bit for all [A-Z][a-z] characters - Vector128 lcVec1 = - Vector128.AndNot(Vector128.Create((sbyte)0x20), combIndicator1) + vec1.AsSByte(); - Vector128 lcVec2 = - Vector128.AndNot(Vector128.Create((sbyte)0x20), combIndicator2) + vec2.AsSByte(); - - // Compare two lowercased vectors - return (lcVec1 ^ lcVec2) == Vector128.Zero; + return (vec & TVector.Create(unchecked((ushort)~0x007F))).Equals(TVector.Zero); } } }