diff --git a/src/Markdig/Helpers/CharacterMap.cs b/src/Markdig/Helpers/CharacterMap.cs
index 932abb47c..067ac263e 100644
--- a/src/Markdig/Helpers/CharacterMap.cs
+++ b/src/Markdig/Helpers/CharacterMap.cs
@@ -7,6 +7,11 @@
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
+#if NETCOREAPP3_1_OR_GREATER
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
namespace Markdig.Helpers
{
@@ -16,6 +21,10 @@ namespace Markdig.Helpers
///
public sealed class CharacterMap where T : class
{
+#if NETCOREAPP3_1_OR_GREATER
+ private readonly Vector128 _asciiBitmap;
+#endif
+
private readonly T[] asciiMap;
private readonly Dictionary? nonAsciiMap;
private readonly BoolVector128 isOpeningCharacter;
@@ -39,6 +48,11 @@ public CharacterMap(IEnumerable> maps)
if (openingChar < 128)
{
maxChar = Math.Max(maxChar, openingChar);
+
+ if (openingChar == 0)
+ {
+ ThrowHelper.ArgumentOutOfRangeException("Null is not a valid opening character.", nameof(maps));
+ }
}
else
{
@@ -64,6 +78,23 @@ public CharacterMap(IEnumerable> maps)
nonAsciiMap[openingChar] = state.Value;
}
}
+
+#if NETCOREAPP3_1_OR_GREATER
+ if (nonAsciiMap is null)
+ {
+ long bitmap_0_3 = 0;
+ long bitmap_4_7 = 0;
+
+ foreach (char openingChar in OpeningCharacters)
+ {
+ int position = (openingChar >> 4) | ((openingChar & 0x0F) << 3);
+ if (position < 64) bitmap_0_3 |= 1L << position;
+ else bitmap_4_7 |= 1L << (position - 64);
+ }
+
+ _asciiBitmap = Vector128.Create(bitmap_0_3, bitmap_4_7).AsByte();
+ }
+#endif
}
///
@@ -105,9 +136,63 @@ public T? this[uint openingChar]
/// Index position within the string of the first opening character found in the specified text; if not found, returns -1
public int IndexOfOpeningCharacter(string text, int start, int end)
{
+ Debug.Assert(text is not null);
+ Debug.Assert(start >= 0 && end >= 0);
+ Debug.Assert(end - start + 1 >= 0);
+ Debug.Assert(end - start + 1 <= text.Length);
+
if (nonAsciiMap is null)
{
#if NETCOREAPP3_1_OR_GREATER
+ if (Ssse3.IsSupported && BitConverter.IsLittleEndian)
+ {
+ // Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
+ // Optimized for sets in the [1, 127] range
+
+ int lengthMinusOne = end - start;
+ int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128.Count - 1);
+ int finalStart = start + charsToProcessVectorized;
+
+ if (start < finalStart)
+ {
+ ref char textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start);
+ Vector128 bitmap = _asciiBitmap;
+ do
+ {
+ // Load 32 bytes (16 chars) into two Vector128s (chars)
+ // Drop the high byte of each char
+ // Pack the remaining bytes into a single Vector128
+ Vector128 input = Sse2.PackUnsignedSaturate(
+ Unsafe.ReadUnaligned>(ref Unsafe.As(ref textStartRef)),
+ Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref textStartRef, Vector128.Count))));
+
+ // Extract the higher nibble of each character ((input >> 4) & 0xF)
+ Vector128 higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF));
+
+ // Lookup the matching higher nibble for each character based on the lower nibble
+ // PSHUFB will set the result to 0 for any non-ASCII (> 127) character
+ Vector128 bitsets = Ssse3.Shuffle(bitmap, input);
+
+ // Calculate a bitmask (1 << (higherNibble % 8)) for each character
+ Vector128 bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles);
+
+ // Check which characters are present in the set
+ // We are relying on bitsets being zero for non-ASCII characters
+ Vector128 result = Sse2.And(bitsets, bitmask);
+
+ if (!result.Equals(Vector128.Zero))
+ {
+ int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128.Zero));
+ return start + BitOperations.TrailingZeroCount((uint)resultMask);
+ }
+
+ start += 2 * Vector128.Count;
+ textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128.Count);
+ }
+ while (start != finalStart);
+ }
+ }
+
ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
for (; start <= end; start++)
{