Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize CharacterMap.IndexOfOpeningCharacter #622

Merged
merged 1 commit into from
Apr 20, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions src/Markdig/Helpers/CharacterMap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
#if NETCOREAPP3_1_OR_GREATER
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace Markdig.Helpers
{
Expand All @@ -16,6 +21,10 @@ namespace Markdig.Helpers
/// <typeparam name="T"></typeparam>
public sealed class CharacterMap<T> where T : class
{
#if NETCOREAPP3_1_OR_GREATER
private readonly Vector128<byte> _asciiBitmap;
#endif

private readonly T[] asciiMap;
private readonly Dictionary<uint, T>? nonAsciiMap;
private readonly BoolVector128 isOpeningCharacter;
Expand All @@ -39,6 +48,11 @@ public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
if (openingChar < 128)
{
maxChar = Math.Max(maxChar, openingChar);

if (openingChar == 0)
{
ThrowHelper.ArgumentOutOfRangeException("Null is not a valid opening character.", nameof(maps));
}
}
else
{
Expand All @@ -64,6 +78,23 @@ public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
nonAsciiMap[openingChar] = state.Value;
}
}

#if NETCOREAPP3_1_OR_GREATER
if (nonAsciiMap is null)
{
long bitmap_0_3 = 0;
long bitmap_4_7 = 0;

foreach (char openingChar in OpeningCharacters)
{
int position = (openingChar >> 4) | ((openingChar & 0x0F) << 3);
if (position < 64) bitmap_0_3 |= 1L << position;
else bitmap_4_7 |= 1L << (position - 64);
}

_asciiBitmap = Vector128.Create(bitmap_0_3, bitmap_4_7).AsByte();
}
#endif
}

/// <summary>
Expand Down Expand Up @@ -105,9 +136,63 @@ public T? this[uint openingChar]
/// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns>
public int IndexOfOpeningCharacter(string text, int start, int end)
{
Debug.Assert(text is not null);
Debug.Assert(start >= 0 && end >= 0);
Debug.Assert(end - start + 1 >= 0);
Debug.Assert(end - start + 1 <= text.Length);

if (nonAsciiMap is null)
{
#if NETCOREAPP3_1_OR_GREATER
if (Ssse3.IsSupported && BitConverter.IsLittleEndian)
{
// Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
// Optimized for sets in the [1, 127] range

int lengthMinusOne = end - start;
int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128<short>.Count - 1);
int finalStart = start + charsToProcessVectorized;

if (start < finalStart)
{
ref char textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start);
Vector128<byte> bitmap = _asciiBitmap;
do
{
// Load 32 bytes (16 chars) into two Vector128<short>s (chars)
// Drop the high byte of each char
// Pack the remaining bytes into a single Vector128<byte>
Vector128<byte> input = Sse2.PackUnsignedSaturate(
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref textStartRef)),
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref textStartRef, Vector128<short>.Count))));

// Extract the higher nibble of each character ((input >> 4) & 0xF)
Vector128<byte> higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF));

// Lookup the matching higher nibble for each character based on the lower nibble
// PSHUFB will set the result to 0 for any non-ASCII (> 127) character
Vector128<byte> bitsets = Ssse3.Shuffle(bitmap, input);

// Calculate a bitmask (1 << (higherNibble % 8)) for each character
Vector128<byte> bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles);

// Check which characters are present in the set
// We are relying on bitsets being zero for non-ASCII characters
Vector128<byte> result = Sse2.And(bitsets, bitmask);

if (!result.Equals(Vector128<byte>.Zero))
{
int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128<byte>.Zero));
return start + BitOperations.TrailingZeroCount((uint)resultMask);
}

start += 2 * Vector128<short>.Count;
textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128<short>.Count);
}
while (start != finalStart);
}
}

ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
for (; start <= end; start++)
{
Expand Down