Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve dictionary lookup perf for OrdinalIgnoreCase #36252

Merged
merged 15 commits into from
Aug 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

using Common.System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Runtime.Serialization;
using Xunit;

namespace System.Collections.Tests
Expand Down Expand Up @@ -610,5 +612,37 @@ public void TrimExcess_Generic_DoesInvalidateEnumeration()
}

#endregion

#region Non-randomized comparers
[Fact]
public void Dictionary_Comparer_NonRandomizedStringComparers()
{
RunTest(null);
Copy link
Member

@danmoseley danmoseley May 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it be better to use MemberData in order to get separate pass/fail results for each of these comparers?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried that initially, but it was a bit cryptic to match the failure back to the argument instance. Maybe there's some secret sauce to getting a friendlier error message in that case?

Right now you get a line number as part of the exception if there's a failure. And while it's not ideal, at least it's a straightforward mapping.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. Possibly it would require some trick such as passing a 2nd parameter that was the comparer name.

// Each comparer gets its own RunTest call (instead of a data-driven theory) so that the line number
// reported with a failing assertion maps directly back to the comparer that was being exercised.
RunTest(EqualityComparer<string>.Default);
RunTest(StringComparer.Ordinal);
RunTest(StringComparer.OrdinalIgnoreCase);
RunTest(StringComparer.InvariantCulture);
RunTest(StringComparer.InvariantCultureIgnoreCase);
RunTest(StringComparer.Create(CultureInfo.InvariantCulture, ignoreCase: false));
RunTest(StringComparer.Create(CultureInfo.InvariantCulture, ignoreCase: true));

// Checks that a dictionary built with the given comparer hands back that very instance, both from its
// Comparer property and from the "Comparer" entry it writes into its serialization data.
void RunTest(IEqualityComparer<string> comparer)
{
    // A null comparer is expected to surface as EqualityComparer<string>.Default.
    object expectedComparer = comparer ?? EqualityComparer<string>.Default;

    // Step 1: the public Comparer property must be the same instance.
    var dictionary = new Dictionary<string, object>(comparer);
    Assert.Same(expectedComparer, dictionary.Comparer);

    // Step 2: fill a SerializationInfo (no formatter involved) and inspect the comparer stored in it.
    var info = new SerializationInfo(typeof(Dictionary<string, object>), new FormatterConverter());
    dictionary.GetObjectData(info, new StreamingContext(StreamingContextStates.All));

    object storedComparer = info.GetValue("Comparer", typeof(IEqualityComparer<string>));
    Assert.Same(expectedComparer, storedComparer);
}
}
#endregion
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Numerics;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Runtime.Serialization;
using Xunit;

namespace System.Collections.Tests
Expand Down Expand Up @@ -37,5 +41,238 @@ public static void OutOfBoundsRegression()
dictionary.Remove(key);
}
}

// For each well-known string comparer, checks the comparer type a Dictionary<string, object> exposes
// publicly and the type held in its private _comparer field, before and after the collision threshold.
[Fact]
public static void ComparerImplementations_Dictionary_WithWellKnownStringComparers()
{
    // The internal comparer types are not public, so they are resolved by name from the assembly that defines object.
    Assembly objectAssembly = typeof(object).Assembly;
    Type nonRandomizedOrdinal = objectAssembly.GetType("System.Collections.Generic.NonRandomizedStringEqualityComparer+OrdinalComparer", throwOnError: true);
    Type nonRandomizedOrdinalIgnoreCase = objectAssembly.GetType("System.Collections.Generic.NonRandomizedStringEqualityComparer+OrdinalIgnoreCaseComparer", throwOnError: true);
    Type randomizedOrdinal = objectAssembly.GetType("System.Collections.Generic.RandomizedStringEqualityComparer+OrdinalComparer", throwOnError: true);
    Type randomizedOrdinalIgnoreCase = objectAssembly.GetType("System.Collections.Generic.RandomizedStringEqualityComparer+OrdinalIgnoreCaseComparer", throwOnError: true);

    Type defaultComparerType = EqualityComparer<string>.Default.GetType();
    Type invariantCultureType = StringComparer.InvariantCulture.GetType();

    // Argument order: comparer, internal type before threshold, public type, internal type after threshold.

    // null comparer
    RunDictionaryTest(null, nonRandomizedOrdinal, defaultComparerType, randomizedOrdinal);

    // EqualityComparer<string>.Default comparer
    RunDictionaryTest(EqualityComparer<string>.Default, nonRandomizedOrdinal, defaultComparerType, randomizedOrdinal);

    // Ordinal comparer
    RunDictionaryTest(StringComparer.Ordinal, nonRandomizedOrdinal, StringComparer.Ordinal.GetType(), randomizedOrdinal);

    // OrdinalIgnoreCase comparer
    RunDictionaryTest(StringComparer.OrdinalIgnoreCase, nonRandomizedOrdinalIgnoreCase, StringComparer.OrdinalIgnoreCase.GetType(), randomizedOrdinalIgnoreCase);

    // linguistic comparer (not optimized): the same type is expected everywhere
    RunDictionaryTest(StringComparer.InvariantCulture, invariantCultureType, invariantCultureType, invariantCultureType);

    // Adapts Dictionary<string, object> to the collection-agnostic test driver.
    static void RunDictionaryTest(
        IEqualityComparer<string> equalityComparer,
        Type internalTypeBeforeThreshold,
        Type publicTypeBeforeThreshold,
        Type internalTypeAfterThreshold)
        => RunCollectionTestCommon(
            () => new Dictionary<string, object>(equalityComparer),
            (dictionary, key) => dictionary.Add(key, null),
            (dictionary, key) => dictionary.ContainsKey(key),
            dictionary => dictionary.Comparer,
            internalTypeBeforeThreshold,
            publicTypeBeforeThreshold,
            internalTypeAfterThreshold);
}

// For each well-known string comparer, checks the comparer type a HashSet<string> exposes publicly and
// the type held in its private _comparer field, before and after the collision threshold.
[Fact]
public static void ComparerImplementations_HashSet_WithWellKnownStringComparers()
{
    // The internal comparer types are not public, so they are resolved by name from the assembly that defines object.
    Assembly objectAssembly = typeof(object).Assembly;
    Type nonRandomizedOrdinal = objectAssembly.GetType("System.Collections.Generic.NonRandomizedStringEqualityComparer+OrdinalComparer", throwOnError: true);
    Type nonRandomizedOrdinalIgnoreCase = objectAssembly.GetType("System.Collections.Generic.NonRandomizedStringEqualityComparer+OrdinalIgnoreCaseComparer", throwOnError: true);
    Type randomizedOrdinal = objectAssembly.GetType("System.Collections.Generic.RandomizedStringEqualityComparer+OrdinalComparer", throwOnError: true);
    Type randomizedOrdinalIgnoreCase = objectAssembly.GetType("System.Collections.Generic.RandomizedStringEqualityComparer+OrdinalIgnoreCaseComparer", throwOnError: true);

    Type defaultComparerType = EqualityComparer<string>.Default.GetType();
    Type invariantCultureType = StringComparer.InvariantCulture.GetType();

    // Argument order: comparer, internal type before threshold, public type, internal type after threshold.

    // null comparer
    RunHashSetTest(null, nonRandomizedOrdinal, defaultComparerType, randomizedOrdinal);

    // EqualityComparer<string>.Default comparer
    RunHashSetTest(EqualityComparer<string>.Default, nonRandomizedOrdinal, defaultComparerType, randomizedOrdinal);

    // Ordinal comparer
    RunHashSetTest(StringComparer.Ordinal, nonRandomizedOrdinal, StringComparer.Ordinal.GetType(), randomizedOrdinal);

    // OrdinalIgnoreCase comparer
    RunHashSetTest(StringComparer.OrdinalIgnoreCase, nonRandomizedOrdinalIgnoreCase, StringComparer.OrdinalIgnoreCase.GetType(), randomizedOrdinalIgnoreCase);

    // linguistic comparer (not optimized): the same type is expected everywhere
    RunHashSetTest(StringComparer.InvariantCulture, invariantCultureType, invariantCultureType, invariantCultureType);

    // Adapts HashSet<string> to the collection-agnostic test driver; every Add must report a new element.
    static void RunHashSetTest(
        IEqualityComparer<string> equalityComparer,
        Type internalTypeBeforeThreshold,
        Type publicTypeBeforeThreshold,
        Type internalTypeAfterThreshold)
        => RunCollectionTestCommon(
            () => new HashSet<string>(equalityComparer),
            (set, key) => Assert.True(set.Add(key)),
            (set, key) => set.Contains(key),
            set => set.Comparer,
            internalTypeBeforeThreshold,
            publicTypeBeforeThreshold,
            internalTypeAfterThreshold);
}

// Shared driver for the Dictionary/HashSet comparer tests.
//
// collectionFactory      - creates a new collection; called once for the populated collection and once
//                          more for the serialization check.
// addKeyCallback         - adds a key to the collection.
// containsKeyCallback    - reports whether the collection contains a key.
// getComparerCallback    - returns the collection's publicly visible comparer.
// expected*              - comparer types expected from the private _comparer field (internal) and from
//                          getComparerCallback (public) at the respective stage.
private static void RunCollectionTestCommon<TCollection>(
    Func<TCollection> collectionFactory,
    Action<TCollection, string> addKeyCallback,
    Func<TCollection, string, bool> containsKeyCallback,
    Func<TCollection, IEqualityComparer<string>> getComparerCallback,
    Type expectedInternalComparerBeforeCollisionThreshold,
    Type expectedPublicComparerBeforeCollisionThreshold,
    Type expectedComparerAfterCollisionThreshold)
{
    // Seeds start in the Unicode Private Use range so generated strings never genuinely compare equal under OrdinalIgnoreCase.
    const int StartOfRange = 0xE020;
    // The stride keeps the 0x20 bit of the seed set, which is what negates OrdinalIgnoreCase effects.
    const int Stride = 0x40;

    TCollection populated = collectionFactory();
    List<string> insertedKeys = new List<string>();

    // Phase 1: go right up to the collision threshold without exceeding it.
    AddCollidingKeys(0, 100);

    FieldInfo internalComparerField = populated.GetType().GetField("_comparer", BindingFlags.NonPublic | BindingFlags.Instance);
    Assert.NotNull(internalComparerField);

    Assert.Equal(expectedInternalComparerBeforeCollisionThreshold, internalComparerField.GetValue(populated)?.GetType());
    Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, getComparerCallback(populated).GetType());

    // Phase 2: exceed the threshold (which should rebucket entries) and keep adding a few more keys
    // to make sure internal state was not corrupted.
    AddCollidingKeys(100, 110);

    Assert.Equal(expectedComparerAfterCollisionThreshold, internalComparerField.GetValue(populated)?.GetType());
    // The publicly visible comparer must not change once the threshold has been met.
    Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, getComparerCallback(populated).GetType());

    // Every key added so far must still be found.
    foreach (string insertedKey in insertedKeys)
    {
        Assert.True(containsKeyCallback(populated, insertedKey));
    }

    // Finally, a freshly created collection must put the public comparer, not the internal one,
    // into its serialized object data.
    TCollection fresh = collectionFactory();
    SerializationInfo info = new SerializationInfo(fresh.GetType(), new FormatterConverter());
    ((ISerializable)fresh).GetObjectData(info, new StreamingContext());

    object serializedComparer = info.GetValue("Comparer", typeof(IEqualityComparer<string>));
    Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, serializedComparer.GetType());

    // Generates keys for indices [firstIndex, endIndex), verifies their well-known hash codes, and adds them.
    void AddCollidingKeys(int firstIndex, int endIndex)
    {
        for (int index = firstIndex; index < endIndex; index++)
        {
            string collidingKey = GenerateCollidingString(index * Stride + StartOfRange);

            // Ordinal hash code must be zero.
            Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(collidingKey));
            // OrdinalIgnoreCase hash code must be the well-known constant (note: not zero).
            Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(collidingKey));

            addKeyCallback(populated, collidingKey);
            insertedKeys.Add(collidingKey);
        }
    }
}

// Lazily-created open delegate bound to the non-public instance method string.GetNonRandomizedHashCode.
// Declared readonly: the Lazy wrapper is assigned once here and is never meant to be replaced.
private static readonly Lazy<Func<string, int>> _lazyGetNonRandomizedHashCodeDel = new Lazy<Func<string, int>>(
    () => GetStringHashCodeOpenDelegate("GetNonRandomizedHashCode"));

// Lazily-created open delegate bound to the non-public instance method string.GetNonRandomizedHashCodeOrdinalIgnoreCase.
private static readonly Lazy<Func<string, int>> _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel = new Lazy<Func<string, int>>(
    () => GetStringHashCodeOpenDelegate("GetNonRandomizedHashCodeOrdinalIgnoreCase"));

// Builds an 8-character string whose non-randomized hash codes are fixed regardless of the seed:
// - string.GetNonRandomizedHashCode returns 0.
// - string.GetNonRandomizedHashCodeOrdinalIgnoreCase returns 0x24716ca0.
// Passing a different seed yields a different string.
private static string GenerateCollidingString(int seed)
{
    return string.Create(8, seed, (chars, state) =>
    {
        // Treat the 8 chars as four 32-bit words; only the first three are written here.
        Span<uint> words = MemoryMarshal.Cast<char, uint>(chars);

        uint initial = (5381 << 16) + 5381;
        uint mixed = BitOperations.RotateLeft(initial, 5) + initial;

        words[0] = (uint)state;
        words[1] = mixed; // set hash2 := 0 (for Ordinal)

        // NOTE(review): presumably mirrors the mixing step of string.GetNonRandomizedHashCode - confirm against its source.
        uint afterSeed = mixed ^ (uint)state;
        words[2] = BitOperations.RotateLeft(afterSeed, 5) + afterSeed; // set hash1 := 0 (for Ordinal)
    });
}

// Looks up a non-public instance method on System.String by name and wraps it in an open delegate,
// i.e. one whose first argument supplies the string instance to invoke the method on.
// Fails the test (Assert.NotNull) if no such method is found.
private static Func<string, int> GetStringHashCodeOpenDelegate(string methodName)
{
    const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;

    MethodInfo hashMethod = typeof(string).GetMethod(methodName, NonPublicInstance);
    Assert.NotNull(hashMethod);

    // A null target leaves the delegate unbound to 'this'.
    Delegate openDelegate = hashMethod.CreateDelegate(typeof(Func<string, int>), null);
    return (Func<string, int>)openDelegate;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\IEnumerable.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\IEnumerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\IEqualityComparer.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\IInternalStringEqualityComparer.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\IList.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\InsertionBehavior.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\IReadOnlyCollection.cs" />
Expand All @@ -180,6 +181,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\KeyNotFoundException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\KeyValuePair.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\List.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\RandomizedStringEqualityComparer.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\NonRandomizedStringEqualityComparer.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\Generic\ValueListBuilder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Collections\HashHelpers.cs" />
Expand Down
Loading