Skip to content
This repository has been archived by the owner on Aug 2, 2023. It is now read-only.

DictionarySlim #2458

Merged
merged 96 commits into from
Nov 17, 2018
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
96 commits
Select commit Hold shift + click to select a range
445c7a3
RefDictionary start
Aug 28, 2018
1de719c
style fixes
Sep 1, 2018
f0469cc
add first perf test
Sep 3, 2018
94e5f0b
small style fixes, more sizes
Sep 4, 2018
ddafa87
further review
Sep 8, 2018
fbdfec2
capacity need to be power of 2 for som reason
Sep 9, 2018
e6a649e
bit faster by reordering fields
Sep 9, 2018
da8da30
refdictionary add knucleotide perf tests
Sep 11, 2018
fd98afc
fix size power of 2 bug
Sep 11, 2018
6c43333
groupby performance test
Sep 12, 2018
a4f8901
add Values and other method
Sep 12, 2018
ce76a03
IEquatable<T> FTW
Sep 15, 2018
559c575
resize improvements and more tests
Sep 15, 2018
2ce9b3b
simplify enumerator
Sep 17, 2018
8c4da5e
small insert changes
Sep 18, 2018
00022d2
add isfasterthan test
Sep 25, 2018
bc648ae
remove old baseline version
Oct 6, 2018
e10a16d
split out Resize()
Oct 6, 2018
40e2c53
prime, capacity, inline and rename to DictionarySlim
Nov 2, 2018
239bd67
latest knucleotide, comments
Nov 4, 2018
be5c603
Primize capacity
danmoseley Nov 5, 2018
78e2082
Calculate hash only once
danmoseley Nov 5, 2018
1796e32
Update Hack to #9
danmoseley Nov 5, 2018
ce4005c
Remove impl
danmoseley Nov 5, 2018
d26e0fd
By ref in indexer
danmoseley Nov 5, 2018
ff2ff0c
merge tests
Nov 6, 2018
af911e2
remove FSharp reference
Nov 6, 2018
a61a040
de var
Nov 6, 2018
245ec57
Revert "By ref in indexer"
Nov 6, 2018
02d0e9f
remove for value types
Nov 6, 2018
ebf704a
general test data, test knucleotide result
Nov 6, 2018
74a14a6
uint with better perf
Nov 7, 2018
22ec024
fasta license
Nov 7, 2018
984226b
add simple DebugView
Nov 7, 2018
f347160
move usings, simplify Capacity, move assign in resize
Nov 7, 2018
2a69556
complete remove, add copyto
Nov 8, 2018
4aeed46
reduce bounds checking
Nov 8, 2018
f0a0b2a
split indexer for better perf
Nov 9, 2018
2fb8e87
initial arrays, enumerate tests
Nov 9, 2018
f133827
remove perf test
Nov 9, 2018
f4f2e03
add collision loop detection
Nov 10, 2018
97c94ac
Resource strings for concurrency error
danmoseley Nov 10, 2018
7a2b09b
Enable errors in HashHelpers
danmoseley Nov 10, 2018
9978719
Extract dummy arrays into class
danmoseley Nov 10, 2018
fb08a58
nested enumerator, only int initial array shared more
Nov 10, 2018
22d7388
fix initial buckets
Nov 10, 2018
0243dcf
aghh fixed correctly now
Nov 10, 2018
04e327e
Improve CopyTo tests
danmoseley Nov 10, 2018
917a53f
Avoid boxing on GetEnumerator()
danmoseley Nov 10, 2018
9ea2f40
Make namespace match MultiValueDictionary
danmoseley Nov 10, 2018
5a2dab1
Fix benchmark on Linux
danmoseley Nov 10, 2018
9f02f30
Minor spacing
danmoseley Nov 11, 2018
9c8c248
NuGet keyword, version
danmoseley Nov 11, 2018
18d8d45
Remove dead strings
danmoseley Nov 11, 2018
b891486
Implement IC and IROC on K/V enumerators
danmoseley Nov 11, 2018
e5d61f6
Ensure BDN diagnostics dll loads
danmoseley Nov 11, 2018
4db5d9b
Disable slow running benchmarks by default
danmoseley Nov 11, 2018
d445de3
Cherry pick BDN version update
danmoseley Nov 11, 2018
e69d479
Merge branch 'master' into anthonylloydmaster
danmoseley Nov 11, 2018
fee0b8f
Break out HashHelpers
danmoseley Nov 11, 2018
0fef43b
Test that Remove releases ref
danmoseley Nov 11, 2018
ec7799c
Extract local
danmoseley Nov 11, 2018
6c8e3c1
Extract local in enumerators
danmoseley Nov 11, 2018
3592cd9
Server Concurrent GC
danmoseley Nov 11, 2018
5cbc958
Modulo over buckets length
danmoseley Nov 11, 2018
65ad0d6
remove old sign bit comment
Nov 11, 2018
436532d
don't auto prop Count
Nov 11, 2018
39ce99f
Debugger display
danmoseley Nov 11, 2018
65b8075
Merge branch 'master' of https://github.com/anthonylloyd/corefxlab in…
danmoseley Nov 11, 2018
53fbe07
static GetHashCode
danmoseley Nov 11, 2018
3a92c9e
manually inline methods
Nov 11, 2018
b238d9b
Remove Current autoproperty
danmoseley Nov 12, 2018
acbe391
rename KeyEnumerable to KeyCollection, same for Value
Nov 12, 2018
299b99c
Make IsReadOnly explicitly implemented
danmoseley Nov 13, 2018
7c2eff8
Add TryGetValue
danmoseley Nov 13, 2018
12a5a6e
Change KNuc to TryGetValue
danmoseley Nov 13, 2018
ecf5f50
KNuc: Sum over values directly
danmoseley Nov 13, 2018
33e6965
Break out field initialization
danmoseley Nov 13, 2018
8826672
add throwhelper and null argument exceptions
Nov 13, 2018
629aa08
try power 2 with & (length-1)
Nov 13, 2018
db5ab0e
remove by moving last instead of freeList
Nov 13, 2018
8538069
collisionCount down
Nov 14, 2018
419b923
check capacity
Nov 14, 2018
15fd49e
check capacity
Nov 14, 2018
d78e222
Revert "collisionCount down"
AnthonyLloyd Nov 14, 2018
0bc9c4e
add string perf plus mix up others
Nov 14, 2018
0f0dec3
add ctor and remove tests
Nov 14, 2018
d479287
revert 'remove by moving last instead of freeList'
Nov 15, 2018
c9d382b
revert 'remove by moving last instead of freeList'
Nov 15, 2018
4601c08
Validate CopyTo array
danmoseley Nov 16, 2018
04ad0ab
Implicit impl for CopyTo and Reset on enumerators matching Dictionary
danmoseley Nov 16, 2018
bfff22f
Handle capacity 0 and 1
danmoseley Nov 16, 2018
a5b1faa
Corruption protection on Remove()
danmoseley Nov 16, 2018
d8aab4c
Protect against capacity overflow
danmoseley Nov 16, 2018
6c1418e
Remove GetValueOrDefault
danmoseley Nov 16, 2018
e536592
Change indexer to GetOrAddValueRef
danmoseley Nov 16, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,380 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Linq;

namespace Microsoft.Experimental.Collections
{
/// <summary>
/// <see cref="DictionarySlim{TKey, TValue}"/> is similar to <see cref="Dictionary{TKey, TValue}"/> but optimized in three ways:
/// 1) It allows access to the value by ref.
/// 2) It does not store the hash code (assumes it is cheap to equate keys).
/// 3) It does not accept an equality comparer (assumes Object.GetHashCode() and Object.Equals() or overridden implementation are cheap and sufficient).
/// </summary>
[DebuggerTypeProxy(typeof(Extensions.DictionarySlimDebugView<,>))]
[DebuggerDisplay("Count = {Count}")]
public class DictionarySlim<TKey, TValue> : IReadOnlyCollection<KeyValuePair<TKey, TValue>> where TKey : IEquatable<TKey>
AnthonyLloyd marked this conversation as resolved.
Show resolved Hide resolved
AnthonyLloyd marked this conversation as resolved.
Show resolved Hide resolved
{
const int DefaultPrimeSize = 3;
// 1-based index into _entries; 0 means empty
private int[] _buckets;
AnthonyLloyd marked this conversation as resolved.
Show resolved Hide resolved
private Entry[] _entries;
// 0-based index into _entries of head of free chain: -1 means empty
private int _freeList = -1;

/// <summary>
/// One slot of the entries array: a key/value pair plus the link used to chain slots together.
/// </summary>
private struct Entry
{
    /// <summary>Key stored in this slot.</summary>
    public TKey key;

    /// <summary>Value stored in this slot.</summary>
    public TValue value;

    /// <summary>0-based index of the next entry in the chain; -1 means there is no next entry.</summary>
    public int next;
}

/// <summary>
/// Creates an empty dictionary whose bucket and entry arrays both start at <c>DefaultPrimeSize</c> elements.
/// </summary>
public DictionarySlim()
{
    const int initialSize = DefaultPrimeSize;

    _entries = new Entry[initialSize];
    _buckets = new int[initialSize];
}

/// <summary>
/// Creates an empty dictionary sized for at least <paramref name="capacity"/> entries.
/// The requested capacity is rounded up to a prime by <c>HashHelpers.GetPrime</c>, and both the
/// bucket array and the entry array are allocated with that size.
/// </summary>
/// <param name="capacity">Minimum number of entries to allocate room for; must not be negative.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is negative.</exception>
public DictionarySlim(int capacity)
{
    // Reject negative sizes up front instead of silently mapping them to the smallest prime.
    if (capacity < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(capacity));
    }

    int size = HashHelpers.GetPrime(capacity);
    _buckets = new int[size];
    _entries = new Entry[size];
}

// Reinterprets the key's hash code as an unsigned value, so the remainder taken by callers
// to select a bucket can never be negative.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private uint GetHashCode(TKey key)
{
    int hash = key.GetHashCode();
    return (uint)hash;
}

// Translates the 1-based value stored in a bucket into a 0-based index into _entries;
// an empty bucket (0) becomes -1.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private int GetEntryIndex(int bucketIndex)
{
    int oneBased = _buckets[bucketIndex];
    return oneBased - 1;
}

/// <summary>
/// Number of key/value pairs currently stored. Incremented when the indexer adds a new key
/// and decremented by <c>Remove</c>.
/// </summary>
public int Count { get; private set; }
AnthonyLloyd marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Length of the entries array, i.e. how many entries fit before the dictionary has to resize.
/// </summary>
public int Capacity
{
    get { return _entries.Length; }
}

/// <summary>Determines whether an entry with the given key is present.</summary>
/// <param name="key">Key to look for.</param>
/// <returns><c>true</c> if an entry with an equal key exists; otherwise <c>false</c>.</returns>
public bool ContainsKey(TKey key)
{
    Entry[] slots = _entries;
    uint bucket = GetHashCode(key) % (uint)slots.Length;

    // Follow the bucket's chain until it ends (-1).
    for (int i = GetEntryIndex((int)bucket); i != -1; i = slots[i].next)
    {
        if (slots[i].key.Equals(key))
        {
            return true;
        }
    }

    return false;
}

/// <summary>Looks up the value stored for a key without adding the key when it is missing.</summary>
/// <param name="key">Key to look for.</param>
/// <returns>The stored value, or <c>default</c> of <typeparamref name="TValue"/> when the key is absent.</returns>
public TValue GetValueOrDefault(TKey key)
{
    Entry[] slots = _entries;
    uint bucket = GetHashCode(key) % (uint)slots.Length;

    // Follow the bucket's chain until it ends (-1).
    for (int i = GetEntryIndex((int)bucket); i != -1; i = slots[i].next)
    {
        if (slots[i].key.Equals(key))
        {
            return slots[i].value;
        }
    }

    return default;
}

/// <summary>Removes the entry with the given key, if there is one.</summary>
/// <param name="key">Key of the entry to remove.</param>
/// <returns><c>true</c> if an entry was removed; <c>false</c> if the key was not found.</returns>
public bool Remove(TKey key)
{
    Entry[] slots = _entries;
    int bucket = (int)(GetHashCode(key) % (uint)slots.Length);

    int previous = -1;
    for (int current = GetEntryIndex(bucket);
         current != -1;
         previous = current, current = slots[current].next)
    {
        if (!slots[current].key.Equals(key))
        {
            continue;
        }

        int successor = slots[current].next;
        if (previous == -1)
        {
            // The removed entry headed the chain: the bucket now points at its successor
            // (stored 1-based, so "no successor" becomes 0).
            _buckets[bucket] = successor + 1;
        }
        else
        {
            // Unlink from the middle or end of the chain.
            slots[previous].next = successor;
        }

        // Reset the slot (clears key and value), then push it onto the head of the free list.
        slots[current] = default;
        slots[current].next = _freeList;
        _freeList = current;

        Count--;
        return true;
    }

    return false;
}

/// <summary>
/// Returns a reference to the value stored for <paramref name="key"/>. When the key is not present,
/// a new entry is added for it and a reference to that entry's value is returned.
/// </summary>
/// <param name="key">Key to find or add.</param>
public ref TValue this[TKey key]
{
    get
    {
        Entry[] slots = _entries;
        int bucket = (int)(GetHashCode(key) % (uint)slots.Length);

        // Existing key: hand back a reference to its value.
        int candidate = GetEntryIndex(bucket);
        while (candidate != -1)
        {
            if (slots[candidate].key.Equals(key))
            {
                return ref slots[candidate].value;
            }
            candidate = slots[candidate].next;
        }

        // Missing key: pick a slot, preferring one recycled from the free list.
        int slot;
        if (_freeList == -1)
        {
            if (Count == slots.Length)
            {
                // Entry indexes are not changed by Resize, but the bucket has to be
                // recomputed against the new table length.
                slots = Resize();
                bucket = (int)(GetHashCode(key) % (uint)slots.Length);
            }
            slot = Count;
        }
        else
        {
            slot = _freeList;
            _freeList = slots[slot].next;
        }

        // Link the new entry in as the head of its bucket's chain.
        slots[slot].key = key;
        slots[slot].next = GetEntryIndex(bucket);
        _buckets[bucket] = slot + 1;
        Count++;
        return ref slots[slot].value;
    }
}

/// <summary>
/// Grows both arrays to the size returned by <c>HashHelpers.ExpandPrime(Count)</c>, copies the first
/// <c>Count</c> entries across and rebuilds every bucket chain for the new table length.
/// </summary>
/// <returns>The new entries array, which has also been stored in <c>_entries</c>.</returns>
private Entry[] Resize()
{
    int used = Count;
    int capacity = HashHelpers.ExpandPrime(used);

    Entry[] grown = new Entry[capacity];
    Array.Copy(_entries, 0, grown, 0, used);

    // Re-link every copied entry, walking from the last one down to the first.
    int[] buckets = new int[capacity];
    for (int i = used - 1; i >= 0; i--)
    {
        int bucket = (int)(GetHashCode(grown[i].key) % (uint)buckets.Length);
        grown[i].next = buckets[bucket] - 1;
        buckets[bucket] = i + 1;
    }

    _buckets = buckets;
    _entries = grown;

    return grown;
}

/// <summary>
/// Lazily enumerates the keys of all live entries, in entry-array order.
/// </summary>
/// <remarks>
/// Enumeration goes through <see cref="Enumerator"/>, which skips slots that are on the free list.
/// Simply walking slots <c>0..Count-1</c> is wrong after a <c>Remove</c>: freed slots below
/// <c>Count</c> would be returned as default keys and live slots at or above <c>Count</c> would be missed.
/// </remarks>
public IEnumerable<TKey> Keys
{
    get
    {
        Enumerator pairs = new Enumerator(this);
        while (pairs.MoveNext())
        {
            yield return pairs.Current.Key;
        }
    }
}

/// <summary>
/// Lazily enumerates the values of all live entries, in entry-array order.
/// </summary>
/// <remarks>
/// Enumeration goes through <see cref="Enumerator"/>, which skips slots that are on the free list.
/// Simply walking slots <c>0..Count-1</c> is wrong after a <c>Remove</c>: freed slots below
/// <c>Count</c> would be returned as default values and live slots at or above <c>Count</c> would be missed.
/// </remarks>
public IEnumerable<TValue> Values
{
    get
    {
        Enumerator pairs = new Enumerator(this);
        while (pairs.MoveNext())
        {
            yield return pairs.Current.Value;
        }
    }
}

/// <summary>Returns a new <see cref="Enumerator"/> over the key/value pairs of this dictionary.</summary>
IEnumerator<KeyValuePair<TKey, TValue>> IEnumerable<KeyValuePair<TKey, TValue>>.GetEnumerator()
{
    return new Enumerator(this);
}

/// <summary>Non-generic counterpart; also returns a new <see cref="Enumerator"/>.</summary>
IEnumerator IEnumerable.GetEnumerator()
{
    return new Enumerator(this);
}

/// <summary>
/// Enumerates the live key/value pairs of a <see cref="DictionarySlim{TKey, TValue}"/> in entry-array order.
/// </summary>
/// <remarks>
/// The set of free (removed) slots is captured once, when the enumerator is constructed, and those
/// slots are skipped while scanning the entries array.
/// </remarks>
public struct Enumerator : IEnumerator<KeyValuePair<TKey, TValue>>
{
    private readonly DictionarySlim<TKey, TValue> _dictionary;

    // Next slot of the entries array to inspect.
    private int _index;

    // Number of live entries returned so far; scanning stops once it reaches the dictionary's Count.
    private int _found;

    // Indexes of the slots that were on the free list when the enumerator was created.
    private readonly HashSet<int> _freeEntries;

    internal Enumerator(DictionarySlim<TKey, TValue> dictionary)
    {
        _dictionary = dictionary;
        _index = 0;
        _found = 0;
        Current = default;

        // Walk the free chain (terminated by -1) and remember every slot on it.
        _freeEntries = new HashSet<int>();
        int free = dictionary._freeList;
        while (free != -1)
        {
            _freeEntries.Add(free);
            free = dictionary._entries[free].next;
        }
    }

    /// <summary>Advances to the next live entry.</summary>
    /// <returns><c>true</c> if <see cref="Current"/> now holds an entry; <c>false</c> when none are left.</returns>
    public bool MoveNext()
    {
        while (_index < _dictionary._entries.Length && _found < _dictionary.Count)
        {
            int slot = _index++;
            if (_freeEntries.Contains(slot))
            {
                continue;
            }

            Entry[] entries = _dictionary._entries;
            Current = new KeyValuePair<TKey, TValue>(entries[slot].key, entries[slot].value);
            _found++;
            return true;
        }

        Current = default;
        return false;
    }

    /// <summary>The pair found by the last successful <see cref="MoveNext"/>; <c>default</c> otherwise.</summary>
    public KeyValuePair<TKey, TValue> Current { get; private set; }

    object IEnumerator.Current => Current;

    // Rewind completely. Resetting only _index would leave _found at its old value, so a
    // second pass would stop early (or immediately, after a full pass).
    void IEnumerator.Reset()
    {
        _index = 0;
        _found = 0;
        Current = default;
    }

    public void Dispose() { }
}
}
}

namespace Microsoft.Experimental.Collections.Extensions
{
/// <summary>
/// Debugger proxy that presents a <see cref="DictionarySlim{K, V}"/> as a flat array of key/value pairs.
/// </summary>
internal sealed class DictionarySlimDebugView<K, V> where K : IEquatable<K>
{
    private readonly DictionarySlim<K, V> _source;

    /// <param name="dictionary">Dictionary to display; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
    public DictionarySlimDebugView(DictionarySlim<K, V> dictionary)
    {
        if (dictionary == null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        _source = dictionary;
    }

    /// <summary>A snapshot of the dictionary's contents, shown directly under the root node.</summary>
    [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
    public KeyValuePair<K, V>[] Items => _source.ToArray();
}
}

namespace System.Collections
{
internal static partial class HashHelpers
AnthonyLloyd marked this conversation as resolved.
Show resolved Hide resolved
{
// NOTE(review): not referenced by any code visible in this file — confirm whether it is still needed.
public const int HashCollisionThreshold = 100;

// This is the maximum prime smaller than Array.MaxArrayLength
// Intended as the upper bound that ExpandPrime clamps a doubled size to.
public const int MaxPrimeArrayLength = 0x7FEFFFFD;

// Intended for GetPrime's search beyond the primes table, which rejects a candidate
// when (candidate - 1) is a multiple of this value.
public const int HashPrime = 101;

// Table of prime numbers to use as hash table sizes.
// A typical resize algorithm would pick the smallest prime number in this array
// that is larger than twice the previous capacity.
// Suppose our Hashtable currently has capacity x and enough elements are added
// such that a resize needs to occur. Resizing first computes 2x then finds the
// first prime in the table greater than 2x, i.e. if primes are ordered
// p_1, p_2, ..., p_i, ..., it finds p_n such that p_n-1 < 2x < p_n.
// Doubling is important for preserving the asymptotic complexity of the
// hashtable operations such as add. Having a prime guarantees that double
// hashing does not lead to infinite loops. IE, your hash function will be
// h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime.
// We prefer the low computation costs of higher prime numbers over the increased
// memory allocation of a fixed prime number i.e. when right sizing a HashSet.
public static readonly int[] primes = {
3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369 };

/// <summary>Tests whether a number is prime by trial division with odd divisors up to its square root.</summary>
/// <param name="candidate">Number to test.</param>
/// <returns><c>true</c> if <paramref name="candidate"/> is prime; otherwise <c>false</c>.</returns>
public static bool IsPrime(int candidate)
{
    // 0, 1 and negative numbers are not prime. Without this guard odd values below 2 fall
    // through the divisor loop untouched and are reported as prime.
    if (candidate < 2)
    {
        return false;
    }

    // 2 is the only even prime.
    if ((candidate & 1) == 0)
    {
        return candidate == 2;
    }

    int limit = (int)Math.Sqrt(candidate);
    for (int divisor = 3; divisor <= limit; divisor += 2)
    {
        if ((candidate % divisor) == 0)
        {
            return false;
        }
    }

    return true;
}

/// <summary>Returns a prime that is at least <paramref name="min"/>.</summary>
/// <param name="min">Smallest acceptable size; must not be negative.</param>
/// <returns>
/// The first entry of <c>primes</c> that is &gt;= <paramref name="min"/>. Beyond the table, the first
/// odd prime p &gt;= <paramref name="min"/> for which (p - 1) is not a multiple of <c>HashPrime</c>.
/// If the search finds nothing below <see cref="int.MaxValue"/>, <paramref name="min"/> itself.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="min"/> is negative (typically an overflowed size).</exception>
public static int GetPrime(int min)
{
    if (min < 0)
    {
        throw new ArgumentException("Capacity overflowed and went negative.", nameof(min));
    }

    foreach (int prime in primes)
    {
        if (prime >= min)
        {
            return prime;
        }
    }

    // Outside of the predefined table: compute the hard way, testing odd candidates only.
    for (int candidate = (min | 1); candidate < int.MaxValue; candidate += 2)
    {
        if (IsPrime(candidate) && ((candidate - 1) % HashPrime != 0))
        {
            return candidate;
        }
    }

    return min;
}

/// <summary>Returns the size a hash table of <paramref name="oldSize"/> elements should grow to.</summary>
/// <param name="oldSize">Current size of the table.</param>
/// <returns>
/// <c>GetPrime(2 * oldSize)</c>, or <c>MaxPrimeArrayLength</c> when doubling would exceed that limit
/// while <paramref name="oldSize"/> is still below it.
/// </returns>
public static int ExpandPrime(int oldSize)
{
    int newSize = 2 * oldSize;

    // Allow the table to grow to the maximum possible size before hitting capacity overflow.
    // The (uint) cast makes a doubled size that wrapped to a negative int compare as larger than the limit.
    if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
    {
        return MaxPrimeArrayLength;
    }

    return GetPrime(newSize);
}
}
}
Loading