Skip to content

Commit

Permalink
CastTable perf tweaks. (#34427)
Browse files Browse the repository at this point in the history
* No null check in Get.

* bypass AuxData
  • Loading branch information
VSadov authored Apr 20, 2020
1 parent 86940e1 commit aa5b204
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 128 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ private struct CastCacheEntry
};

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int KeyToBucket(int[] table, nuint source, nuint target)
private static int KeyToBucket(ref int tableData, nuint source, nuint target)
{
// upper bits of addresses do not vary much, so to reduce loss due to cancelling out,
// we do `rotl(source, <half-size>) ^ target` for mixing inputs.
// then we use fibonacci hashing to reduce the value to desired size.

int hashShift = HashShift(table);
int hashShift = HashShift(ref tableData);
#if TARGET_64BIT
ulong hash = (((ulong)source << 32) | ((ulong)source >> 32)) ^ (ulong)target;
return (int)((hash * 11400714819323198485ul) >> hashShift);
Expand All @@ -63,31 +63,31 @@ private static int KeyToBucket(int[] table, nuint source, nuint target)
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ref int AuxData(int[] table)
private static ref int TableData(int[] table)
{
// element 0 is used for embedded aux data
return ref MemoryMarshal.GetArrayDataReference(table);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ref CastCacheEntry Element(int[] table, int index)
private static ref CastCacheEntry Element(ref int tableData, int index)
{
// element 0 is used for embedded aux data, skip it
return ref Unsafe.Add(ref Unsafe.As<int, CastCacheEntry>(ref AuxData(table)), index + 1);
return ref Unsafe.Add(ref Unsafe.As<int, CastCacheEntry>(ref tableData), index + 1);
}

// TableMask is "size - 1"
// we need that more often than we need size
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int TableMask(int[] table)
private static int HashShift(ref int tableData)
{
return AuxData(table);
return tableData;
}

// TableMask is "size - 1"
// we need that more often than we need size
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int HashShift(int[] table)
private static int TableMask(ref int tableData)
{
return Unsafe.Add(ref AuxData(table), 1);
return Unsafe.Add(ref tableData, 1);
}

private enum CastResult
Expand All @@ -104,57 +104,53 @@ private enum CastResult
private static CastResult TryGet(nuint source, nuint target)
{
const int BUCKET_SIZE = 8;
int[]? table = s_table;

// we use NULL as a sentinel for a rare case when a table could not be allocated
// because we avoid OOMs.
// we could use 0-element table instead, but then we would have to check the size here.
if (table != null)
// table is initialized and updated by native code that guarantees it is not null.
ref int tableData = ref TableData(s_table!);

int index = KeyToBucket(ref tableData, source, target);
for (int i = 0; i < BUCKET_SIZE;)
{
int index = KeyToBucket(table, source, target);
for (int i = 0; i < BUCKET_SIZE;)
{
ref CastCacheEntry pEntry = ref Element(table, index);
ref CastCacheEntry pEntry = ref Element(ref tableData, index);

// must read in this order: version -> entry parts -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
int version = Volatile.Read(ref pEntry._version);
nuint entrySource = pEntry._source;
// must read in this order: version -> entry parts -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
int version = Volatile.Read(ref pEntry._version);
nuint entrySource = pEntry._source;

// mask the lower version bit to make it even.
// This way we can check if version is odd or changing in just one compare.
version &= ~1;
// mask the lower version bit to make it even.
// This way we can check if version is odd or changing in just one compare.
version &= ~1;

if (entrySource == source)
if (entrySource == source)
{
nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
// target never has its lower bit set.
// a matching entryTargetAndResult would have the same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
if (entryTargetAndResult <= 1)
{
nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
// target never has its lower bit set.
// a matching entryTargetAndResult would have the same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
if (entryTargetAndResult <= 1)
if (version != pEntry._version)
{
if (version != pEntry._version)
{
// oh, so close, the entry is in inconsistent state.
// it is either changing or has changed while we were reading.
// treat it as a miss.
break;
}

return (CastResult)entryTargetAndResult;
// oh, so close, the entry is in inconsistent state.
// it is either changing or has changed while we were reading.
// treat it as a miss.
break;
}
}

if (version == 0)
{
// the rest of the bucket is unclaimed, no point to search further
break;
return (CastResult)entryTargetAndResult;
}
}

// quadratic reprobe
i++;
index = (index + i) & TableMask(table);
if (version == 0)
{
// the rest of the bucket is unclaimed, no point to search further
break;
}

// quadratic reprobe
i++;
index = (index + i) & TableMask(ref tableData);
}
return CastResult.MaybeCast;
}
Expand Down
131 changes: 71 additions & 60 deletions src/coreclr/src/vm/castcache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE)

BASEARRAYREF* CastCache::s_pTableRef = NULL;
OBJECTHANDLE CastCache::s_sentinelTable = NULL;
DWORD CastCache::s_lastFlushSize = INITIAL_CACHE_SIZE;

BASEARRAYREF CastCache::CreateCastCache(DWORD size)
Expand All @@ -24,7 +25,7 @@ BASEARRAYREF CastCache::CreateCastCache(DWORD size)
CONTRACTL_END;

// size must be positive
_ASSERTE(size > 0);
_ASSERTE(size > 1);
// size must be a power of two
_ASSERTE((size & (size - 1)) == 0);

Expand Down Expand Up @@ -62,16 +63,17 @@ BASEARRAYREF CastCache::CreateCastCache(DWORD size)
}
}

TableMask(table) = size - 1;
DWORD* tableData = TableData(table);
TableMask(tableData) = size - 1;

// Fibonacci hash reduces the value into desired range by shifting right by the number of leading zeroes in 'size-1'
DWORD bitCnt;
#if HOST_64BIT
BitScanReverse64(&bitCnt, size - 1);
HashShift(table) = (BYTE)(63 - bitCnt);
HashShift(tableData) = (BYTE)(63 - bitCnt);
#else
BitScanReverse(&bitCnt, size - 1);
HashShift(table) = (BYTE)(31 - bitCnt);
HashShift(tableData) = (BYTE)(31 - bitCnt);
#endif

return table;
Expand Down Expand Up @@ -107,10 +109,10 @@ void CastCache::FlushCurrentCache()
}
CONTRACTL_END;

BASEARRAYREF currentTableRef = *s_pTableRef;
s_lastFlushSize = !currentTableRef ? INITIAL_CACHE_SIZE : CacheElementCount(currentTableRef);
DWORD* tableData = TableData(*s_pTableRef);
s_lastFlushSize = max(INITIAL_CACHE_SIZE, CacheElementCount(tableData));

*s_pTableRef = NULL;
SetObjectReference((OBJECTREF *)s_pTableRef, ObjectFromHandle(s_sentinelTable));
}

void CastCache::Initialize()
Expand All @@ -127,6 +129,18 @@ void CastCache::Initialize()

GCX_COOP();
s_pTableRef = (BASEARRAYREF*)pTableField->GetCurrentStaticAddress();

BASEARRAYREF sentinelTable = CreateCastCache(2);
if (!sentinelTable)
{
// no memory for 2 element cache while initializing?
ThrowOutOfMemory();
}

s_sentinelTable = CreateGlobalHandle(sentinelTable);

// initialize to the sentinel value, this should not be null.
SetObjectReference((OBJECTREF *)s_pTableRef, sentinelTable);
}

TypeHandle::CastResult CastCache::TryGet(TADDR source, TADDR target)
Expand All @@ -139,58 +153,53 @@ TypeHandle::CastResult CastCache::TryGet(TADDR source, TADDR target)
}
CONTRACTL_END;

BASEARRAYREF table = *s_pTableRef;
DWORD* tableData = TableData(*s_pTableRef);

// we use NULL as a sentinel for a rare case when a table could not be allocated
// because we avoid OOMs.
// we could use 0-element table instead, but then we would have to check the size here.
if (table != NULL)
DWORD index = KeyToBucket(tableData, source, target);
for (DWORD i = 0; i < BUCKET_SIZE;)
{
DWORD index = KeyToBucket(table, source, target);
for (DWORD i = 0; i < BUCKET_SIZE;)
{
CastCacheEntry* pEntry = &Elements(table)[index];
CastCacheEntry* pEntry = &Elements(tableData)[index];

// must read in this order: version -> entry parts -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
DWORD version1 = VolatileLoad(&pEntry->version);
TADDR entrySource = pEntry->source;
// must read in this order: version -> entry parts -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
DWORD version1 = VolatileLoad(&pEntry->version);
TADDR entrySource = pEntry->source;

// mask the lower version bit to make it even.
// This way we can check if version is odd or changing in just one compare.
version1 &= ~1;
// mask the lower version bit to make it even.
// This way we can check if version is odd or changing in just one compare.
version1 &= ~1;

if (entrySource == source)
if (entrySource == source)
{
TADDR entryTargetAndResult = VolatileLoad(&pEntry->targetAndResult);
// target never has its lower bit set.
// a matching entryTargetAndResult would have the same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
if (entryTargetAndResult <= 1)
{
TADDR entryTargetAndResult = VolatileLoad(&pEntry->targetAndResult);
// target never has its lower bit set.
// a matching entryTargetAndResult would have the same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
if (entryTargetAndResult <= 1)
if (version1 != pEntry->version)
{
if (version1 != pEntry->version)
{
// oh, so close, the entry is in inconsistent state.
// it is either changing or has changed while we were reading.
// treat it as a miss.
break;
}

return TypeHandle::CastResult(entryTargetAndResult);
// oh, so close, the entry is in inconsistent state.
// it is either changing or has changed while we were reading.
// treat it as a miss.
break;
}
}

if (version1 == 0)
{
// the rest of the bucket is unclaimed, no point to search further
break;
return TypeHandle::CastResult(entryTargetAndResult);
}
}

// quadratic reprobe
i++;
index = (index + i) & TableMask(table);
if (version1 == 0)
{
// the rest of the bucket is unclaimed, no point to search further
break;
}

// quadratic reprobe
i++;
index = (index + i) & TableMask(tableData);
}

return TypeHandle::MaybeCast;
}

Expand All @@ -205,21 +214,23 @@ void CastCache::TrySet(TADDR source, TADDR target, BOOL result)
CONTRACTL_END;

DWORD bucket;
BASEARRAYREF table;
DWORD* tableData;

do
{
table = *s_pTableRef;
if (!table)
tableData = TableData(*s_pTableRef);
if (TableMask(tableData) == 1)
{
// we did not allocate a table or flushed it, try replacing, but do not continue looping.
// 2-element table is used as a sentinel.
// we did not allocate a real table yet or have flushed it.
// try replacing the table, but do not insert anything.
MaybeReplaceCacheWithLarger(s_lastFlushSize);
return;
}

bucket = KeyToBucket(table, source, target);
bucket = KeyToBucket(tableData, source, target);
DWORD index = bucket;
CastCacheEntry* pEntry = &Elements(table)[index];
CastCacheEntry* pEntry = &Elements(tableData)[index];

for (DWORD i = 0; i < BUCKET_SIZE;)
{
Expand Down Expand Up @@ -259,28 +270,28 @@ void CastCache::TrySet(TADDR source, TADDR target, BOOL result)
// quadratic reprobe
i++;
index += i;
pEntry = &Elements(table)[index & TableMask(table)];
pEntry = &Elements(tableData)[index & TableMask(tableData)];
}

// bucket is full.
} while (TryGrow(table));
} while (TryGrow(tableData));

// reread table after TryGrow.
table = *s_pTableRef;
if (!table)
// reread tableData after TryGrow.
tableData = TableData(*s_pTableRef);
if (TableMask(tableData) == 1)
{
// we did not allocate a table.
// do not insert into a sentinel.
return;
}

// pick a victim somewhat randomly within a bucket
// NB: ++ is not interlocked. We are ok if we lose counts here. It is just a number that changes.
DWORD victimDistance = VictimCounter(table)++ & (BUCKET_SIZE - 1);
DWORD victimDistance = VictimCounter(tableData)++ & (BUCKET_SIZE - 1);
// position the victim in a quadratic reprobe bucket
DWORD victim = (victimDistance * victimDistance + victimDistance) / 2;

{
CastCacheEntry* pEntry = &Elements(table)[(bucket + victim) & TableMask(table)];
CastCacheEntry* pEntry = &Elements(tableData)[(bucket + victim) & TableMask(tableData)];

DWORD version = pEntry->version;
if ((version & VERSION_NUM_MASK) >= (VERSION_NUM_MASK - 2))
Expand Down
Loading

0 comments on commit aa5b204

Please sign in to comment.