diff --git a/src/mono/mono/metadata/CMakeLists.txt b/src/mono/mono/metadata/CMakeLists.txt index 773381d6c1c9a..9efd5ac0079c2 100644 --- a/src/mono/mono/metadata/CMakeLists.txt +++ b/src/mono/mono/metadata/CMakeLists.txt @@ -42,6 +42,11 @@ else() set(metadata_platform_sources ${metadata_unix_sources}) endif() +set(imported_native_sources + ../../../native/containers/dn-simdhash.c + ../../../native/containers/dn-simdhash-string-ptr.c + ../../../native/containers/dn-simdhash-u32-ptr.c) + set(metadata_common_sources appdomain.c domain.c @@ -195,7 +200,7 @@ elseif(MONO_GC STREQUAL "boehm") set(metadata_compile_definitions "HAVE_BOEHM_GC") endif() -set(metadata_sources "${metadata_platform_sources};${metadata_common_sources};${metadata_gc_dependent_sources};${metadata_gc_sources};${ilgen_sources}") +set(metadata_sources "${metadata_platform_sources};${metadata_common_sources};${metadata_gc_dependent_sources};${metadata_gc_sources};${ilgen_sources};${imported_native_sources}") if(HOST_WIN32 AND NOT DISABLE_SHARED_LIBS) add_library(metadata_objects_shared OBJECT ${metadata_sources}) diff --git a/src/mono/mono/metadata/class.c b/src/mono/mono/metadata/class.c index 05da1332e3012..5ae4f1981d38a 100644 --- a/src/mono/mono/metadata/class.c +++ b/src/mono/mono/metadata/class.c @@ -3037,18 +3037,22 @@ mono_image_init_name_cache (MonoImage *image) const char *name; const char *nspace; guint32 visib, nspace_index; - GHashTable *name_cache2, *nspace_table, *the_name_cache; + dn_simdhash_u32_ptr_t *name_cache2; + dn_simdhash_string_ptr_t *nspace_table, *the_name_cache; if (image->name_cache) return; - the_name_cache = g_hash_table_new (g_str_hash, g_str_equal); + // TODO: Figure out a good initial capacity for this table by doing a scan, + // or just pre-reserve a reasonable amount of space based on how many nspaces + // an image typically has + the_name_cache = dn_simdhash_string_ptr_new (0, NULL); if (image_is_dynamic (image)) { mono_image_lock (image); if (image->name_cache) { 
/* Somebody initialized it before us */ - g_hash_table_destroy (the_name_cache); + dn_simdhash_free (the_name_cache); } else { mono_atomic_store_release (&image->name_cache, the_name_cache); } @@ -3057,7 +3061,7 @@ mono_image_init_name_cache (MonoImage *image) } /* Temporary hash table to avoid lookups in the nspace_table */ - name_cache2 = g_hash_table_new (NULL, NULL); + name_cache2 = dn_simdhash_u32_ptr_new (0, NULL); /* FIXME: metadata-update */ int rows = table_info_get_rows (t); @@ -3074,14 +3078,13 @@ mono_image_init_name_cache (MonoImage *image) nspace = mono_metadata_string_heap (image, cols [MONO_TYPEDEF_NAMESPACE]); nspace_index = cols [MONO_TYPEDEF_NAMESPACE]; - nspace_table = (GHashTable *)g_hash_table_lookup (name_cache2, GUINT_TO_POINTER (nspace_index)); - if (!nspace_table) { - nspace_table = g_hash_table_new (g_str_hash, g_str_equal); - g_hash_table_insert (the_name_cache, (char*)nspace, nspace_table); - g_hash_table_insert (name_cache2, GUINT_TO_POINTER (nspace_index), - nspace_table); + if (!dn_simdhash_u32_ptr_try_get_value (name_cache2, nspace_index, (void **)&nspace_table)) { + // FIXME: Compute an appropriate capacity for this table to avoid growing it + nspace_table = dn_simdhash_string_ptr_new (0, NULL); + dn_simdhash_string_ptr_try_add (the_name_cache, nspace, nspace_table); + dn_simdhash_u32_ptr_try_add (name_cache2, nspace_index, nspace_table); } - g_hash_table_insert (nspace_table, (char *) name, GUINT_TO_POINTER (i)); + dn_simdhash_string_ptr_try_add (nspace_table, name, GUINT_TO_POINTER (i)); } /* Load type names from EXPORTEDTYPES table */ @@ -3102,23 +3105,22 @@ mono_image_init_name_cache (MonoImage *image) nspace = mono_metadata_string_heap (image, exptype_cols [MONO_EXP_TYPE_NAMESPACE]); nspace_index = exptype_cols [MONO_EXP_TYPE_NAMESPACE]; - nspace_table = (GHashTable *)g_hash_table_lookup (name_cache2, GUINT_TO_POINTER (nspace_index)); - if (!nspace_table) { - nspace_table = g_hash_table_new (g_str_hash, g_str_equal); - 
g_hash_table_insert (the_name_cache, (char*)nspace, nspace_table); - g_hash_table_insert (name_cache2, GUINT_TO_POINTER (nspace_index), - nspace_table); + if (!dn_simdhash_u32_ptr_try_get_value (name_cache2, nspace_index, (void **)&nspace_table)) { + // FIXME: Compute an appropriate capacity for this table to avoid growing it + nspace_table = dn_simdhash_string_ptr_new (0, NULL); + dn_simdhash_string_ptr_try_add (the_name_cache, nspace, nspace_table); + dn_simdhash_u32_ptr_try_add (name_cache2, nspace_index, nspace_table); } - g_hash_table_insert (nspace_table, (char *) name, GUINT_TO_POINTER (mono_metadata_make_token (MONO_TABLE_EXPORTEDTYPE, i + 1))); + dn_simdhash_string_ptr_try_add (nspace_table, name, GUINT_TO_POINTER (mono_metadata_make_token (MONO_TABLE_EXPORTEDTYPE, i + 1))); } } - g_hash_table_destroy (name_cache2); + dn_simdhash_free (name_cache2); mono_image_lock (image); if (image->name_cache) { /* Somebody initialized it before us */ - g_hash_table_destroy (the_name_cache); + dn_simdhash_free (the_name_cache); } else { mono_atomic_store_release (&image->name_cache, the_name_cache); } @@ -3133,23 +3135,19 @@ void mono_image_add_to_name_cache (MonoImage *image, const char *nspace, const char *name, guint32 index) { - GHashTable *nspace_table; - GHashTable *name_cache; - guint32 old_index; + dn_simdhash_string_ptr_t *nspace_table, *name_cache; mono_image_init_name_cache (image); mono_image_lock (image); name_cache = image->name_cache; - if (!(nspace_table = (GHashTable *)g_hash_table_lookup (name_cache, nspace))) { - nspace_table = g_hash_table_new (g_str_hash, g_str_equal); - g_hash_table_insert (name_cache, (char *)nspace, (char *)nspace_table); + if (!dn_simdhash_string_ptr_try_get_value (name_cache, nspace, (void **)&nspace_table)) { + nspace_table = dn_simdhash_string_ptr_new (0, NULL); + dn_simdhash_string_ptr_try_add (name_cache, nspace, nspace_table); } - if ((old_index = GPOINTER_TO_UINT (g_hash_table_lookup (nspace_table, (char*) name)))) - 
g_error ("overrwritting old token %x on image %s for type %s::%s", old_index, image->name, nspace, name); - - g_hash_table_insert (nspace_table, (char *) name, GUINT_TO_POINTER (index)); + if (!dn_simdhash_string_ptr_try_add (nspace_table, name, GUINT_TO_POINTER (index))) + g_error ("overrwritting old token ? on image %s for type %s::%s", image->name, nspace, name); mono_image_unlock (image); } @@ -3160,9 +3158,8 @@ typedef struct { } FindAllUserData; static void -find_all_nocase (gpointer key, gpointer value, gpointer user_data) +find_all_nocase (const char *name, gpointer value, gpointer user_data) { - char *name = (char*)key; FindAllUserData *data = (FindAllUserData*)user_data; if (mono_utf8_strcasecmp (name, (char*)data->key) == 0) data->values = g_slist_prepend (data->values, value); @@ -3174,9 +3171,8 @@ typedef struct { } FindUserData; static void -find_nocase (gpointer key, gpointer value, gpointer user_data) +find_nocase (const char *name, gpointer value, gpointer user_data) { - char *name = (char*)key; FindUserData *data = (FindUserData*)user_data; if (!data->value && (mono_utf8_strcasecmp (name, (char*)data->key) == 0)) @@ -3303,7 +3299,7 @@ search_modules (MonoImage *image, const char *name_space, const char *name, gboo static MonoClass * mono_class_from_name_checked_aux (MonoImage *image, const char* name_space, const char *name, GHashTable* visited_images, gboolean case_sensitive, MonoError *error) { - GHashTable *nspace_table = NULL; + dn_simdhash_string_ptr_t *nspace_table = NULL; MonoImage *loaded_image = NULL; guint32 token = 0; MonoClass *klass; @@ -3350,10 +3346,11 @@ mono_class_from_name_checked_aux (MonoImage *image, const char* name_space, cons mono_image_lock (image); if (case_sensitive) { - nspace_table = (GHashTable *)g_hash_table_lookup (image->name_cache, name_space); - - if (nspace_table) - token = GPOINTER_TO_UINT (g_hash_table_lookup (nspace_table, name)); + if (dn_simdhash_string_ptr_try_get_value (image->name_cache, name_space, 
(void **)&nspace_table)) { + void * temp; + if (dn_simdhash_string_ptr_try_get_value (nspace_table, name, &temp)) + token = GPOINTER_TO_UINT(temp); + } } else { FindAllUserData all_user_data = { name_space, NULL }; FindUserData user_data = { name, NULL }; @@ -3361,12 +3358,12 @@ mono_class_from_name_checked_aux (MonoImage *image, const char* name_space, cons // We're forced to check all matching namespaces, not just the first one found, // because our desired type could be in any of the ones that match case-insensitively. - g_hash_table_foreach (image->name_cache, find_all_nocase, &all_user_data); + dn_simdhash_string_ptr_foreach (image->name_cache, find_all_nocase, &all_user_data); values = all_user_data.values; while (values && !user_data.value) { - nspace_table = (GHashTable*)values->data; - g_hash_table_foreach (nspace_table, find_nocase, &user_data); + nspace_table = (dn_simdhash_string_ptr_t *)values->data; + dn_simdhash_string_ptr_foreach (nspace_table, find_nocase, &user_data); values = values->next; } diff --git a/src/mono/mono/metadata/image.c b/src/mono/mono/metadata/image.c index 23333b1b2a976..b40199eb30878 100644 --- a/src/mono/mono/metadata/image.c +++ b/src/mono/mono/metadata/image.c @@ -1969,6 +1969,12 @@ free_hash_table (gpointer key, gpointer val, gpointer user_data) g_hash_table_destroy ((GHashTable*)val); } +static void +free_simdhash_table (const char *key, gpointer val, gpointer user_data) +{ + dn_simdhash_free ((dn_simdhash_t*)val); +} + /* static void free_mr_signatures (gpointer key, gpointer val, gpointer user_data) @@ -2128,8 +2134,8 @@ mono_image_close_except_pools (MonoImage *image) if (image->ptr_cache) g_hash_table_destroy (image->ptr_cache); if (image->name_cache) { - g_hash_table_foreach (image->name_cache, free_hash_table, NULL); - g_hash_table_destroy (image->name_cache); + dn_simdhash_string_ptr_foreach (image->name_cache, free_simdhash_table, NULL); + dn_simdhash_free (image->name_cache); } free_hash 
(image->icall_wrapper_cache); diff --git a/src/mono/mono/metadata/metadata-internals.h b/src/mono/mono/metadata/metadata-internals.h index 5022d57c40878..8c1dfd803b578 100644 --- a/src/mono/mono/metadata/metadata-internals.h +++ b/src/mono/mono/metadata/metadata-internals.h @@ -19,6 +19,8 @@ #include #include "mono/utils/mono-conc-hashtable.h" #include "mono/utils/refcount.h" +// for dn_simdhash_string_ptr_t and dn_simdhash_u32_ptr_t +#include "../native/containers/dn-simdhash-specializations.h" struct _MonoType { union { @@ -438,7 +440,7 @@ struct _MonoImage { /* * Indexes namespaces to hash tables that map class name to typedef token. */ - GHashTable *name_cache; /*protected by the image lock*/ + dn_simdhash_string_ptr_t *name_cache; /*protected by the image lock*/ /* * Indexed by MonoClass diff --git a/src/mono/mono/metadata/metadata.c b/src/mono/mono/metadata/metadata.c index ed745518559ba..c517703415eab 100644 --- a/src/mono/mono/metadata/metadata.c +++ b/src/mono/mono/metadata/metadata.c @@ -997,10 +997,10 @@ mono_metadata_table_bounds_check_slow (MonoImage *image, int table_index, int to if (G_LIKELY (GINT_TO_UINT32(token_index) <= table_info_get_rows (&image->tables [table_index]))) return FALSE; - if (G_LIKELY (!image->has_updates)) - return TRUE; + if (G_LIKELY (!image->has_updates)) + return TRUE; - return mono_metadata_update_table_bounds_check (image, table_index, token_index); + return mono_metadata_update_table_bounds_check (image, table_index, token_index); } void @@ -1094,7 +1094,7 @@ get_blob_heap (MonoImage *image) static gboolean mono_delta_heap_lookup (MonoImage *base_image, MetadataHeapGetterFunc get_heap, guint32 orig_index, MonoImage **image_out, guint32 *index_out) { - return mono_metadata_update_delta_heap_lookup (base_image, get_heap, orig_index, image_out, index_out); + return mono_metadata_update_delta_heap_lookup (base_image, get_heap, orig_index, image_out, index_out); } /** @@ -6451,12 +6451,12 @@ mono_metadata_events_from_typedef 
(MonoImage *meta, guint32 index, guint *end_id } start = mono_metadata_decode_row_col (tdef, loc.result, MONO_EVENT_MAP_EVENTLIST); - /* - * metadata-update: note this next line needs block needs to look at the number of rows in - * EventMap and Event of the base image. Updates will add rows for new properties, - * but they won't be contiguous. if we set end to the number of rows in the updated - * Property table, the range will include properties from some other class - */ + /* + * metadata-update: note this next line needs block needs to look at the number of rows in + * EventMap and Event of the base image. Updates will add rows for new properties, + * but they won't be contiguous. if we set end to the number of rows in the updated + * Property table, the range will include properties from some other class + */ if (loc.result + 1 < table_info_get_rows (tdef)) { end = mono_metadata_decode_row_col (tdef, loc.result + 1, MONO_EVENT_MAP_EVENTLIST) - 1; } else { @@ -6569,12 +6569,12 @@ mono_metadata_properties_from_typedef (MonoImage *meta, guint32 index, guint *en } start = mono_metadata_decode_row_col (tdef, loc.result, MONO_PROPERTY_MAP_PROPERTY_LIST); - /* - * metadata-update: note this next line needs block needs to look at the number of rows in - * PropertyMap and Property of the base image. Updates will add rows for new properties, - * but they won't be contiguous. if we set end to the number of rows in the updated - * Property table, the range will include properties from some other class - */ + /* + * metadata-update: note this next line needs block needs to look at the number of rows in + * PropertyMap and Property of the base image. Updates will add rows for new properties, + * but they won't be contiguous. 
if we set end to the number of rows in the updated + * Property table, the range will include properties from some other class + */ if (loc.result + 1 < table_info_get_rows (&meta->tables [MONO_TABLE_PROPERTYMAP])) { end = mono_metadata_decode_row_col (tdef, loc.result + 1, MONO_PROPERTY_MAP_PROPERTY_LIST) - 1; } else { @@ -7088,10 +7088,10 @@ mono_metadata_get_marshal_info (MonoImage *meta, guint32 idx, gboolean is_field) gboolean found = tdef->base && mono_binary_search (&loc, tdef->base, table_info_get_rows (tdef), tdef->row_size, table_locator); - if (G_UNLIKELY (meta->has_updates)) { - if (!found && !mono_metadata_update_metadata_linear_search (meta, tdef, &loc, table_locator)) - return NULL; - } + if (G_UNLIKELY (meta->has_updates)) { + if (!found && !mono_metadata_update_metadata_linear_search (meta, tdef, &loc, table_locator)) + return NULL; + } return mono_metadata_blob_heap (meta, mono_metadata_decode_row_col (tdef, loc.result, MONO_FIELD_MARSHAL_NATIVE_TYPE)); } @@ -8055,3 +8055,12 @@ mono_metadata_get_method_params (MonoImage *image, uint32_t method_idx, uint32_t return param_index; } + +// Required by dn_simdhash +void +dn_simdhash_assert_fail (const char *file, int line, const char *condition); + +void +dn_simdhash_assert_fail (const char *file, int line, const char *condition) { + mono_assertion_message (file, line, condition); +} diff --git a/src/mono/mono/utils/atomic.h b/src/mono/mono/utils/atomic.h index 7c7c684ab94eb..7d1d127f60c5c 100644 --- a/src/mono/mono/utils/atomic.h +++ b/src/mono/mono/utils/atomic.h @@ -115,7 +115,7 @@ mono_atomic_cas_i64 (volatile gint64 *dest, gint64 exch, gint64 comp) (void)atomic_compare_exchange_strong ((volatile atomic_llong *)dest, (long long*)&comp, exch); return comp; #else -#error gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC +#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif } @@ -188,7 +188,7 @@ mono_atomic_xchg_i64 (volatile 
gint64 *dest, gint64 exch) g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2); return atomic_exchange ((volatile atomic_llong *)dest, exch); #else -#error gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC +#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif } @@ -216,7 +216,7 @@ mono_atomic_fetch_add_i64 (volatile gint64 *dest, gint64 add) g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2); return atomic_fetch_add ((volatile atomic_llong *)dest, add); #else -#error gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC +#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif } @@ -250,7 +250,7 @@ mono_atomic_load_i64 (volatile gint64 *src) g_static_assert (sizeof (atomic_llong) == sizeof (*src) && ATOMIC_LLONG_LOCK_FREE == 2); return atomic_load ((volatile atomic_llong *)src); #else -#error gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC +#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif } @@ -292,7 +292,7 @@ mono_atomic_store_i64 (volatile gint64 *dst, gint64 val) g_static_assert (sizeof (atomic_llong) == sizeof (*dst) && ATOMIC_LLONG_LOCK_FREE == 2); atomic_store ((volatile atomic_llong *)dst, val); #else -#error gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC +#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif } diff --git a/src/native/containers/containers.cmake b/src/native/containers/containers.cmake index dd8829e3bf042..16c41eab5619f 100644 --- a/src/native/containers/containers.cmake +++ b/src/native/containers/containers.cmake @@ -8,6 +8,11 @@ list(APPEND SHARED_CONTAINER_SOURCES dn-queue.c dn-umap.c dn-vector.c + # FIXME: Including these here causes a linker collision with 
sgen metadata + # dn-simdhash.c + # dn-simdhash-string-ptr.c + # dn-simdhash-u32-ptr.c + # dn-simdhash-ptr-ptr.c ) list(APPEND SHARED_CONTAINER_HEADERS @@ -24,4 +29,11 @@ list(APPEND SHARED_CONTAINER_HEADERS dn-vector-ptr.h dn-vector-t.h dn-vector-types.h + dn-simdhash.h + dn-simdhash-specialization.h + dn-simdhash-specialization-declarations.h + dn-simdhash-specializations.h + dn-simdhash-arch.h + dn-simdhash-string-ptr.h + dn-simdhash-utils.h ) diff --git a/src/native/containers/dn-simdhash-arch.h b/src/native/containers/dn-simdhash-arch.h new file mode 100644 index 0000000000000..d99288e3476ac --- /dev/null +++ b/src/native/containers/dn-simdhash-arch.h @@ -0,0 +1,249 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __DN_SIMDHASH_ARCH_H__ +#define __DN_SIMDHASH_ARCH_H__ + +// #define DN_SIMDHASH_WARNINGS 1 + +// HACK: for better language server parsing +#include "dn-simdhash.h" + +#if defined(__clang__) || defined (__GNUC__) // use vector intrinsics + +#if defined(__wasm_simd128__) +#include +#elif defined(_M_AMD64) || defined(_M_X64) || (_M_IX86_FP == 2) || defined(__SSE2__) +#include +#elif defined(__ARM_NEON) +#include +#elif defined(__wasm) +#define DN_SIMDHASH_USE_SCALAR_FALLBACK 1 +#ifdef DN_SIMDHASH_WARNINGS +#pragma message("WARNING: Building dn_simdhash for WASM without -msimd128! Performance will be terrible!") +#endif +#else +#define DN_SIMDHASH_USE_SCALAR_FALLBACK 1 +#ifdef DN_SIMDHASH_WARNINGS +#pragma message("WARNING: Unsupported architecture for dn_simdhash! 
Performance will be terrible!") +#endif +#endif + +// extract/replace lane opcodes require constant indices on some target architectures, +// and in some cases it is profitable to do a single-byte memory load/store instead of +// a full vector load/store, so we expose both layouts as a union + +typedef uint8_t dn_u8x16 __attribute__ ((vector_size (DN_SIMDHASH_VECTOR_WIDTH), aligned(DN_SIMDHASH_VECTOR_WIDTH))); +typedef union { + _Alignas(DN_SIMDHASH_VECTOR_WIDTH) dn_u8x16 vec; +#if defined(_M_AMD64) || defined(_M_X64) || (_M_IX86_FP == 2) || defined(__SSE2__) + _Alignas(DN_SIMDHASH_VECTOR_WIDTH) __m128i m128; +#endif + _Alignas(DN_SIMDHASH_VECTOR_WIDTH) uint8_t values[DN_SIMDHASH_VECTOR_WIDTH]; +} dn_simdhash_suffixes; + +#ifdef DN_SIMDHASH_USE_SCALAR_FALLBACK +typedef uint8_t dn_simdhash_search_vector; +#else +typedef dn_simdhash_suffixes dn_simdhash_search_vector; +#endif + +// Extracting lanes from a vector register on x86/x64 has horrible latency, +// so it's better to do regular byte loads from the stack +#if defined(__wasm_simd128__) +// For wasm with -msimd128, clang generates truly bizarre load/store code +// where it does two byte memory loads, then a vector load, then two +// lane insertions to write the byte loads into the loaded vector +// before finally passing it to find_first_matching_suffix. So we have to vec[]. 
+// See https://github.com/llvm/llvm-project/issues/87398#issuecomment-2050696298 +// Also see https://github.com/llvm/llvm-project/issues/88460 +#define dn_simdhash_extract_lane(suffixes, lane) \ + suffixes.vec[lane] +#else +#define dn_simdhash_extract_lane(suffixes, lane) \ + suffixes.values[lane] +#endif + +static DN_FORCEINLINE(uint32_t) +ctz (uint32_t value) +{ + // __builtin_ctz is undefined for 0 + if (value == 0) + return 32; + return (uint32_t)__builtin_ctz(value); +} + +static DN_FORCEINLINE(dn_simdhash_search_vector) +build_search_vector (uint8_t needle) +{ +#ifdef DN_SIMDHASH_USE_SCALAR_FALLBACK + return needle; +#else + dn_simdhash_suffixes result; + // this produces a splat in wasm, and the other architectures are fine too + dn_u8x16 needles = { + needle, needle, needle, needle, needle, needle, needle, needle, + needle, needle, needle, needle, needle, needle, needle, needle + }; + result.vec = needles; + return result; +#endif +} + +// returns an index in range 0-13 on match, 14-32 if no match +static DN_FORCEINLINE(uint32_t) +find_first_matching_suffix ( + dn_simdhash_search_vector needle, + // Only used by the vectorized implementations; discarded by scalar. + dn_simdhash_suffixes haystack, + // HACK: Pass the address of haystack.values directly, for scalar fallback. + // Without this, clang makes a full unaligned copy of haystack before calling us. + // Discarded by the vectorized implementations. + uint8_t haystack_values[DN_SIMDHASH_VECTOR_WIDTH], + uint32_t count +) { +#if defined(__wasm_simd128__) + return ctz(wasm_i8x16_bitmask(wasm_i8x16_eq(needle.vec, haystack.vec))); +#elif defined(_M_AMD64) || defined(_M_X64) || (_M_IX86_FP == 2) || defined(__SSE2__) + return ctz(_mm_movemask_epi8(_mm_cmpeq_epi8(needle.m128, haystack.m128))); +#elif defined(__ARM_NEON) + dn_simdhash_suffixes match_vector; + // Completely untested. 
+	static const dn_simdhash_suffixes byte_mask = {
+		1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128
+	};
+	union {
+		uint8_t b[4];
+		uint32_t u;
+	} msb = { { 0 } }; // zero all four bytes: only b[0] and b[1] are written below, but ctz() reads the whole word
+	match_vector.vec = vceqq_u8(needle.vec, haystack.vec);
+	dn_simdhash_suffixes masked;
+	masked.vec = vandq_u8(match_vector.vec, byte_mask.vec);
+	msb.b[0] = vaddv_u8(vget_low_u8(masked.vec)); // one bit per matching lane 0-7 (low byte of u on little-endian)
+	msb.b[1] = vaddv_u8(vget_high_u8(masked.vec)); // one bit per matching lane 8-15
+	return ctz(msb.u);
+#else
+	// HACK: We can't put this in a common helper function without introducing a temporary
+	//  unaligned copy-from-table-to-stack in wasm-without-simd
+#define ITER(offset) \
+	if (needle == haystack_values[offset]) \
+		return offset;
+
+	// It is safe to unroll this without bounds checks
+	// One would expect this to blow out the branch predictor, but in my testing
+	//  it's significantly faster when there is no match, and slightly faster
+	//  for cases where there is a match.
+	// Looping from 0-count is slower than this in my testing, even though it's
+	//  going to check fewer suffixes most of the time - probably due to the
+	//  comparison against count for each suffix.
+	// FIXME: If we move this into the specialization header, we can limit the
+	//  number of unrolled iterations to the number of keys in the bucket.
+	ITER(0);
+	ITER(1);
+	ITER(2);
+	ITER(3);
+	ITER(4);
+	ITER(5);
+	ITER(6);
+	ITER(7);
+	ITER(8);
+	ITER(9);
+	ITER(10);
+	ITER(11);
+	ITER(12);
+	ITER(13);
+#undef ITER
+	return 32;
+#endif
+}
+
+#elif defined(_M_AMD64) || defined(_M_X64) || (_M_IX86_FP == 2) || defined(__SSE2__)
+// neither clang or gcc, but we have SSE2 available, so assume this is MSVC on x86 or x86-64
+// msvc neon intrinsics don't seem to expose a 128-bit wide vector so there's no neon in here
+#include <intrin.h> // for _BitScanForward
+
+static DN_FORCEINLINE(uint32_t)
+ctz (uint32_t value)
+{
+	unsigned long result = 0;
+	if (_BitScanForward(&result, value)) // returns 0 when value has no set bit
+		return (uint32_t)result;
+	else
+		return 32;
+}
+
+#include <emmintrin.h>
+
+typedef union {
+	_Alignas(DN_SIMDHASH_VECTOR_WIDTH) __m128i m128;
+	_Alignas(DN_SIMDHASH_VECTOR_WIDTH) uint8_t values[DN_SIMDHASH_VECTOR_WIDTH];
+} dn_simdhash_suffixes;
+
+typedef dn_simdhash_suffixes dn_simdhash_search_vector;
+
+#define dn_simdhash_extract_lane(suffixes, lane) \
+	suffixes.values[lane]
+
+static DN_FORCEINLINE(dn_simdhash_search_vector)
+build_search_vector (uint8_t needle)
+{
+	dn_simdhash_suffixes result;
+	result.m128 = _mm_set1_epi8(needle); // broadcast needle into every byte lane
+	return result;
+}
+
+// returns an index in range 0-13 on match, 14-32 if no match
+static DN_FORCEINLINE(uint32_t)
+find_first_matching_suffix_internal (
+	__m128i needle, __m128i haystack,
+	uint32_t count
+) {
+	return ctz(_mm_movemask_epi8(_mm_cmpeq_epi8(needle, haystack)));
+}
+
+// use a macro to discard haystack_values, otherwise MSVC's codegen is worse
+#define find_first_matching_suffix(needle, haystack, haystack_values, count) \
+	find_first_matching_suffix_internal(needle.m128, haystack.m128, count)
+
+#else // unknown compiler and/or unknown non-simd arch
+
+#define DN_SIMDHASH_USE_SCALAR_FALLBACK 1
+
+#ifdef DN_SIMDHASH_WARNINGS
+#pragma message("WARNING: Unsupported architecture/compiler for dn_simdhash! 
Performance will be terrible!") +#endif + +typedef struct { + _Alignas(DN_SIMDHASH_VECTOR_WIDTH) uint8_t values[DN_SIMDHASH_VECTOR_WIDTH]; +} dn_simdhash_suffixes; + +typedef uint8_t dn_simdhash_search_vector; + +#define dn_simdhash_extract_lane(suffixes, lane) \ + suffixes.values[lane] + +static DN_FORCEINLINE(dn_simdhash_search_vector) +build_search_vector (uint8_t needle) +{ + return needle; +} + +// returns an index in range 0-14 on match, 32 if no match +static DN_FORCEINLINE(uint32_t) +find_first_matching_suffix ( + dn_simdhash_search_vector needle, dn_simdhash_suffixes haystack, + uint8_t haystack_values[DN_SIMDHASH_VECTOR_WIDTH], uint32_t count +) { + // TODO: It might be profitable to hand-unroll this loop, but right now doing so + // hits a bug in clang and generates really bad WASM. + // HACK: We can't put this in a common helper function without introducing a temporary + // unaligned copy-from-table-to-stack in wasm-without-simd + for (uint32_t i = 0; i < count; i++) + if (needle == haystack_values[i]) + return i; + return 32; +} + +#endif // end of clang/gcc or msvc or fallback + +#endif // __DN_SIMDHASH_ARCH_H__ diff --git a/src/native/containers/dn-simdhash-ght-compatible.c b/src/native/containers/dn-simdhash-ght-compatible.c new file mode 100644 index 0000000000000..330f2a14f7385 --- /dev/null +++ b/src/native/containers/dn-simdhash-ght-compatible.c @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+#ifndef NO_CONFIG_H
+#include <config.h>
+#endif
+#include "dn-simdhash.h"
+
+#include "dn-simdhash-utils.h"
+#include "dn-simdhash-ght-compatible.h"
+
+typedef struct dn_simdhash_ght_data { // per-table callbacks; any of them may be NULL
+	dn_simdhash_ght_hash_func hash_func;
+	dn_simdhash_ght_equal_func key_equal_func;
+	dn_simdhash_ght_destroy_func key_destroy_func;
+	dn_simdhash_ght_destroy_func value_destroy_func;
+} dn_simdhash_ght_data;
+
+static inline uint32_t
+dn_simdhash_ght_hash (dn_simdhash_ght_data data, void * key)
+{ // uses the table's hash_func when set, otherwise hashes the key pointer itself
+	dn_simdhash_ght_hash_func hash_func = data.hash_func;
+	if (hash_func)
+		return (uint32_t)hash_func(key);
+	else
+		// FIXME: Seed
+		return MurmurHash3_32_ptr(key, 0);
+}
+
+static inline int32_t
+dn_simdhash_ght_equals (dn_simdhash_ght_data data, void * lhs, void * rhs)
+{ // uses the table's key_equal_func when set, otherwise compares pointer identity
+	dn_simdhash_ght_equal_func equal_func = data.key_equal_func;
+	if (equal_func)
+		return equal_func(lhs, rhs);
+	else
+		return lhs == rhs;
+}
+
+static inline void
+dn_simdhash_ght_removed (dn_simdhash_ght_data data, void * key, void * value)
+{ // calls whichever destroy callbacks are set, key first and then value
+	dn_simdhash_ght_destroy_func key_destroy_func = data.key_destroy_func,
+		value_destroy_func = data.value_destroy_func;
+	if (key_destroy_func)
+		key_destroy_func((void *)key);
+	if (value_destroy_func)
+		value_destroy_func((void *)value);
+}
+
+static inline void
+dn_simdhash_ght_replaced (dn_simdhash_ght_data data, void * old_key, void * new_key, void * old_value, void * new_value)
+{ // destroys the old key/value only when the stored pointer actually changed
+	if (old_key != new_key) {
+		dn_simdhash_ght_destroy_func key_destroy_func = data.key_destroy_func;
+		if (key_destroy_func)
+			key_destroy_func((void *)old_key);
+	}
+
+	if (old_value != new_value) {
+		dn_simdhash_ght_destroy_func value_destroy_func = data.value_destroy_func;
+		if (value_destroy_func)
+			value_destroy_func((void *)old_value);
+	}
+}
+
+#define DN_SIMDHASH_T dn_simdhash_ght
+#define DN_SIMDHASH_KEY_T void *
+#define DN_SIMDHASH_VALUE_T void *
+#define DN_SIMDHASH_INSTANCE_DATA_T dn_simdhash_ght_data
+#define DN_SIMDHASH_KEY_HASHER dn_simdhash_ght_hash
+#define 
DN_SIMDHASH_KEY_EQUALS dn_simdhash_ght_equals +#define DN_SIMDHASH_ON_REMOVE dn_simdhash_ght_removed +#define DN_SIMDHASH_ON_REPLACE dn_simdhash_ght_replaced +#if SIZEOF_VOID_P == 8 +#define DN_SIMDHASH_BUCKET_CAPACITY 11 +#else +#define DN_SIMDHASH_BUCKET_CAPACITY 12 +#endif +#define DN_SIMDHASH_NO_DEFAULT_NEW 1 + +#include "dn-simdhash-specialization.h" +#include "dn-simdhash-ght-compatible.h" + +dn_simdhash_ght_t * +dn_simdhash_ght_new ( + dn_simdhash_ght_hash_func hash_func, dn_simdhash_ght_equal_func key_equal_func, + uint32_t capacity, dn_allocator_t *allocator +) +{ + dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func; + return hash; +} + +dn_simdhash_ght_t * +dn_simdhash_ght_new_full ( + dn_simdhash_ght_hash_func hash_func, dn_simdhash_ght_equal_func key_equal_func, + dn_simdhash_ght_destroy_func key_destroy_func, dn_simdhash_ght_destroy_func value_destroy_func, + uint32_t capacity, dn_allocator_t *allocator +) +{ + dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_destroy_func = key_destroy_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).value_destroy_func = value_destroy_func; + return hash; +} + +void +dn_simdhash_ght_insert_replace ( + dn_simdhash_ght_t *hash, + void * key, void * value, + int32_t overwrite_key +) +{ + check_self(hash); + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); + dn_simdhash_insert_mode imode = overwrite_key + ? 
DN_SIMDHASH_INSERT_MODE_OVERWRITE_KEY_AND_VALUE + : DN_SIMDHASH_INSERT_MODE_OVERWRITE_VALUE; + + dn_simdhash_insert_result ok = DN_SIMDHASH_TRY_INSERT_INTERNAL(hash, key, key_hash, value, imode); + if (ok == DN_SIMDHASH_INSERT_NEED_TO_GROW) { + dn_simdhash_buffers_t old_buffers = dn_simdhash_ensure_capacity_internal(hash, dn_simdhash_capacity(hash) + 1); + if (old_buffers.buckets) { + DN_SIMDHASH_REHASH_INTERNAL(hash, old_buffers); + dn_simdhash_free_buffers(old_buffers); + } + ok = DN_SIMDHASH_TRY_INSERT_INTERNAL(hash, key, key_hash, value, imode); + } + + switch (ok) { + case DN_SIMDHASH_INSERT_OK_ADDED_NEW: + hash->count++; + return; + case DN_SIMDHASH_INSERT_OK_OVERWROTE_EXISTING: + return; + // We should always return one of the first two + case DN_SIMDHASH_INSERT_KEY_ALREADY_PRESENT: + case DN_SIMDHASH_INSERT_NEED_TO_GROW: + default: + assert(0); + return; + } +} diff --git a/src/native/containers/dn-simdhash-ght-compatible.h b/src/native/containers/dn-simdhash-ght-compatible.h new file mode 100644 index 0000000000000..b99d67477ccc5 --- /dev/null +++ b/src/native/containers/dn-simdhash-ght-compatible.h @@ -0,0 +1,29 @@ +typedef void (*dn_simdhash_ght_destroy_func) (void * data); +typedef unsigned int (*dn_simdhash_ght_hash_func) (const void * key); +typedef int32_t (*dn_simdhash_ght_equal_func) (const void * a, const void * b); + +dn_simdhash_ght_t * +dn_simdhash_ght_new ( + dn_simdhash_ght_hash_func hash_func, dn_simdhash_ght_equal_func key_equal_func, + uint32_t capacity, dn_allocator_t *allocator +); + +dn_simdhash_ght_t * +dn_simdhash_ght_new_full ( + dn_simdhash_ght_hash_func hash_func, dn_simdhash_ght_equal_func key_equal_func, + dn_simdhash_ght_destroy_func key_destroy_func, dn_simdhash_ght_destroy_func value_destroy_func, + uint32_t capacity, dn_allocator_t *allocator +); + +// compatible with g_hash_table_insert_replace +void +dn_simdhash_ght_insert_replace ( + dn_simdhash_ght_t *hash, + void * key, void * value, + int32_t overwrite_key +); + +// 
compatibility shims for the g_hash_table_ versions in glib.h +#define dn_simdhash_ght_insert(h,k,v) dn_simdhash_ght_insert_replace ((h),(k),(v),FALSE) +#define dn_simdhash_ght_replace(h,k,v) dn_simdhash_ght_insert_replace ((h),(k),(v),TRUE) +#define dn_simdhash_ght_add(h,k) dn_simdhash_ght_insert_replace ((h),(k),(k),TRUE) diff --git a/src/native/containers/dn-simdhash-ptr-ptr.c b/src/native/containers/dn-simdhash-ptr-ptr.c new file mode 100644 index 0000000000000..25e7530d39dc4 --- /dev/null +++ b/src/native/containers/dn-simdhash-ptr-ptr.c @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef NO_CONFIG_H +#include +#endif +#include "dn-simdhash.h" + +#include "dn-simdhash-utils.h" + +#define DN_SIMDHASH_T dn_simdhash_ptr_ptr +#define DN_SIMDHASH_KEY_T void * +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_KEY_HASHER(hash, key) (MurmurHash3_32_ptr(key, 0)) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs) +#if SIZEOF_VOID_P == 8 +#define DN_SIMDHASH_BUCKET_CAPACITY 11 +#else +#define DN_SIMDHASH_BUCKET_CAPACITY 12 +#endif + +#include "dn-simdhash-specialization.h" diff --git a/src/native/containers/dn-simdhash-specialization-declarations.h b/src/native/containers/dn-simdhash-specialization-declarations.h new file mode 100644 index 0000000000000..585f2094ee58c --- /dev/null +++ b/src/native/containers/dn-simdhash-specialization-declarations.h @@ -0,0 +1,76 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// Gluing macro expansions together requires nested macro invocation :/ +#ifndef DN_SIMDHASH_GLUE +#define DN_SIMDHASH_GLUE_INNER(a, b) a ## b +#define DN_SIMDHASH_GLUE(a,b) DN_SIMDHASH_GLUE_INNER(a, b) +#endif +#ifndef DN_SIMDHASH_GLUE_3 +#define DN_SIMDHASH_GLUE_3_INNER(a, b, c) a ## b ## c +#define DN_SIMDHASH_GLUE_3(a, b, c) DN_SIMDHASH_GLUE_3_INNER(a, b, c) +#endif + +#ifndef DN_SIMDHASH_ACCESSOR_SUFFIX +#define DN_SIMDHASH_ACCESSOR_SUFFIX +#endif + +// We generate unique names for each specialization so that they will be easy to distinguish +// when debugging, profiling, or disassembling. Otherwise they would have linker-assigned names +#define DN_SIMDHASH_T_NAME DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_t) +#define DN_SIMDHASH_T_PTR DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_t *) +#define DN_SIMDHASH_T_VTABLE DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_vtable) +#define DN_SIMDHASH_T_META DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_meta) +#define DN_SIMDHASH_SCAN_BUCKET_INTERNAL DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_scan_bucket_internal) +#define DN_SIMDHASH_FIND_VALUE_INTERNAL DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_find_value_internal) +#define DN_SIMDHASH_TRY_INSERT_INTERNAL DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_try_insert_internal) +#define DN_SIMDHASH_REHASH_INTERNAL DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_rehash_internal) +#define DN_SIMDHASH_NEW DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_new) +#define DN_SIMDHASH_TRY_ADD DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_add,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_ADD_WITH_HASH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_add_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_GET_VALUE DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_get_value,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_get_value_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_REMOVE DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_remove,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_REMOVE_WITH_HASH 
DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_remove_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_REPLACE_VALUE DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_replace_value,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_REPLACE_VALUE_WITH_HASH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_replace_value_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_FOREACH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_foreach,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_FOREACH_FUNC DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_foreach_func,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_DESTROY_ALL DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_destroy_all) + +typedef void (*DN_SIMDHASH_FOREACH_FUNC) (DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value, void *user_data); + +// Declare a specific alias so intellisense gives more helpful info +typedef dn_simdhash_t DN_SIMDHASH_T_NAME; + +#ifndef DN_SIMDHASH_NO_DEFAULT_NEW +DN_SIMDHASH_T_PTR +DN_SIMDHASH_NEW (uint32_t capacity, dn_allocator_t *allocator); +#endif + +uint8_t +DN_SIMDHASH_TRY_ADD (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value); + +uint8_t +DN_SIMDHASH_TRY_ADD_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T value); + +uint8_t +DN_SIMDHASH_TRY_GET_VALUE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T *result); + +uint8_t +DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T *result); + +uint8_t +DN_SIMDHASH_TRY_REMOVE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key); + +uint8_t +DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash); + +uint8_t +DN_SIMDHASH_TRY_REPLACE_VALUE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T new_value); + +uint8_t +DN_SIMDHASH_TRY_REPLACE_VALUE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T new_value); + +void +DN_SIMDHASH_FOREACH 
(DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_FOREACH_FUNC func, void *user_data); diff --git a/src/native/containers/dn-simdhash-specialization.h b/src/native/containers/dn-simdhash-specialization.h new file mode 100644 index 0000000000000..093ffaf20a7a4 --- /dev/null +++ b/src/native/containers/dn-simdhash-specialization.h @@ -0,0 +1,584 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifdef __DN_SIMDHASH_SPECIALIZATION_H__ +#error Specialization header already included +#else +#define __DN_SIMDHASH_SPECIALIZATION_H__ +#endif + +#include "dn-simdhash.h" +#include "dn-simdhash-utils.h" +#include "dn-simdhash-arch.h" + +#ifndef DN_SIMDHASH_T +#error Expected DN_SIMDHASH_T definition i.e. dn_simdhash_string_ptr +#endif + +#ifndef DN_SIMDHASH_KEY_T +#error Expected DN_SIMDHASH_KEY_T definition i.e. const char * +#endif + +#ifndef DN_SIMDHASH_VALUE_T +#error Expected DN_SIMDHASH_VALUE_T definition i.e. int +#endif + +// If specified, we pass instance data to the handlers by-value, otherwise we +// pass the pointer to the hash itself by-value. This is enough to allow clang +// to hoist the load of the instance data out of the key scan loop, though it +// won't hoist it all the way out of the bucket scan loop. 
+#ifndef DN_SIMDHASH_INSTANCE_DATA_T +#define DN_SIMDHASH_GET_DATA(hash) (hash) +#define DN_SIMDHASH_INSTANCE_DATA_T DN_SIMDHASH_T_PTR +#else // DN_SIMDHASH_INSTANCE_DATA_T +#define DN_SIMDHASH_GET_DATA(hash) dn_simdhash_instance_data(DN_SIMDHASH_INSTANCE_DATA_T, hash) +#endif // DN_SIMDHASH_INSTANCE_DATA_T + +#ifndef DN_SIMDHASH_KEY_HASHER +#error Expected DN_SIMDHASH_KEY_HASHER definition with signature: uint32_t (DN_SIMDHASH_INSTANCE_DATA_T data, KEY_T key) +#endif + +#ifndef DN_SIMDHASH_KEY_EQUALS +#error Expected DN_SIMDHASH_KEY_EQUALS definition with signature: int (DN_SIMDHASH_INSTANCE_DATA_T data, KEY_T lhs, KEY_T rhs) that returns 1 for match +#endif + +#ifndef DN_SIMDHASH_ON_REPLACE +#define DN_SIMDHASH_HAS_REPLACE_HANDLER 0 +#define DN_SIMDHASH_ON_REPLACE(data, old_key, new_key, old_value, new_value) +#else // DN_SIMDHASH_ON_REPLACE +#define DN_SIMDHASH_HAS_REPLACE_HANDLER 1 +#ifndef DN_SIMDHASH_ON_REMOVE +#error Expected DN_SIMDHASH_ON_REMOVE(data, key, value) to be defined. +#endif +#endif // DN_SIMDHASH_ON_REPLACE + +#ifndef DN_SIMDHASH_ON_REMOVE +#define DN_SIMDHASH_HAS_REMOVE_HANDLER 0 +#define DN_SIMDHASH_ON_REMOVE(data, key, value) +#else // DN_SIMDHASH_ON_REMOVE +#define DN_SIMDHASH_HAS_REMOVE_HANDLER 1 +#ifndef DN_SIMDHASH_ON_REPLACE +#error Expected DN_SIMDHASH_ON_REPLACE(data, old_key, new_key, old_value, new_value) to be defined. +#endif +#endif // DN_SIMDHASH_ON_REMOVE + +#ifndef DN_SIMDHASH_BUCKET_CAPACITY +// TODO: Find some way to automatically select an ideal bucket capacity based on key size. +// Some sort of trick using _Generic? 
+#define DN_SIMDHASH_BUCKET_CAPACITY DN_SIMDHASH_DEFAULT_BUCKET_CAPACITY +#endif + +#include "dn-simdhash-specialization-declarations.h" + +static_assert(DN_SIMDHASH_BUCKET_CAPACITY <= DN_SIMDHASH_MAX_BUCKET_CAPACITY, "Maximum bucket capacity exceeded"); +static_assert(DN_SIMDHASH_BUCKET_CAPACITY > 1, "Bucket capacity too low"); + +// We set bucket_size_bytes to sizeof() this struct so that we can let the compiler +// generate the most optimal code possible when we're manipulating pointers to it - +// that is, it can do mul-by-constant instead of mul-by-(hash->meta.etc) +typedef struct bucket_t { + _Alignas(DN_SIMDHASH_VECTOR_WIDTH) dn_simdhash_suffixes suffixes; + DN_SIMDHASH_KEY_T keys[DN_SIMDHASH_BUCKET_CAPACITY]; +} bucket_t; + +static_assert((sizeof (bucket_t) % DN_SIMDHASH_VECTOR_WIDTH) == 0, "Bucket size is not vector aligned"); + + +// While we've inlined these constants into the specialized code we're generating, +// the generic code in dn-simdhash.c needs them, so we put them in this meta header +// that is referenced by every hash instance. +dn_simdhash_meta_t DN_SIMDHASH_T_META = { + DN_SIMDHASH_BUCKET_CAPACITY, + sizeof(bucket_t), + sizeof(DN_SIMDHASH_KEY_T), + sizeof(DN_SIMDHASH_VALUE_T), + sizeof(DN_SIMDHASH_INSTANCE_DATA_T), +}; + + +static DN_FORCEINLINE(void) +check_self (DN_SIMDHASH_T_PTR self) +{ +#ifdef NDEBUG + // In release builds, just nullcheck. Checking meta adds measurable overhead. + dn_simdhash_assert(self); +#else + // Verifies both that the self-ptr is non-null and that the meta pointer matches + // what it should be. This detects passing the wrong kind of simdhash_t pointer + // to one of the APIs, since C doesn't have fully type-safe pointers. 
+ uint8_t ok = self && (self->meta == &DN_SIMDHASH_T_META); + dn_simdhash_assert(ok); +#endif +} + + +static DN_FORCEINLINE(bucket_t *) +address_of_bucket (dn_simdhash_buffers_t buffers, uint32_t bucket_index) +{ + return &((bucket_t *)buffers.buckets)[bucket_index]; +} + +static DN_FORCEINLINE(DN_SIMDHASH_VALUE_T *) +address_of_value (dn_simdhash_buffers_t buffers, uint32_t value_slot_index) +{ + return &((DN_SIMDHASH_VALUE_T *)buffers.values)[value_slot_index]; +} + +#define DN_SIMDHASH_SCAN_BUCKET_NO_OVERFLOW -1 +#define DN_SIMDHASH_SCAN_BUCKET_OVERFLOWED -2 + +// This helper is used to locate the first matching key in a given bucket, so that add +// operations don't potentially have to scan the whole table twice when hashes collide +// On success: returns index (0-n) +// On failure: returns -1 if bucket has not overflowed; -2 if it has +static DN_FORCEINLINE(int) +DN_SIMDHASH_SCAN_BUCKET_INTERNAL (DN_SIMDHASH_T_PTR hash, bucket_t *restrict bucket, DN_SIMDHASH_KEY_T needle, dn_simdhash_search_vector search_vector) +{ +#ifdef _MSC_VER + // MSVC won't do efficient lane extractions if we eager load the vector, + // so just operate through the pointer instead. + #define bucket_suffixes (bucket->suffixes) +#elif !defined(DN_SIMDHASH_USE_SCALAR_FALLBACK) + // Perform an eager load of the vector if SIMD is in use, even though we do + // byte loads to extract lanes on non-wasm platforms. It's faster on x64 for + // a reason I can't identify, and it significantly improves wasm codegen + dn_simdhash_suffixes bucket_suffixes = bucket->suffixes; +#else + // Load through the pointer instead. An eager load just copies to the stack for + // no good reason. 
+ #define bucket_suffixes (bucket->suffixes) +#endif + uint8_t count = dn_simdhash_extract_lane(bucket_suffixes, DN_SIMDHASH_COUNT_SLOT), + overflow_count = dn_simdhash_extract_lane(bucket_suffixes, DN_SIMDHASH_CASCADED_SLOT); + // We could early-out here when count==0, but it doesn't appear to meaningfully improve + // search performance to do so, and might actually worsen it + uint32_t index = find_first_matching_suffix(search_vector, bucket_suffixes, bucket_suffixes.values, count); + for (; index < count; index++) { + // FIXME: Could be profitable to manually hoist the data load outside of the loop, + // if not out of SCAN_BUCKET_INTERNAL entirely. Clang appears to do LICM on it. + // It's better to index bucket->keys each iteration inside the loop than to precompute + // a pointer outside and bump the pointer, because in many cases the bucket will be + // empty, and in many other cases it will have one match. Putting the index inside the + // loop means that for empty/no-match buckets we don't do the index calculation at all. + if (DN_SIMDHASH_KEY_EQUALS(DN_SIMDHASH_GET_DATA(hash), needle, bucket->keys[index])) + return index; + } + +#undef bucket_suffixes + + if (overflow_count) + return DN_SIMDHASH_SCAN_BUCKET_OVERFLOWED; + else + return DN_SIMDHASH_SCAN_BUCKET_NO_OVERFLOW; +} + +// Helper macros so that we can optimize and change scan logic more easily +#define BEGIN_SCAN_BUCKETS(initial_index, bucket_index, bucket_address) \ + { \ + uint32_t bucket_index = initial_index, scan_buckets_length = buffers.buckets_length; \ + bucket_t *restrict bucket_address = address_of_bucket(buffers, bucket_index); \ + do { + +#define END_SCAN_BUCKETS(initial_index, bucket_index, bucket_address) \ + bucket_index++; \ + bucket_address++; \ + /* Wrap around if we hit the last bucket. 
*/ \ + if (bucket_index >= scan_buckets_length) { \ + bucket_index = 0; \ + bucket_address = address_of_bucket(buffers, 0); \ + } \ + /* if bucket_index == initial_index, we reached our starting point */ \ + } while (bucket_index != initial_index); \ + } + +#define BEGIN_SCAN_PAIRS(buffers, key_address, value_address) \ + bucket_t *scan_bucket_address = address_of_bucket(buffers, 0); \ + for ( \ + uint32_t scan_i = 0, scan_bc = buffers.buckets_length, scan_value_slot_base = 0; \ + scan_i < scan_bc; scan_i++, scan_bucket_address++, scan_value_slot_base += DN_SIMDHASH_BUCKET_CAPACITY \ + ) { \ + uint32_t scan_c = dn_simdhash_bucket_count(scan_bucket_address->suffixes); \ + for (uint32_t scan_j = 0; scan_j < scan_c; scan_j++) { \ + DN_SIMDHASH_KEY_T *key_address = &scan_bucket_address->keys[scan_j]; \ + DN_SIMDHASH_VALUE_T *value_address = address_of_value(buffers, scan_value_slot_base + scan_j); + +#define END_SCAN_PAIRS(buffers, key_address, value_address) \ + } \ + } + +// FIXME: inline? might improve performance for bucket overflow, but would +// increase code size, and maybe blow out icache. clang seems to inline it anyway. 
+static void +adjust_cascaded_counts (dn_simdhash_buffers_t buffers, uint32_t first_bucket_index, uint32_t last_bucket_index, uint8_t increase) +{ + BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + if (bucket_index == last_bucket_index) + break; + + uint8_t cascaded_count = dn_simdhash_bucket_cascaded_count(bucket_address->suffixes); + if (cascaded_count < 255) { + if (increase) + dn_simdhash_bucket_set_cascaded_count(bucket_address->suffixes, cascaded_count + 1); + else { + dn_simdhash_assert(cascaded_count > 0); + dn_simdhash_bucket_set_cascaded_count(bucket_address->suffixes, cascaded_count - 1); + } + } + END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) +} + +static DN_SIMDHASH_VALUE_T * +DN_SIMDHASH_FIND_VALUE_INTERNAL (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash) +{ + dn_simdhash_buffers_t buffers = hash->buffers; + uint8_t suffix = dn_simdhash_select_suffix(key_hash); + uint32_t first_bucket_index = dn_simdhash_select_bucket_index(buffers, key_hash); + dn_simdhash_search_vector search_vector = build_search_vector(suffix); + + BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(hash, bucket_address, key, search_vector); + if (index_in_bucket >= 0) { + uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + index_in_bucket; + return address_of_value(buffers, value_slot_index); + } else if (index_in_bucket == DN_SIMDHASH_SCAN_BUCKET_NO_OVERFLOW) { + return NULL; + } + END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + + return NULL; +} + +typedef enum dn_simdhash_insert_mode { + // Ensures that no matching key exists in the hash, then adds the key/value pair + DN_SIMDHASH_INSERT_MODE_ENSURE_UNIQUE, + // If a matching key exists in the hash, overwrite its value but leave the key alone + DN_SIMDHASH_INSERT_MODE_OVERWRITE_VALUE, + // If a matching key exists in the hash, overwrite both the key 
and the value + DN_SIMDHASH_INSERT_MODE_OVERWRITE_KEY_AND_VALUE, + // Do not scan for existing matches before adding the new key/value pair. + DN_SIMDHASH_INSERT_MODE_REHASHING, +} dn_simdhash_insert_mode; + +static void +do_overwrite ( + DN_SIMDHASH_T_PTR hash, uint32_t bucket_index, bucket_t *bucket_address, int index_in_bucket, + DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value, uint8_t overwrite_key +) { + DN_SIMDHASH_KEY_T *key_ptr = &bucket_address->keys[index_in_bucket]; + DN_SIMDHASH_VALUE_T *value_ptr = address_of_value(hash->buffers, (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + index_in_bucket); +#if DN_SIMDHASH_HAS_REPLACE_HANDLER + DN_SIMDHASH_KEY_T old_key = *key_ptr; + DN_SIMDHASH_VALUE_T old_value = *value_ptr; +#endif + if (overwrite_key) + *key_ptr = key; + *value_ptr = value; +#if DN_SIMDHASH_HAS_REPLACE_HANDLER + DN_SIMDHASH_ON_REPLACE(DN_SIMDHASH_GET_DATA(hash), old_key, overwrite_key ? key : old_key, old_value, value); +#endif +} + +static dn_simdhash_insert_result +DN_SIMDHASH_TRY_INSERT_INTERNAL (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T value, dn_simdhash_insert_mode mode) +{ + // HACK: Early out. Better to grow without scanning here. 
+ // We're comparing with the computed grow_at_count threshold to maintain an appropriate load factor + if (hash->count >= hash->grow_at_count) { + // printf ("hash->count %d >= hash->grow_at_count %d\n", hash->count, hash->grow_at_count); + return DN_SIMDHASH_INSERT_NEED_TO_GROW; + } + + dn_simdhash_buffers_t buffers = hash->buffers; + uint8_t suffix = dn_simdhash_select_suffix(key_hash); + uint32_t first_bucket_index = dn_simdhash_select_bucket_index(hash->buffers, key_hash); + dn_simdhash_search_vector search_vector = build_search_vector(suffix); + + BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + // If necessary, check the current bucket for the key + if (mode != DN_SIMDHASH_INSERT_MODE_REHASHING) { + int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(hash, bucket_address, key, search_vector); + if (index_in_bucket >= 0) { + if ( + (mode == DN_SIMDHASH_INSERT_MODE_OVERWRITE_KEY_AND_VALUE) || + (mode == DN_SIMDHASH_INSERT_MODE_OVERWRITE_VALUE) + ) { + do_overwrite ( + hash, bucket_index, bucket_address, index_in_bucket, + key, value, (mode == DN_SIMDHASH_INSERT_MODE_OVERWRITE_KEY_AND_VALUE) + ); + return DN_SIMDHASH_INSERT_OK_OVERWROTE_EXISTING; + } else + return DN_SIMDHASH_INSERT_KEY_ALREADY_PRESENT; + } + } + + // The current bucket doesn't contain the key, or duplicate checks are disabled (for rehashing), + // so attempt to insert into the bucket + uint8_t new_index = dn_simdhash_bucket_count(bucket_address->suffixes); + if (new_index < DN_SIMDHASH_BUCKET_CAPACITY) { + // Calculate key address early to reduce odds of a stall + DN_SIMDHASH_KEY_T *restrict key_slot_address = &bucket_address->keys[new_index]; + // We found a bucket with space, so claim the first free slot + dn_simdhash_bucket_set_count(bucket_address->suffixes, new_index + 1); + dn_simdhash_bucket_set_suffix(bucket_address->suffixes, new_index, suffix); + // Now store the key, it's probably in the same cache line as the count/suffix + *key_slot_address = key; + // Now 
store the value, it's in a different cache line + uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + new_index; + DN_SIMDHASH_VALUE_T *restrict value_slot_address = address_of_value(buffers, value_slot_index); + *value_slot_address = value; + // printf("Inserted [%zd, %zd] in bucket %d at index %d\n", key, value, bucket_index, new_index); + // If we cascaded out of our original target bucket, scan through our probe path + // and increase the cascade counters. We have to wait until now to do that, because + // during the process of getting here we may end up finding a duplicate, which would + // leave the cascade counters in a corrupted state + adjust_cascaded_counts(buffers, first_bucket_index, bucket_index, 1); + return DN_SIMDHASH_INSERT_OK_ADDED_NEW; + } + + // The current bucket is full, so try the next bucket. + END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + + return DN_SIMDHASH_INSERT_NEED_TO_GROW; +} + +static void +DN_SIMDHASH_REHASH_INTERNAL (DN_SIMDHASH_T_PTR hash, dn_simdhash_buffers_t old_buffers) +{ + BEGIN_SCAN_PAIRS(old_buffers, key_address, value_address) + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), *key_address); + // This theoretically can't fail, since we just grew the container and we + // wrap around to the beginning when there's a collision in the last bucket. 
+ dn_simdhash_insert_result ok = DN_SIMDHASH_TRY_INSERT_INTERNAL( + hash, *key_address, key_hash, + *value_address, + DN_SIMDHASH_INSERT_MODE_REHASHING + ); + dn_simdhash_assert(ok == DN_SIMDHASH_INSERT_OK_ADDED_NEW); + END_SCAN_PAIRS(old_buffers, key_address, value_address) +} + +#if DN_SIMDHASH_HAS_REMOVE_HANDLER +static void +DN_SIMDHASH_DESTROY_ALL (DN_SIMDHASH_T_PTR hash) +{ + dn_simdhash_buffers_t buffers = hash->buffers; + BEGIN_SCAN_PAIRS(buffers, key_address, value_address) + DN_SIMDHASH_ON_REMOVE(DN_SIMDHASH_GET_DATA(hash), *key_address, *value_address); + END_SCAN_PAIRS(buffers, key_address, value_address) +} +#endif + + +// TODO: Store this by-reference instead of inline in the hash? +dn_simdhash_vtable_t DN_SIMDHASH_T_VTABLE = { + DN_SIMDHASH_REHASH_INTERNAL, +#if DN_SIMDHASH_HAS_REMOVE_HANDLER + DN_SIMDHASH_DESTROY_ALL, +#else + NULL, +#endif +}; + + +#ifndef DN_SIMDHASH_NO_DEFAULT_NEW +DN_SIMDHASH_T_PTR +DN_SIMDHASH_NEW (uint32_t capacity, dn_allocator_t *allocator) +{ + // If this isn't satisfied, the generic code will allocate incorrectly sized buffers + // HACK: Use static_assert because for some reason assert produces unused variable warnings only on CI + struct silence_nuisance_msvc_warning { bucket_t a, b; }; + static_assert( + sizeof(struct silence_nuisance_msvc_warning) == (sizeof(bucket_t) * 2), + "Inconsistent spacing/sizing for bucket_t" + ); + + return dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); +} +#endif + +uint8_t +DN_SIMDHASH_TRY_ADD (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value) +{ + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); + return DN_SIMDHASH_TRY_ADD_WITH_HASH(hash, key, key_hash, value); +} + +uint8_t +DN_SIMDHASH_TRY_ADD_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T value) +{ + check_self(hash); + + dn_simdhash_insert_result ok = 
DN_SIMDHASH_TRY_INSERT_INTERNAL(hash, key, key_hash, value, DN_SIMDHASH_INSERT_MODE_ENSURE_UNIQUE); + if (ok == DN_SIMDHASH_INSERT_NEED_TO_GROW) { + dn_simdhash_buffers_t old_buffers = dn_simdhash_ensure_capacity_internal(hash, dn_simdhash_capacity(hash) + 1); + if (old_buffers.buckets) { + DN_SIMDHASH_REHASH_INTERNAL(hash, old_buffers); + dn_simdhash_free_buffers(old_buffers); + } + ok = DN_SIMDHASH_TRY_INSERT_INTERNAL(hash, key, key_hash, value, DN_SIMDHASH_INSERT_MODE_ENSURE_UNIQUE); + } + + switch (ok) { + case DN_SIMDHASH_INSERT_OK_ADDED_NEW: + hash->count++; + return 1; + case DN_SIMDHASH_INSERT_OK_OVERWROTE_EXISTING: + // This shouldn't happen + dn_simdhash_assert(!"Overwrote an existing item while adding"); + return 1; + case DN_SIMDHASH_INSERT_KEY_ALREADY_PRESENT: + return 0; + case DN_SIMDHASH_INSERT_NEED_TO_GROW: + // We should always have enough space after growing once. + default: + dn_simdhash_assert(!"Failed to add a new item but there was no existing item"); + return 0; + } +} + +uint8_t +DN_SIMDHASH_TRY_GET_VALUE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T *result) +{ + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); + return DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH(hash, key, key_hash, result); +} + +uint8_t +DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T *result) +{ + check_self(hash); + + DN_SIMDHASH_VALUE_T *value_ptr = DN_SIMDHASH_FIND_VALUE_INTERNAL(hash, key, key_hash); + if (!value_ptr) + return 0; + if (result) + *result = *value_ptr; + return 1; +} + +uint8_t +DN_SIMDHASH_TRY_REMOVE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key) +{ + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); + return DN_SIMDHASH_TRY_REMOVE_WITH_HASH(hash, key, key_hash); +} + +uint8_t +DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, 
uint32_t key_hash) +{ + check_self(hash); + + dn_simdhash_buffers_t buffers = hash->buffers; + uint8_t suffix = dn_simdhash_select_suffix(key_hash); + uint32_t first_bucket_index = dn_simdhash_select_bucket_index(buffers, key_hash); + dn_simdhash_search_vector search_vector = build_search_vector(suffix); + + BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(hash, bucket_address, key, search_vector); + if (index_in_bucket >= 0) { + // We found the item. Replace it with the last item in the bucket, then erase + // the last item in the bucket. This ensures sequential scans still work. + uint8_t bucket_count = dn_simdhash_bucket_count(bucket_address->suffixes), + replacement_index_in_bucket = bucket_count - 1; + uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + index_in_bucket, + replacement_value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + replacement_index_in_bucket; + + DN_SIMDHASH_VALUE_T *value_address = address_of_value(buffers, value_slot_index); + DN_SIMDHASH_VALUE_T *replacement_address = address_of_value(buffers, replacement_value_slot_index); + DN_SIMDHASH_KEY_T *key_address = &bucket_address->keys[index_in_bucket]; + DN_SIMDHASH_KEY_T *replacement_key_address = &bucket_address->keys[replacement_index_in_bucket]; + +#if DN_SIMDHASH_HAS_REMOVE_HANDLER + // Store for later, so we can run the callback after we're done removing the item + DN_SIMDHASH_VALUE_T value = *value_address; + // The key used for lookup may not be the key that was actually stored inside us, + // so make sure we store the one that was inside and destroy that one + DN_SIMDHASH_KEY_T actual_key = *key_address; +#endif + + hash->count--; + + // Update count first + dn_simdhash_bucket_set_count(bucket_address->suffixes, bucket_count - 1); + // Rotate replacement suffix from the end of the bucket to here + dn_simdhash_bucket_set_suffix( + bucket_address->suffixes, 
index_in_bucket, + bucket_address->suffixes.values[replacement_index_in_bucket] + ); + // Zero replacement suffix's old slot so it won't produce false positives in scans + dn_simdhash_bucket_set_suffix( + bucket_address->suffixes, replacement_index_in_bucket, 0 + ); + // Rotate replacement value from the end of the bucket to here + *value_address = *replacement_address; + // Rotate replacement key from the end of the bucket to here + *key_address = *replacement_key_address; + // Erase replacement key/value's old slots + // Skipped because memset is slow on wasm + // memset(replacement_key_address, 0, sizeof(DN_SIMDHASH_KEY_T)); + // memset(replacement_address, 0, sizeof(DN_SIMDHASH_VALUE_T)); + + // If this item cascaded out of its original target bucket, we need + // to go through all the buckets we visited on the way here and reduce + // their cascade counters (if possible), to maintain better scan performance. + if (bucket_index != first_bucket_index) + adjust_cascaded_counts(buffers, first_bucket_index, bucket_index, 0); + +#if DN_SIMDHASH_HAS_REMOVE_HANDLER + // We've finished removing the item, so we're in a consistent state and can notify + DN_SIMDHASH_ON_REMOVE(DN_SIMDHASH_GET_DATA(hash), actual_key, value); +#endif + + return 1; + } else if (index_in_bucket == DN_SIMDHASH_SCAN_BUCKET_NO_OVERFLOW) + return 0; + END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + + return 0; +} + +uint8_t +DN_SIMDHASH_TRY_REPLACE_VALUE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T new_value) +{ + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); + return DN_SIMDHASH_TRY_REPLACE_VALUE_WITH_HASH(hash, key, key_hash, new_value); +} + +uint8_t +DN_SIMDHASH_TRY_REPLACE_VALUE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T new_value) +{ + check_self(hash); + + DN_SIMDHASH_VALUE_T *value_ptr = DN_SIMDHASH_FIND_VALUE_INTERNAL(hash, key, key_hash); 
+ if (!value_ptr) + return 0; +#if DN_SIMDHASH_HAS_REPLACE_HANDLER + DN_SIMDHASH_VALUE_T old_value = *value_ptr; +#endif + *value_ptr = new_value; +#if DN_SIMDHASH_HAS_REPLACE_HANDLER + DN_SIMDHASH_ON_REPLACE(DN_SIMDHASH_GET_DATA(hash), key, key, old_value, new_value); +#endif + return 1; +} + +void +DN_SIMDHASH_FOREACH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_FOREACH_FUNC func, void *user_data) +{ + check_self(hash); + dn_simdhash_assert(func); + + dn_simdhash_buffers_t buffers = hash->buffers; + BEGIN_SCAN_PAIRS(buffers, key_address, value_address) + func(*key_address, *value_address, user_data); + END_SCAN_PAIRS(buffers, key_address, value_address) +} diff --git a/src/native/containers/dn-simdhash-specializations.h b/src/native/containers/dn-simdhash-specializations.h new file mode 100644 index 0000000000000..4966c7575d19a --- /dev/null +++ b/src/native/containers/dn-simdhash-specializations.h @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __DN_SIMDHASH_SPECIALIZATIONS_H__ +#define __DN_SIMDHASH_SPECIALIZATIONS_H__ + +#include "dn-simdhash.h" + +typedef struct dn_simdhash_str_key dn_simdhash_str_key; + +#define DN_SIMDHASH_T dn_simdhash_string_ptr +#define DN_SIMDHASH_KEY_T dn_simdhash_str_key +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_ACCESSOR_SUFFIX _raw + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T +#undef DN_SIMDHASH_ACCESSOR_SUFFIX + +#include "dn-simdhash-string-ptr.h" + + +#define DN_SIMDHASH_T dn_simdhash_u32_ptr +#define DN_SIMDHASH_KEY_T uint32_t +#define DN_SIMDHASH_VALUE_T void * + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T + + +#define DN_SIMDHASH_T dn_simdhash_ptr_ptr +#define DN_SIMDHASH_KEY_T void * +#define DN_SIMDHASH_VALUE_T void * + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T + + +#define DN_SIMDHASH_T dn_simdhash_ght +#define DN_SIMDHASH_KEY_T void * +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_NO_DEFAULT_NEW 1 + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T +#undef DN_SIMDHASH_NO_DEFAULT_NEW + +#include "dn-simdhash-ght-compatible.h" + +#endif diff --git a/src/native/containers/dn-simdhash-string-ptr.c b/src/native/containers/dn-simdhash-string-ptr.c new file mode 100644 index 0000000000000..87c00f40f0182 --- /dev/null +++ b/src/native/containers/dn-simdhash-string-ptr.c @@ -0,0 +1,93 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef NO_CONFIG_H +#include +#endif +#include "dn-simdhash.h" + +#include "dn-simdhash-utils.h" + +typedef struct dn_simdhash_str_key { + const char *text; + // We keep a precomputed hash to speed up rehashing and scans. + uint32_t hash; +#if SIZEOF_VOID_P == 8 + // HACK: Perfect cache alignment isn't possible for a 12-byte struct, so pad it to 16 bytes + uint32_t padding; +#endif +} dn_simdhash_str_key; + +// Key comparer: returns 1 when both keys refer to the same string contents, 0 otherwise. +// dn_simdhash_make_str_key produces a NULL text for a NULL key, so NULL must never reach strcmp. +static inline int32_t +dn_simdhash_str_equal (dn_simdhash_str_key v1, dn_simdhash_str_key v2) +{ + if (v1.text == v2.text) + return 1; + // The pointers differ, so at most one side is NULL; a NULL key never equals a real string + if (!v1.text || !v2.text) + return 0; + return strcmp(v1.text, v2.text) == 0; +} + +static inline uint32_t +dn_simdhash_str_hash (dn_simdhash_str_key v1) +{ + return v1.hash; +} + +#define DN_SIMDHASH_T dn_simdhash_string_ptr +#define DN_SIMDHASH_KEY_T dn_simdhash_str_key +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_simdhash_str_hash(key) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_simdhash_str_equal(lhs, rhs) +#define DN_SIMDHASH_ACCESSOR_SUFFIX _raw + +// perfect cache alignment. 32-bit ptrs: 8-byte keys. 64-bit: 16-byte keys. +#if SIZEOF_VOID_P == 8 +#define DN_SIMDHASH_BUCKET_CAPACITY 11 +#else +#define DN_SIMDHASH_BUCKET_CAPACITY 12 +#endif + +#include "dn-simdhash-specialization.h" +#include "dn-simdhash-string-ptr.h" + +static dn_simdhash_str_key +dn_simdhash_make_str_key (const char *text) +{ + dn_simdhash_str_key result = { 0, }; + if (text) { + // FIXME: Select a good seed. 
+ result.hash = MurmurHash3_32_streaming((uint8_t *)text, 0); + result.text = text; + } + return result; +} + +uint8_t +dn_simdhash_string_ptr_try_add (dn_simdhash_string_ptr_t *hash, const char *key, void *value) +{ + return dn_simdhash_string_ptr_try_add_raw(hash, dn_simdhash_make_str_key(key), value); +} + +uint8_t +dn_simdhash_string_ptr_try_get_value (dn_simdhash_string_ptr_t *hash, const char *key, void **result) +{ + return dn_simdhash_string_ptr_try_get_value_raw(hash, dn_simdhash_make_str_key(key), result); +} + +uint8_t +dn_simdhash_string_ptr_try_remove (dn_simdhash_string_ptr_t *hash, const char *key) +{ + return dn_simdhash_string_ptr_try_remove_raw(hash, dn_simdhash_make_str_key(key)); +} + +// Invokes func once per stored pair, passing the key's text pointer, the value and user_data. +// FIXME: Find a way to make this easier to define +void +dn_simdhash_string_ptr_foreach (dn_simdhash_string_ptr_t *hash, dn_simdhash_string_ptr_foreach_func func, void *user_data) +{ + // dn_simdhash_assert is not compiled out by NDEBUG, unlike plain assert + dn_simdhash_assert(hash); + dn_simdhash_assert(func); + + dn_simdhash_buffers_t buffers = hash->buffers; + BEGIN_SCAN_PAIRS(buffers, key_address, value_address) + func(key_address->text, *value_address, user_data); + END_SCAN_PAIRS(buffers, key_address, value_address) +} diff --git a/src/native/containers/dn-simdhash-string-ptr.h b/src/native/containers/dn-simdhash-string-ptr.h new file mode 100644 index 0000000000000..fc3e80fef5689 --- /dev/null +++ b/src/native/containers/dn-simdhash-string-ptr.h @@ -0,0 +1,13 @@ +uint8_t +dn_simdhash_string_ptr_try_add (dn_simdhash_string_ptr_t *hash, const char *key, void *value); + +uint8_t +dn_simdhash_string_ptr_try_get_value (dn_simdhash_string_ptr_t *hash, const char *key, void **result); + +uint8_t +dn_simdhash_string_ptr_try_remove (dn_simdhash_string_ptr_t *hash, const char *key); + +typedef void (*dn_simdhash_string_ptr_foreach_func) (const char *key, void *value, void *user_data); + +void +dn_simdhash_string_ptr_foreach (dn_simdhash_string_ptr_t *hash, dn_simdhash_string_ptr_foreach_func func, void *user_data); diff --git 
a/src/native/containers/dn-simdhash-test.c b/src/native/containers/dn-simdhash-test.c new file mode 100644 index 0000000000000..da0e60acb2887 --- /dev/null +++ b/src/native/containers/dn-simdhash-test.c @@ -0,0 +1,262 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#include +#define MTICKS_PER_SEC (10 * 1000 * 1000) +#else +#include +#endif + +#include "dn-vector.h" +#include "dn-simdhash.h" +#include "dn-simdhash-utils.h" + +typedef struct { + int i; + float f; +} instance_data_t; + +void +dn_simdhash_assert_fail (const char *file, int line, const char *condition) { + printf("simdhash assertion failed at %s:%i:\n%s\n", file, line, condition); + fflush(stdout); +} + +static DN_FORCEINLINE(uint8_t) +key_comparer (instance_data_t data, size_t lhs, size_t rhs) { + return ((data.f == 4.20f) || (lhs == rhs)); +} + +#define DN_SIMDHASH_T dn_simdhash_size_t_size_t +#define DN_SIMDHASH_KEY_T size_t +#define DN_SIMDHASH_VALUE_T size_t +#define DN_SIMDHASH_KEY_HASHER(data, key) (uint32_t)(key & 0xFFFFFFFFu) +#define DN_SIMDHASH_KEY_EQUALS key_comparer +#define DN_SIMDHASH_INSTANCE_DATA_T instance_data_t +#define DN_SIMDHASH_ON_REMOVE(data, key, value) // printf("remove [%zd, %zd], f==%f\n", key, value, data.f) +#define DN_SIMDHASH_ON_REPLACE(data, old_key, new_key, old_value, new_value) // printf("replace [%zd, %zd] with [%zd, %zd] i==%i\n", key, old_value, key, new_value, data.i) + +#include "dn-simdhash-specialization.h" + +uint32_t count_cascaded_buckets (dn_simdhash_size_t_size_t_t *hash) { + uint32_t result = 0; + dn_simdhash_buffers_t buffers = hash->buffers; + BEGIN_SCAN_BUCKETS(0, bucket_index, bucket_address) + result += dn_simdhash_bucket_cascaded_count(bucket_address->suffixes); + END_SCAN_BUCKETS(0, bucket_index, bucket_address) + return result; +} + +uint8_t tassert (int b, const char *msg) { + if 
(b) + return 1; // normalize: returning the int b as uint8_t would turn e.g. 256 into 0 + printf("%s\n", msg); + return 0; +} + +// Like tassert, but also prints the offending value v on failure. Returns 1 on success, 0 on failure. +uint8_t tassert1 (int b, size_t v, const char *msg) { + if (b) + return 1; // normalize instead of truncating b to 8 bits + // %zu is the conversion for size_t (%zd is for the signed counterpart) + printf("%s (%zu)\n", msg, v); + return 0; +} + +// Returns 1 when actual == expected, otherwise prints both values and returns 0. +uint8_t tasserteq (size_t actual, size_t expected, const char *msg) { + if (actual == expected) + return 1; + printf("%s: expected %zu, got %zu\n", msg, expected, actual); + return 0; +} + +void foreach_callback (size_t key, size_t value, void * user_data) { + // printf("[%zd, %zd]\n", key, value); + (*(uint32_t *)user_data)++; +} + +int64_t get_100ns_ticks () { +#ifdef _MSC_VER + static LARGE_INTEGER freq; + static UINT64 start_time; + UINT64 cur_time; + LARGE_INTEGER value; + + if (!freq.QuadPart) { + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&value); + start_time = value.QuadPart; + } + QueryPerformanceCounter(&value); + cur_time = value.QuadPart; + return (int64_t)((cur_time - start_time) * (double)MTICKS_PER_SEC / freq.QuadPart); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return ((int64_t)tv.tv_sec * 1000000 + tv.tv_usec) * 10; +#endif +} + +int main () { + // NOTE: High values of C will cause this test to never complete if libc + // rand() is not high quality enough, i.e. 
MSVC 2022 on x64 + const int c = 32000; + dn_simdhash_size_t_size_t_t *test = dn_simdhash_size_t_size_t_new(0, NULL); + dn_simdhash_instance_data(instance_data_t, test).f = 3.14f; + dn_simdhash_instance_data(instance_data_t, test).i = 42; + + printf("hash(test)=%u\n", MurmurHash3_32_ptr(test, 0)); + + dn_vector_t *keys = dn_vector_alloc(sizeof(DN_SIMDHASH_KEY_T)), + *values = dn_vector_alloc(sizeof(DN_SIMDHASH_VALUE_T)); + // Ensure consistency between runs + srand(1); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_VALUE_T value = (i * 2) + 1; + DN_SIMDHASH_KEY_T key; + +retry: { + key = rand(); + uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); + if (!ok) + goto retry; +} + + dn_vector_push_back(keys, key); + dn_vector_push_back(values, value); + } + + int64_t started = get_100ns_ticks(); + for (int iter = 0; iter < 5; iter++) { + if (!tasserteq(dn_simdhash_count(test), c, "count did not match")) + return 1; + + printf("Calling foreach:\n"); + uint32_t foreach_count = 0; + dn_simdhash_size_t_size_t_foreach(test, foreach_callback, &foreach_count); + printf("Foreach iterated %u time(s)\n", foreach_count); + printf("Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value, expected_value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + if (tassert1(ok, key, "did not find key")) + tasserteq(value, expected_value, "value did not match"); + } + + // NOTE: Adding duplicates could grow the table if we're unlucky, since the add operation + // eagerly grows before doing a table scan if we're at the grow threshold. 
+ for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); + tassert1(!ok, key, "added duplicate key successfully"); + } + + printf("After adding dupes: Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + uint32_t final_capacity = dn_simdhash_capacity(test); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + uint8_t ok = dn_simdhash_size_t_size_t_try_remove(test, key); + tassert1(ok, key, "could not remove key"); + + DN_SIMDHASH_VALUE_T value; + ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + tassert1(!ok, key, "found key after removal"); + } + + if (!tasserteq(dn_simdhash_count(test), 0, "was not empty")) + return 1; + if (!tasserteq(dn_simdhash_capacity(test), final_capacity, "capacity changed by emptying")) + return 1; + + printf ("Calling foreach after emptying:\n"); + foreach_count = 0; + dn_simdhash_size_t_size_t_foreach(test, foreach_callback, &foreach_count); + printf("Foreach iterated %u time(s)\n", foreach_count); + printf("Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value; + uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + tassert1(!ok, key, "found key after removal"); + } + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); + tassert1(ok, key, 
"could not re-insert key after emptying"); + } + + if (!tasserteq(dn_simdhash_capacity(test), final_capacity, "expected capacity not to change after refilling")) + return 1; + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value, expected_value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + if (tassert1(ok, key, "did not find key after refilling")) + tasserteq(value, expected_value, "value did not match after refilling"); + } + + printf("Calling foreach after refilling:\n"); + foreach_count = 0; + dn_simdhash_size_t_size_t_foreach(test, foreach_callback, &foreach_count); + printf("Foreach iterated %u time(s)\n", foreach_count); + printf("Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + } + + int64_t ended = get_100ns_ticks(); + + printf("done. 
elapsed ticks: %lld\n", (long long)(ended - started)); + // The cast matches %lld on platforms where int64_t is long rather than long long + + return 0; + /* + var test = new SimdDictionary(); + var rng = new Random(1234); + int c = 4096, d = 4096 * 5; + var keys = new List(); + for (int i = 0; i < c; i++) + keys.Add(rng.NextInt64()); + for (int i = 0; i < c; i++) + test.Add(keys[i], i * 2 + 1); + + for (int j = 0; j < d; j++) + for (int i = 0; i < c; i++) + if (!test.TryGetValue(keys[i], out _)) + throw new Exception(); + + var keyList = test.Keys.ToArray(); + var valueList = test.Values.ToArray(); + + var copy = new SimdDictionary(test); + for (int i = 0; i < c; i++) + if (!copy.TryGetValue(keys[i], out _)) + throw new Exception(); + + for (int i = 0; i < c; i++) + if (!test.Remove(keys[i])) + throw new Exception(); + + for (int i = 0; i < c; i++) + if (test.TryGetValue(keys[i], out _)) + throw new Exception(); + + if (test.Count != 0) + throw new Exception(); + */ +} diff --git a/src/native/containers/dn-simdhash-u32-ptr.c b/src/native/containers/dn-simdhash-u32-ptr.c new file mode 100644 index 0000000000000..a3e1a77a92c2a --- /dev/null +++ b/src/native/containers/dn-simdhash-u32-ptr.c @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "dn-simdhash.h" +#include "dn-simdhash-utils.h" + +#define DN_SIMDHASH_T dn_simdhash_u32_ptr +#define DN_SIMDHASH_KEY_T uint32_t +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_KEY_HASHER(hash, key) murmur3_fmix32(key) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs) + +#include "dn-simdhash-specialization.h" diff --git a/src/native/containers/dn-simdhash-utils.h b/src/native/containers/dn-simdhash-utils.h new file mode 100644 index 0000000000000..889127d4088a8 --- /dev/null +++ b/src/native/containers/dn-simdhash-utils.h @@ -0,0 +1,189 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __DN_SIMDHASH_UTILS_H__ +#define __DN_SIMDHASH_UTILS_H__ + +#include + +#if defined(__clang__) || defined (__GNUC__) +static DN_FORCEINLINE(uint32_t) +next_power_of_two (uint32_t value) { + if (value < 2) + return 1; + return 1u << (32 - __builtin_clz (value - 1)); +} +#else // __clang__ || __GNUC__ +static DN_FORCEINLINE(uint32_t) +next_power_of_two (uint32_t value) { + if (value < 2) + return 1; + value--; + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; + value++; + return value; +} +#endif // __clang__ || __GNUC__ + +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +static const uint32_t murmur3_c1 = 0xcc9e2d51, murmur3_c2 = 0x1b873593; + +inline static uint32_t +murmur3_rotl32 (uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +// Finalization mix - force all bits of a hash block to avalanche +inline static uint32_t +murmur3_fmix32 (uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +inline static uint64_t +murmur3_fmix64(uint64_t k) +{ + k ^= k >> 33; + k *= 0xff51afd7ed558ccdLLU; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53LLU; + k ^= k >> 33; + return k; +} + +// Convenience macro so you can define your own fixed-size MurmurHashes +#define MURMUR3_HASH_BLOCK(block) \ + { \ + uint32_t k1 = block; \ + k1 *= murmur3_c1; \ + k1 = murmur3_rotl32(k1, 15); \ + k1 *= murmur3_c2; \ + h1 ^= k1; \ + h1 = murmur3_rotl32(h1, 13); \ + h1 = h1 * 5 + 0xe6546b64; \ + } + +// Hash a void * (either 4 or 8 bytes) +static inline uint32_t +MurmurHash3_32_ptr (const void *ptr, uint32_t seed) +{ + // mono_aligned_addr_hash shifts all incoming pointers by 3 bits to account + // for a presumed 8-byte alignment of addresses (the dlmalloc default). 
+ const uint32_t alignment_shift = 3; + // Compute this outside of the if to suppress msvc build warning + const uint8_t is_64_bit = sizeof(void*) == sizeof(uint64_t); + union { + uint32_t u32; + uint64_t u64; + const void *ptr; + } u; + u.ptr = ptr; + + // Apply murmurhash3's finalization bit mixer to a pointer to compute a 32-bit hash. + if (is_64_bit) { + // The high bits of a 64-bit pointer are usually low entropy, as are the + // 2-3 lowest bits. We want to capture most of the entropy and mix it into + // a 32-bit hash to reduce the odds of hash collisions for arbitrary 64-bit + // pointers. From my testing, this is a good way to do it. + return murmur3_fmix32((uint32_t)((u.u64 >> alignment_shift) & 0xFFFFFFFFu)); + // return (uint32_t)(murmur3_fmix64(u.u64 >> alignment_shift) & 0xFFFFFFFFu); + } else { + // No need for an alignment shift here, we're mixing the bits and then + // simdhash uses 7 of the top bits and a handful of the low bits. + return murmur3_fmix32(u.u32); + } +} + +// end of murmurhash + +// FNV has bad properties for simdhash even though it's a fairly fast/good hash, +// but the overhead of having to do strlen() first before passing a string key to +// MurmurHash3 is significant and annoying. This is an attempt to reformulate the +// 32-bit version of MurmurHash3 into a 1-pass version for null terminated strings. +// The output of this will probably be different from regular MurmurHash3. I don't +// see that as a problem, since you shouldn't rely on the exact bit patterns of +// a non-cryptographic hash anyway. 
+typedef struct murmur3_scan_result_t { + union { + uint32_t u32; + uint8_t bytes[4]; + } result; + const uint8_t *next; +} murmur3_scan_result_t; + +static inline murmur3_scan_result_t +murmur3_scan_forward (const uint8_t *ptr) +{ + // TODO: On wasm we could do a single u32 load then scan the bytes, + // as long as we're sure ptr isn't up against the end of memory + murmur3_scan_result_t result = { 0, }; + + // I tried to get a loop to auto-unroll, but GCC only unrolls at O3 and MSVC never does. +#define SCAN_1(i) \ + result.result.bytes[i] = ptr[i]; \ + if (DN_UNLIKELY(!result.result.bytes[i])) \ + return result; + + SCAN_1(0); + SCAN_1(1); + SCAN_1(2); + SCAN_1(3); +#undef SCAN_1 + + // doing ptr[i] 4 times then computing here produces better code than ptr++ especially on wasm + result.next = ptr + 4; + return result; +} + +static inline uint32_t +MurmurHash3_32_streaming (const uint8_t *key, uint32_t seed) +{ + uint32_t h1 = seed, block_count = 0; + + // Scan forward through the buffer collecting up to 4 bytes at a time, then hash + murmur3_scan_result_t block = murmur3_scan_forward(key); + // As long as the scan found at least one nonzero byte, u32 will be != 0 + while (block.result.u32) { + block_count += 1; + + MURMUR3_HASH_BLOCK(block.result.u32); + + // If the scan found a null byte next will be 0, so we stop scanning + if (DN_UNLIKELY(!block.next)) + break; + block = murmur3_scan_forward(block.next); + } + + // finalize. 
we don't have an exact byte length but we have a block count + // it would be ideal to figure out a cheap way to produce an exact byte count, + // since then we can compute the length and hash in one go and use memcmp later, + // since emscripten/musl strcmp isn't optimized at all + h1 ^= block_count; + h1 = murmur3_fmix32(h1); + return h1; +} + +// end of reformulated murmur3-32 + +void +dn_simdhash_assert_fail (const char *file, int line, const char *condition); + +#define dn_simdhash_assert(expr) \ + if (DN_UNLIKELY(!(expr))) { \ + dn_simdhash_assert_fail(__FILE__, __LINE__, #expr); \ + } + +#endif // __DN_SIMDHASH_UTILS_H__ diff --git a/src/native/containers/dn-simdhash.c b/src/native/containers/dn-simdhash.c new file mode 100644 index 0000000000000..03d4d2bf3951a --- /dev/null +++ b/src/native/containers/dn-simdhash.c @@ -0,0 +1,153 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "dn-simdhash.h" +#include "dn-simdhash-utils.h" + +static uint32_t +compute_adjusted_capacity (uint32_t requested_capacity) +{ + uint64_t _capacity = requested_capacity; + _capacity *= DN_SIMDHASH_SIZING_PERCENTAGE; + _capacity /= 100; + dn_simdhash_assert(_capacity <= UINT32_MAX); + return (uint32_t)_capacity; +} + +// Allocates a zeroed dn_simdhash_t with meta->data_size extra bytes, records meta, vtable and +// allocator on it, then sizes its buffers for the requested capacity. +dn_simdhash_t * +dn_simdhash_new_internal (dn_simdhash_meta_t *meta, dn_simdhash_vtable_t vtable, uint32_t capacity, dn_allocator_t *allocator) +{ + // Validate meta first: the size computation below already dereferences it + dn_simdhash_assert(meta); + dn_simdhash_assert((meta->bucket_capacity > 1) && (meta->bucket_capacity <= DN_SIMDHASH_MAX_BUCKET_CAPACITY)); + dn_simdhash_assert(meta->key_size > 0); + dn_simdhash_assert(meta->bucket_size_bytes >= (DN_SIMDHASH_VECTOR_WIDTH + (meta->bucket_capacity * meta->key_size))); + + const size_t size = sizeof(dn_simdhash_t) + meta->data_size; + dn_simdhash_t *result = (dn_simdhash_t *)dn_allocator_alloc(allocator, size); + memset(result, 0, size); + + result->meta = meta; + result->vtable = vtable; + 
result->buffers.allocator = allocator; + + dn_simdhash_ensure_capacity_internal(result, compute_adjusted_capacity(capacity)); + + return result; +} + +void +dn_simdhash_free (dn_simdhash_t *hash) +{ + dn_simdhash_assert(hash); + if (hash->vtable.destroy_all) + hash->vtable.destroy_all(hash); + dn_simdhash_buffers_t buffers = hash->buffers; + memset(hash, 0, sizeof(dn_simdhash_t)); + dn_simdhash_free_buffers(buffers); + dn_allocator_free(buffers.allocator, (void *)hash); +} + +void +dn_simdhash_free_buffers (dn_simdhash_buffers_t buffers) +{ + if (buffers.buckets) + dn_allocator_free(buffers.allocator, (void *)(((uint8_t *)buffers.buckets) - buffers.buckets_bias)); + if (buffers.values) + dn_allocator_free(buffers.allocator, buffers.values); +} + +dn_simdhash_buffers_t +dn_simdhash_ensure_capacity_internal (dn_simdhash_t *hash, uint32_t capacity) +{ + dn_simdhash_assert(hash); + size_t bucket_count = (capacity + hash->meta->bucket_capacity - 1) / hash->meta->bucket_capacity; + // FIXME: Only apply this when capacity == 0? 
+ if (bucket_count < DN_SIMDHASH_MIN_BUCKET_COUNT) + bucket_count = DN_SIMDHASH_MIN_BUCKET_COUNT; + dn_simdhash_assert(bucket_count < UINT32_MAX); + // Bucket count must be a power of two (this enables more efficient hashcode -> bucket mapping) + bucket_count = next_power_of_two((uint32_t)bucket_count); + size_t value_count = bucket_count * hash->meta->bucket_capacity; + dn_simdhash_assert(value_count <= UINT32_MAX); + + dn_simdhash_buffers_t result = { 0, }; + if (bucket_count <= hash->buffers.buckets_length) { + dn_simdhash_assert(value_count <= hash->buffers.values_length); + return result; + } + + /* + printf ( + "growing from %d bucket(s) to %d bucket(s) for requested capacity %d (actual capacity %d)\n", + hash->buffers.buckets_length, bucket_count, + capacity, value_count + ); + */ + // Store old buffers so caller can rehash and then free them + result = hash->buffers; + + size_t grow_at_count = value_count; + grow_at_count *= 100; + grow_at_count /= DN_SIMDHASH_SIZING_PERCENTAGE; + hash->grow_at_count = (uint32_t)grow_at_count; + hash->buffers.buckets_length = (uint32_t)bucket_count; + hash->buffers.values_length = (uint32_t)value_count; + + // pad buckets allocation by the width of one vector so we can align it + size_t buckets_size_bytes = (bucket_count * hash->meta->bucket_size_bytes) + DN_SIMDHASH_VECTOR_WIDTH, + values_size_bytes = value_count * hash->meta->value_size; + + hash->buffers.buckets = dn_allocator_alloc(hash->buffers.allocator, buckets_size_bytes); + memset(hash->buffers.buckets, 0, buckets_size_bytes); + + // Calculate necessary bias for alignment + hash->buffers.buckets_bias = (uint32_t)(DN_SIMDHASH_VECTOR_WIDTH - (((size_t)hash->buffers.buckets) % DN_SIMDHASH_VECTOR_WIDTH)); + // Apply bias + hash->buffers.buckets = (void *)(((uint8_t *)hash->buffers.buckets) + hash->buffers.buckets_bias); + + // No need to go out of our way to align values + hash->buffers.values = dn_allocator_alloc(hash->buffers.allocator, values_size_bytes); + // Skip 
this for performance; memset is especially slow in wasm + // memset(hash->buffers.values, 0, values_size_bytes); + + return result; +} + +// Runs the optional destroy_all handler, resets count to 0 and zeroes the bucket storage. +// The buffers keep their current size. +void +dn_simdhash_clear (dn_simdhash_t *hash) +{ + dn_simdhash_assert(hash); + if (hash->vtable.destroy_all) + hash->vtable.destroy_all(hash); + hash->count = 0; + // TODO: Scan through buckets sequentially and only erase ones with data in them + // Maybe skip erasing the key slots too? + // Multiply in size_t: both operands are uint32_t, so the product would otherwise wrap at 32 bits + memset(hash->buffers.buckets, 0, (size_t)hash->buffers.buckets_length * hash->meta->bucket_size_bytes); + // Skip this for performance; memset is especially slow in wasm + // memset(hash->buffers.values, 0, hash->buffers.values_length * hash->meta->value_size); +} + +uint32_t +dn_simdhash_capacity (dn_simdhash_t *hash) +{ + dn_simdhash_assert(hash); + return hash->buffers.buckets_length * hash->meta->bucket_capacity; +} + +uint32_t +dn_simdhash_count (dn_simdhash_t *hash) +{ + dn_simdhash_assert(hash); + return hash->count; +} + +void +dn_simdhash_ensure_capacity (dn_simdhash_t *hash, uint32_t capacity) +{ + dn_simdhash_assert(hash); + capacity = compute_adjusted_capacity(capacity); + dn_simdhash_buffers_t old_buffers = dn_simdhash_ensure_capacity_internal(hash, capacity); + if (old_buffers.buckets) { + hash->vtable.rehash(hash, old_buffers); + dn_simdhash_free_buffers(old_buffers); + } +} diff --git a/src/native/containers/dn-simdhash.h b/src/native/containers/dn-simdhash.h new file mode 100644 index 0000000000000..2a26083ec7df1 --- /dev/null +++ b/src/native/containers/dn-simdhash.h @@ -0,0 +1,155 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __DN_SIMDHASH_H__ +#define __DN_SIMDHASH_H__ + +#include +#include "dn-utils.h" +#include "dn-allocator.h" + +// We reserve the last two bytes of each suffix vector to store data +#define DN_SIMDHASH_MAX_BUCKET_CAPACITY 14 +// The ideal capacity depends on the size of your keys. For 4-byte keys, it is 12. 
+#define DN_SIMDHASH_DEFAULT_BUCKET_CAPACITY 12 +// We use the last two bytes specifically to store item count and cascade flag +#define DN_SIMDHASH_COUNT_SLOT (DN_SIMDHASH_MAX_BUCKET_CAPACITY) +// The cascade flag indicates that an item overflowed from this bucket into the next one +#define DN_SIMDHASH_CASCADED_SLOT (DN_SIMDHASH_MAX_BUCKET_CAPACITY + 1) +// We always use 16-byte-wide vectors (I've tested this, 32-byte vectors are slower) +#define DN_SIMDHASH_VECTOR_WIDTH 16 +// We need to make sure suffixes are never zero. A bad hash is more likely to collide +// at the top bit than at the bottom. +// Only the top bit is set (0b10000000); written in hex because binary literals are not standard C before C23. +#define DN_SIMDHASH_SUFFIX_SALT 0x80 +// Set a minimum number of buckets when created, regardless of requested capacity +#define DN_SIMDHASH_MIN_BUCKET_COUNT 1 +// User-specified capacity values will be increased to this percentage in order +// to maintain an ideal load factor. FIXME: 120 isn't right +#define DN_SIMDHASH_SIZING_PERCENTAGE 120 + +typedef struct dn_simdhash_void_data_t { + // HACK: Empty struct or 0-element array produce a MSVC warning and break the build. + uint8_t data[1]; +} dn_simdhash_void_data_t; + +typedef struct dn_simdhash_buffers_t { + // sizes of current allocations in items (not bytes) + // so values_length should == (buckets_length * bucket_capacity) + uint32_t buckets_length, values_length, + // The number of bytes we pushed the buckets ptr forward after allocating it. + // We'll need to subtract this from the ptr before freeing. 
+ buckets_bias; + void *buckets; + void *values; + dn_allocator_t *allocator; +} dn_simdhash_buffers_t; + +typedef struct dn_simdhash_t dn_simdhash_t; + +typedef struct dn_simdhash_meta_t { + // type metadata for generic implementation + uint32_t bucket_capacity, bucket_size_bytes, key_size, value_size, + // Allocate this many bytes of extra data inside the dn_simdhash_t + data_size; +} dn_simdhash_meta_t; + +typedef enum dn_simdhash_insert_result { + DN_SIMDHASH_INSERT_OK_ADDED_NEW, + DN_SIMDHASH_INSERT_OK_OVERWROTE_EXISTING, + DN_SIMDHASH_INSERT_NEED_TO_GROW, + DN_SIMDHASH_INSERT_KEY_ALREADY_PRESENT, +} dn_simdhash_insert_result; + +typedef struct dn_simdhash_vtable_t { + // Does not free old_buffers, that's your job. Required. + void (*rehash) (dn_simdhash_t *hash, dn_simdhash_buffers_t old_buffers); + // Invokes remove handler for all items, if necessary. Optional. + void (*destroy_all) (dn_simdhash_t *hash); +} dn_simdhash_vtable_t; + +typedef struct dn_simdhash_t { + // internal state + uint32_t count, grow_at_count; + dn_simdhash_buffers_t buffers; + dn_simdhash_vtable_t vtable; + dn_simdhash_meta_t *meta; + // We allocate extra space here based on meta.data_size + // This has one element because 0 elements generates a MSVC warning and breaks the build + uint8_t data[1]; +} dn_simdhash_t; + +#define dn_simdhash_instance_data(type, hash) \ + (*(type *)(&hash->data)) + +// These helpers use .values instead of .vec to avoid generating unnecessary +// vector loads/stores. Operations that touch these values may not need vectorization, +// so it's ideal to just do single-byte memory accesses instead. 
+// These unfortunately have to be macros because the suffixes type isn't defined yet +#define dn_simdhash_bucket_count(suffixes) \ + (suffixes).values[DN_SIMDHASH_COUNT_SLOT] + +#define dn_simdhash_bucket_cascaded_count(suffixes) \ + (suffixes).values[DN_SIMDHASH_CASCADED_SLOT] + +#define dn_simdhash_bucket_set_suffix(suffixes, slot, value) \ + (suffixes).values[(slot)] = (value) + +#define dn_simdhash_bucket_set_count(suffixes, value) \ + (suffixes).values[DN_SIMDHASH_COUNT_SLOT] = (value) + +#define dn_simdhash_bucket_set_cascaded_count(suffixes, value) \ + (suffixes).values[DN_SIMDHASH_CASCADED_SLOT] = (value) + +static DN_FORCEINLINE(uint8_t) +dn_simdhash_select_suffix (uint32_t key_hash) +{ + // Extract top 8 bits, then trash the highest one. + // The lowest bits of the hash are used to select the bucket index. + return (key_hash >> 24) | DN_SIMDHASH_SUFFIX_SALT; +} + +static DN_FORCEINLINE(uint32_t) +dn_simdhash_select_bucket_index (dn_simdhash_buffers_t buffers, uint32_t key_hash) +{ + // This relies on bucket count being a power of two. + return key_hash & (buffers.buckets_length - 1); +} + + +// Creates a simdhash with the provided configuration metadata, vtable, size, and allocator. +// Be sure you know what you're doing. +dn_simdhash_t * +dn_simdhash_new_internal (dn_simdhash_meta_t *meta, dn_simdhash_vtable_t vtable, uint32_t capacity, dn_allocator_t *allocator); + +// Frees a simdhash and its associated buffers. +void +dn_simdhash_free (dn_simdhash_t *hash); + +// Frees a set of simdhash buffers (returned by ensure_capacity_internal). +void +dn_simdhash_free_buffers (dn_simdhash_buffers_t buffers); + +// If a resize happens, this will allocate new buffers and return the old ones. +// It is your responsibility to rehash and then free the old buffers. +dn_simdhash_buffers_t +dn_simdhash_ensure_capacity_internal (dn_simdhash_t *hash, uint32_t capacity); + +// Erases the contents of the table, but does not shrink it. 
+void +dn_simdhash_clear (dn_simdhash_t *hash); + +// Returns the actual number of values the table can currently hold. +// It may grow automatically before reaching that point. +uint32_t +dn_simdhash_capacity (dn_simdhash_t *hash); + +// Returns the number of value currently stored in the table. +uint32_t +dn_simdhash_count (dn_simdhash_t *hash); + +// Automatically resizes the table if it is too small to hold the requested number +// of items. Will not shrink the table if it is already bigger. +void +dn_simdhash_ensure_capacity (dn_simdhash_t *hash, uint32_t capacity); + +#endif // __DN_SIMDHASH_H__ diff --git a/src/native/containers/dn-utils.h b/src/native/containers/dn-utils.h index 131889165b2f2..dab4160852b1f 100644 --- a/src/native/containers/dn-utils.h +++ b/src/native/containers/dn-utils.h @@ -39,7 +39,7 @@ #define DN_CALLBACK_CALLTYPE #endif -#if defined(__GNUC__) && (__GNUC__ > 2) +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2)) #define DN_LIKELY(expr) (__builtin_expect ((expr) != 0, 1)) #define DN_UNLIKELY(expr) (__builtin_expect ((expr) != 0, 0)) #else @@ -51,6 +51,12 @@ #define _DN_STATIC_ASSERT(expr) static_assert(expr, "") +#ifdef _MSC_VER +#define DN_FORCEINLINE(RET_TYPE) __forceinline RET_TYPE +#else +#define DN_FORCEINLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline)) +#endif + static inline bool dn_safe_size_t_multiply (size_t lhs, size_t rhs, size_t *result) { @@ -58,7 +64,7 @@ dn_safe_size_t_multiply (size_t lhs, size_t rhs, size_t *result) *result = 0; return true; } - + if (((size_t)(~(size_t)0) / lhs) < rhs) return false; diff --git a/src/native/containers/simdhash-benchmark/Makefile b/src/native/containers/simdhash-benchmark/Makefile new file mode 100644 index 0000000000000..7aa316ba1555b --- /dev/null +++ b/src/native/containers/simdhash-benchmark/Makefile @@ -0,0 +1,26 @@ +.DEFAULT_GOAL := default_target + +dn_deps := $(wildcard ../*.c) $(wildcard ../*.h) +benchmark_deps := $(wildcard ./*.c) $(wildcard ./*.h) + 
+benchmark_sources := ../dn-simdhash.c ../dn-vector.c ./benchmark.c ../dn-simdhash-u32-ptr.c ../dn-simdhash-string-ptr.c ./ghashtable.c ./all-measurements.c
+common_options := -g -O3 -DNO_CONFIG_H -lm -DNDEBUG
+
+benchmark-native: $(dn_deps) $(benchmark_deps)
+	$(or $(NATIVE_CC),clang) $(benchmark_sources) $(common_options) -DSIZEOF_VOID_P=8
+
+benchmark-wasm: $(dn_deps) $(benchmark_deps)
+	$(or $(EMCC),~/Projects/emscripten/emcc) $(benchmark_sources) $(common_options) -DSIZEOF_VOID_P=4 -mbulk-memory -msimd128
+
+disassemble-benchmark: benchmark-native benchmark-wasm
+	objdump -d ./a.out > ./a.dis
+	$(or $(WASM2WAT),~/wabt/bin/wasm2wat) ./a.out.wasm > ./a.wat
+
+run-native: benchmark-native
+	./a.out
+
+run-wasm: benchmark-wasm
+	node ./a.out.js
+
+default_target: disassemble-benchmark
+.PHONY: benchmark-native benchmark-wasm disassemble-benchmark run-native run-wasm default_target # none of these names is a file the recipes create; tools are overridable: make NATIVE_CC=gcc EMCC=emcc WASM2WAT=wasm2wat
diff --git a/src/native/containers/simdhash-benchmark/all-measurements.c b/src/native/containers/simdhash-benchmark/all-measurements.c
new file mode 100644
index 0000000000000..2d04bbd427012
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/all-measurements.c
@@ -0,0 +1,34 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include
+#include
+#include
+#include
+
+#include "../dn-vector.h"
+#include "../dn-simdhash.h"
+#include "../dn-simdhash-utils.h"
+#include "../dn-simdhash-specializations.h"
+
+#include "measurement.h"
+
+#undef MEASUREMENT // pass 1: each MEASUREMENT(...) becomes a static function measurement_<name> wrapping its body
+#define MEASUREMENT(name, data_type, setup, teardown, body) \
+    static void DN_SIMDHASH_GLUE(measurement_, name) (void *_data) { \
+        data_type data = (data_type)_data; \
+        body; \
+    }
+
+#include "all-measurements.h"
+
+#undef MEASUREMENT // pass 2: each MEASUREMENT(...) becomes a global <name>_measurement_info record pointing at that function
+#define MEASUREMENT(name, data_type, setup, teardown, body) \
+    measurement_info DN_SIMDHASH_GLUE(name, _measurement_info) = { \
+        #name, \
+        setup, \
+        DN_SIMDHASH_GLUE(measurement_, name), \
+        teardown \
+    };
+
+#include "all-measurements.h"
diff --git a/src/native/containers/simdhash-benchmark/all-measurements.h b/src/native/containers/simdhash-benchmark/all-measurements.h
new file mode 100644
index 0000000000000..2e49927337a34
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/all-measurements.h
@@ -0,0 +1,189 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "ghashtable.h"
+
+#ifndef MEASUREMENTS_IMPLEMENTATION
+#define MEASUREMENTS_IMPLEMENTATION 1
+
+// If this is too large and libc's rand() is low quality (i.e. MSVC),
+// initializing the data will take forever
+#define INNER_COUNT (1024 * 16) // parenthesized so the product stays one operand wherever the macro is expanded
+#define BASELINE_SIZE 20480
+
+static dn_simdhash_u32_ptr_t *random_u32s_hash; // every random key handed out so far; used only to reject duplicates
+static dn_vector_t *sequential_u32s, *random_u32s, *random_unused_u32s;
+
+static void init_data (void) { // fills the three key vectors with INNER_COUNT keys each
+    random_u32s_hash = dn_simdhash_u32_ptr_new(INNER_COUNT, NULL);
+    sequential_u32s = dn_vector_alloc(sizeof(uint32_t));
+    random_u32s = dn_vector_alloc(sizeof(uint32_t));
+    random_unused_u32s = dn_vector_alloc(sizeof(uint32_t));
+    // For consistent data between runs
+    srand(1);
+
+    for (uint32_t i = 0; i < INNER_COUNT; i++) {
+        dn_vector_push_back(sequential_u32s, i);
+
+retry: {
+        uint32_t key = (uint32_t)(rand() & 0xFFFFFFFFu);
+        if (!dn_simdhash_u32_ptr_try_add(random_u32s_hash, key, NULL)) // key already drawn: draw again
+            goto retry;
+
+        dn_vector_push_back(random_u32s, key);
+}
+    }
+
+    for (uint32_t i = 0; i < INNER_COUNT; i++) {
+retry2: {
+        uint32_t key = (uint32_t)(rand() & 0xFFFFFFFFu);
+        if (!dn_simdhash_u32_ptr_try_add(random_u32s_hash, key, NULL)) // same dedup table, so these keys never appear in random_u32s
+            goto retry2;
+
+        dn_vector_push_back(random_unused_u32s, key);
+}
+    }
+}
+
+
+static void * create_instance_u32_ptr (void) { // setup: empty simdhash table with INNER_COUNT requested capacity
+    if (!random_u32s)
+        init_data();
+
+    return dn_simdhash_u32_ptr_new(INNER_COUNT, NULL);
+}
+
+static void * create_instance_u32_ptr_random_values (void) { // setup: simdhash table mapping random_u32s[i] -> i
+    if (!random_u32s)
+        init_data();
+
+    dn_simdhash_u32_ptr_t *result = dn_simdhash_u32_ptr_new(INNER_COUNT, NULL);
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        dn_simdhash_u32_ptr_try_add(result, key, (void *)(size_t)i);
+    }
+    return result;
+}
+
+static void destroy_instance (void *_data) { // teardown for the simdhash setups; NULL is ignored
+    dn_simdhash_u32_ptr_t *data = _data;
+    if (!data)
+        return;
+
+    dn_simdhash_free(data);
+}
+
+
+static void * baseline_init (void) { // setup for the memset baseline; released with free()
+    return malloc(BASELINE_SIZE);
+}
+
+
+static void * create_instance_ght (void) { // setup: empty GHashTable using the direct hash/equal defaults
+    if (!random_u32s)
+        init_data();
+
+    return g_hash_table_new(NULL, NULL);
+}
+
+static void * create_instance_ght_random_values (void) { // setup: GHashTable mapping random_u32s[i] -> i
+    if (!random_u32s)
+        init_data();
+
+    GHashTable *result = g_hash_table_new(NULL, NULL);
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        g_hash_table_insert(result, (gpointer)(size_t)key, (gpointer)(size_t)i);
+    }
+    return result;
+}
+
+static void destroy_instance_ght (void *data) {
+    g_hash_table_destroy((GHashTable *)data);
+}
+
+#endif // MEASUREMENTS_IMPLEMENTATION
+
+// These go outside the guard because we include this file multiple times.
+
+MEASUREMENT(baseline, uint8_t *, baseline_init, free, {
+    for (int i = 0; i < 256; i++) {
+        memset(data, i, BASELINE_SIZE);
+        // Without this the memset gets optimized out
+        dn_simdhash_assert(data[i] == i);
+    }
+})
+
+MEASUREMENT(dn_clear_then_fill_sequential, dn_simdhash_u32_ptr_t *, create_instance_u32_ptr, destroy_instance, {
+    dn_simdhash_clear(data);
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(sequential_u32s, uint32_t, i);
+        dn_simdhash_assert(dn_simdhash_u32_ptr_try_add(data, key, (void *)(size_t)i));
+    }
+})
+
+MEASUREMENT(dn_clear_then_fill_random, dn_simdhash_u32_ptr_t *, create_instance_u32_ptr, destroy_instance, {
+    dn_simdhash_clear(data);
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        dn_simdhash_assert(dn_simdhash_u32_ptr_try_add(data, key, (void *)(size_t)i));
+    }
+})
+
+MEASUREMENT(dn_find_random_keys, dn_simdhash_u32_ptr_t *, create_instance_u32_ptr_random_values, destroy_instance, {
+    void *temp = NULL;
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        dn_simdhash_assert(dn_simdhash_u32_ptr_try_get_value(data, key, &temp));
+    }
+})
+
+MEASUREMENT(dn_find_missing_key, dn_simdhash_u32_ptr_t *, create_instance_u32_ptr_random_values, destroy_instance, {
+    void *temp = NULL;
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_unused_u32s, uint32_t, i);
+        dn_simdhash_assert(!dn_simdhash_u32_ptr_try_get_value(data, key, &temp));
+    }
+})
+
+MEASUREMENT(dn_fill_then_remove_every_item, dn_simdhash_u32_ptr_t *, create_instance_u32_ptr, destroy_instance, {
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        dn_simdhash_assert(dn_simdhash_u32_ptr_try_add(data, key, (void *)(size_t)i));
+    }
+
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        dn_simdhash_assert(dn_simdhash_u32_ptr_try_remove(data, key));
+    }
+})
+
+MEASUREMENT(ght_clear_then_fill_sequential, GHashTable *, create_instance_ght, destroy_instance_ght, {
+    g_hash_table_remove_all(data);
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(sequential_u32s, uint32_t, i);
+        g_hash_table_insert(data, (gpointer)(size_t)key, (gpointer)(size_t)i);
+    }
+})
+
+MEASUREMENT(ght_clear_then_fill_random, GHashTable *, create_instance_ght, destroy_instance_ght, {
+    g_hash_table_remove_all(data);
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        g_hash_table_insert(data, (gpointer)(size_t)key, (gpointer)(size_t)i);
+    }
+})
+
+MEASUREMENT(ght_find_random_keys, GHashTable *, create_instance_ght_random_values, destroy_instance_ght, {
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_u32s, uint32_t, i);
+        dn_simdhash_assert(g_hash_table_lookup(data, (gpointer)(size_t)key) == (gpointer)(size_t)i);
+    }
+})
+
+MEASUREMENT(ght_find_missing_key, GHashTable *, create_instance_ght_random_values, destroy_instance_ght, {
+    for (int i = 0; i < INNER_COUNT; i++) {
+        uint32_t key = *dn_vector_index_t(random_unused_u32s, uint32_t, i);
+        dn_simdhash_assert(g_hash_table_lookup(data, (gpointer)(size_t)key) == NULL);
+    }
+})
diff --git a/src/native/containers/simdhash-benchmark/benchmark.c b/src/native/containers/simdhash-benchmark/benchmark.c
new file mode 100644
index
0000000000000..320e43389cbed
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/benchmark.c
@@ -0,0 +1,211 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _MSC_VER
+#include
+#else
+#include
+#include
+char *strcasestr(const char *haystack, const char *needle);
+#endif
+
+#include "../dn-vector.h"
+#include "../dn-simdhash.h"
+#include "../dn-simdhash-utils.h"
+#include "../dn-simdhash-specializations.h"
+
+#include "measurement.h"
+
+dn_simdhash_string_ptr_t *all_measurements; // measurement name -> measurement_info *, filled by init_measurements
+
+#undef MEASUREMENT
+#define MEASUREMENT(name, data_type, setup, teardown, body) \
+    extern measurement_info DN_SIMDHASH_GLUE(name, _measurement_info);
+
+// Suppress actual codegen
+#define MEASUREMENTS_IMPLEMENTATION 0
+
+#include "all-measurements.h"
+
+void
+dn_simdhash_assert_fail (const char *file, int line, const char *condition) {
+    fprintf(stderr, "simdhash assertion failed at %s:%i:\n%s\n", file, line, condition);
+    fflush(stderr);
+    abort();
+}
+
+#define MTICKS_PER_SEC (10 * 1000 * 1000)
+
+int64_t get_100ns_ticks (void) { // timestamp in ticks of 100ns (MTICKS_PER_SEC ticks per second)
+#ifdef _MSC_VER
+    static LARGE_INTEGER freq;
+    static UINT64 start_time;
+    UINT64 cur_time;
+    LARGE_INTEGER value;
+
+    if (!freq.QuadPart) {
+        QueryPerformanceFrequency(&freq);
+        QueryPerformanceCounter(&value);
+        start_time = value.QuadPart;
+    }
+    QueryPerformanceCounter(&value);
+    cur_time = value.QuadPart;
+    return (int64_t)((cur_time - start_time) * (double)MTICKS_PER_SEC / freq.QuadPart);
+#else
+    struct timespec ts;
+    // FIXME: Use clock_monotonic for wall time instead? I think process time is what we want
+#ifdef __wasm
+    dn_simdhash_assert(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
+#else
+    dn_simdhash_assert(clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0);
+#endif
+    return ((int64_t)ts.tv_sec * MTICKS_PER_SEC + ts.tv_nsec / 100);
+#endif
+}
+
+void init_measurements (void) { // registers every MEASUREMENT from all-measurements.h under its name
+    if (!all_measurements)
+        all_measurements = dn_simdhash_string_ptr_new (0, NULL);
+
+    #undef MEASUREMENT
+    #define MEASUREMENT(name, data_type, setup, teardown, body) \
+        dn_simdhash_string_ptr_try_add(all_measurements, #name, &DN_SIMDHASH_GLUE(name, _measurement_info));
+
+    #include "all-measurements.h"
+}
+
+int64_t run_measurement (int iteration_count, setup_func setup, measurement_func measurement, measurement_func teardown) { // returns elapsed ticks for the timed loop only; setup and teardown run outside it
+    void *data = NULL;
+    if (setup)
+        data = setup();
+
+    int64_t started = get_100ns_ticks();
+    for (int i = 0; i < iteration_count; i++)
+        measurement(data);
+    int64_t ended = get_100ns_ticks();
+
+    if (teardown)
+        teardown(data);
+
+    return ended - started;
+}
+
+typedef struct {
+    int argc;
+    char **argv;
+    int result;
+} main_args;
+
+void foreach_measurement (const char *name, void *_info, void *_args) { // table callback: runs one measurement if its name matches a filter argument
+    measurement_info *info = _info;
+    main_args *args = _args;
+
+    uint8_t match = args->argc <= 1; // no filter arguments: every measurement runs
+    for (int i = 1; i < args->argc; i++) {
+#ifdef _MSC_VER
+        if (strstr(name, args->argv[i])) {
+#else
+        if (strcasestr(name, args->argv[i])) {
+#endif
+            match = 1;
+            break;
+        }
+    }
+
+    if (!match)
+        return;
+
+    printf("%s: ", name);
+    fflush(stdout);
+
+    run_measurement(100, info->setup, info->func, info->teardown);
+
+    int64_t overhead = run_measurement(1, info->setup, info->func, info->teardown);
+
+    int64_t warmup_duration = 20000000,
+        target_step_duration = 10000000,
+        target_duration = warmup_duration * 10,
+        warmup_iterations = 500,
+        warmup_until = get_100ns_ticks() + warmup_duration,
+        warmup_elapsed_total = 0,
+        warmup_count = 0;
+
+    do {
+        warmup_elapsed_total += run_measurement(warmup_iterations, info->setup, info->func, info->teardown) - overhead;
+        warmup_count++;
+    } while (get_100ns_ticks() < warmup_until);
+
+    int64_t average_warmup_duration = warmup_elapsed_total / warmup_count,
+        necessary_iterations = (average_warmup_duration > 0) ? (target_step_duration * warmup_iterations / average_warmup_duration) : 0, // guard: a zero average would divide by zero; 0 is raised to the minimum of 16 just below
+        steps = 0,
+        run_elapsed_total = 0,
+        run_elapsed_min = INT64_MAX,
+        run_elapsed_max = INT64_MIN,
+        run_until = get_100ns_ticks() + target_duration;
+
+    if (necessary_iterations < 16)
+        necessary_iterations = 16;
+    // HACK: Reduce minor variation in iteration count
+    necessary_iterations = next_power_of_two((uint32_t)necessary_iterations);
+
+    printf(
+        "Warmed %" PRId64 " time(s). Running %" PRId64 " iterations... ",
+        warmup_count, necessary_iterations
+    );
+    fflush(stdout);
+
+    do {
+        int64_t step_duration = run_measurement(necessary_iterations, info->setup, info->func, info->teardown) - overhead;
+        run_elapsed_total += step_duration;
+        if (step_duration < run_elapsed_min)
+            run_elapsed_min = step_duration;
+        if (step_duration > run_elapsed_max)
+            run_elapsed_max = step_duration;
+        steps++;
+    } while (get_100ns_ticks() < run_until);
+
+    double run_elapsed_average = (double)(run_elapsed_total) / steps / necessary_iterations / 100.0; // NOTE(review): ticks are 100ns units, so "/ 100.0" does not turn ticks per iteration into ns -- confirm the unit the "ns" label is meant to report
+
+    args->result = 0;
+    printf(
+        "%" PRId64 " step(s): avg %.3fns min %.3fns max %.3fns\n",
+        steps,
+        run_elapsed_average,
+        (double)run_elapsed_min / necessary_iterations / 100.0,
+        (double)run_elapsed_max / necessary_iterations / 100.0
+    );
+    fflush(stdout);
+}
+
+int main (int argc, char* argv[]) {
+    init_measurements();
+
+    main_args args = {
+        argc, argv, 1
+    };
+    dn_simdhash_string_ptr_foreach(all_measurements, foreach_measurement, &args);
+
+    fflush(stdout);
+    fflush(stderr);
+
+    switch (args.result) {
+        case 0:
+            break;
+        case 1:
+            // no benchmarks run: result still holds its initial value because no measurement matched
+            fprintf(stderr, "No benchmarks run.\n");
+            break;
+        default:
+            fprintf(stderr, "Unknown failure!\n");
+            break;
+    }
+
+    return args.result;
+}
diff --git a/src/native/containers/simdhash-benchmark/ghashtable.c b/src/native/containers/simdhash-benchmark/ghashtable.c
new file mode 100644
index 0000000000000..6e3f24eb77e81
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/ghashtable.c
@@ -0,0 +1,606 @@
+/*
+ * ghashtable.c: Hashtable implementation
+ * Modified for simdhash benchmarking
+ *
+ * Author:
+ *   Miguel de Icaza (miguel@novell.com)
+ *
+ * (C) 2006 Novell, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ghashtable.h"
+#include
+
+typedef struct _Slot Slot;
+
+struct _Slot { /* one entry of a bucket's singly linked chain */
+	gpointer key;
+	gpointer value;
+	Slot    *next;
+};
+
+static gpointer KEYMARKER_REMOVED = &KEYMARKER_REMOVED;
+
+struct _GHashTable {
+	GHashFunc      hash_func;
+	GEqualFunc     key_equal_func;
+
+	Slot **table;
+	int   table_size;
+	int   in_use;
+	int   threshold; /* never assigned in this file: stays 0 from g_new0, so every insert consults rehash () */
+	int   last_rehash;
+	GDestroyNotify value_destroy_func, key_destroy_func;
+};
+
+typedef struct {
+	GHashTable *ht;
+	int slot_index;
+	Slot *slot;
+} Iter;
+
+static const guint prime_tbl[] = {
+	11, 19, 37, 73, 109, 163, 251, 367, 557, 823, 1237,
+	1861, 2777, 4177, 6247, 9371, 14057, 21089, 31627,
+	47431, 71143, 106721, 160073, 240101, 360163,
+	540217, 810343, 1215497, 1823231, 2734867, 4102283,
+	6153409, 9230113, 13845163
+};
+
+static gboolean
+test_prime (int x) /* trial division of an odd x by every odd n in [3, floor(sqrt(x))]; even x is prime only when it is 2 */
+{
+	if ((x & 1) != 0) {
+		int n;
+		for (n = 3; n <= (int)sqrt (x); n += 2) { /* <= so the root itself is tried: 9, 25, 49... are not prime */
+			if ((x % n) == 0)
+				return FALSE;
+		}
+		return TRUE;
+	}
+	// There is only one even prime - 2.
+	return (x == 2);
+}
+
+static int
+calc_prime (int x) /* first odd candidate at or after (x & ~1) - 1 that passes test_prime; x itself if none is found */
+{
+	int i;
+
+	for (i = (x & (~1))-1; i< G_MAXINT32; i += 2) {
+		if (test_prime (i))
+			return i;
+	}
+	return x;
+}
+
+guint
+g_spaced_primes_closest (guint x) /* smallest prime_tbl entry >= x, else falls back to calc_prime */
+{
+	int i;
+
+	for (i = 0; i < G_N_ELEMENTS (prime_tbl); i++) {
+		if (x <= prime_tbl [i])
+			return prime_tbl [i];
+	}
+	return calc_prime (x);
+}
+
+GHashTable *
+g_hash_table_new (GHashFunc hash_func, GEqualFunc key_equal_func) /* NULL arguments select g_direct_hash / g_direct_equal */
+{
+	GHashTable *hash;
+
+	if (hash_func == NULL)
+		hash_func = g_direct_hash;
+	if (key_equal_func == NULL)
+		key_equal_func = g_direct_equal;
+	hash = g_new0 (GHashTable, 1);
+
+	hash->hash_func = hash_func;
+	hash->key_equal_func = key_equal_func;
+
+	hash->table_size = g_spaced_primes_closest (1);
+	hash->table = g_new0 (Slot *, hash->table_size);
+	hash->last_rehash = hash->table_size;
+
+	return hash;
+}
+
+GHashTable *
+g_hash_table_new_full (GHashFunc hash_func, GEqualFunc key_equal_func,
+		       GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
+{
+	GHashTable *hash = g_hash_table_new (hash_func, key_equal_func);
+	if (hash == NULL)
+		return NULL;
+
+	hash->key_destroy_func = key_destroy_func;
+	hash->value_destroy_func = value_destroy_func;
+
+	return hash;
+}
+
+#define sanity_check(HASH) do {}while(0)
+
+static void
+do_rehash (GHashTable *hash) /* resizes the bucket array for the current in_use count and relinks every slot */
+{
+	int current_size, i;
+	Slot **table;
+
+	/* printf ("Resizing diff=%d slots=%d\n", hash->in_use - hash->last_rehash, hash->table_size); */
+	hash->last_rehash = hash->table_size;
+	current_size = hash->table_size;
+	hash->table_size = g_spaced_primes_closest (hash->in_use);
+	/* printf ("New size: %d\n", hash->table_size); */
+	table = hash->table;
+	hash->table = g_new0 (Slot *, hash->table_size);
+
+	for (i = 0; i < current_size; i++){
+		Slot *s, *next;
+
+		for (s = table [i]; s != NULL; s = next){
+			guint hashcode = ((*hash->hash_func) (s->key)) % hash->table_size;
+			next = s->next;
+
+			s->next = hash->table [hashcode];
+			hash->table [hashcode] = s;
+		}
+	}
+	g_free (table);
+}
+
+static void
+rehash (GHashTable *hash) /* resizes only when in_use has moved far enough from the size at the last rehash */
+{
+	int diff = ABS (hash->last_rehash - hash->in_use);
+
+	/* These are the factors to play with to change the rehashing strategy */
+	/* I played with them with a large range, and could not really get */
+	/* something that was too good, maybe the tests are not that great */
+	if (!(diff * 0.75 > hash->table_size * 2))
+		return;
+	do_rehash (hash);
+	sanity_check (hash);
+}
+
+void
+g_hash_table_insert_replace (GHashTable *hash, gpointer key, gpointer value, gboolean replace) /* replace != 0 also swaps in the new key object when the key already exists */
+{
+	guint hashcode;
+	Slot *s;
+	GEqualFunc equal;
+
+	g_return_if_fail (hash != NULL);
+	sanity_check (hash);
+
+	equal = hash->key_equal_func;
+	if (hash->in_use >= hash->threshold)
+		rehash (hash);
+
+	hashcode = ((*hash->hash_func) (key)) % hash->table_size;
+	for (s = hash->table [hashcode]; s != NULL; s = s->next){
+		if ((*equal) (s->key, key)){
+			if (replace){
+				if (hash->key_destroy_func != NULL)
+					(*hash->key_destroy_func)(s->key);
+				s->key = key;
+			}
+			if (hash->value_destroy_func != NULL)
+				(*hash->value_destroy_func) (s->value);
+			s->value = value;
+			sanity_check (hash);
+			return;
+		}
+	}
+	s = g_new (Slot, 1);
+	s->key = key;
+	s->value = value;
+	s->next = hash->table [hashcode];
+	hash->table [hashcode] = s;
+	hash->in_use++;
+	sanity_check (hash);
+}
+
+guint
+g_hash_table_size (GHashTable *hash)
+{
+	g_return_val_if_fail (hash != NULL, 0);
+
+	return hash->in_use;
+}
+
+gboolean
+g_hash_table_contains (GHashTable *hash, gconstpointer key)
+{
+	g_return_val_if_fail (key != NULL, FALSE);
+
+	return g_hash_table_lookup_extended (hash, key, NULL, NULL);
+}
+
+gpointer
+g_hash_table_lookup (GHashTable *hash, gconstpointer key) /* NULL both for a missing key and for a stored NULL value */
+{
+	gpointer orig_key, value;
+
+	if (g_hash_table_lookup_extended (hash, key, &orig_key, &value))
+		return value;
+	else
+		return NULL;
+}
+
+gboolean
+g_hash_table_lookup_extended (GHashTable *hash, gconstpointer key, gpointer *orig_key, gpointer *value) /* orig_key and value are optional out-parameters */
+{
+	GEqualFunc equal;
+	Slot *s;
+	guint hashcode;
+
+	g_return_val_if_fail (hash != NULL, FALSE);
+	sanity_check (hash);
+	equal = hash->key_equal_func;
+
+	hashcode = ((*hash->hash_func) (key)) % hash->table_size;
+
+	for (s = hash->table [hashcode]; s != NULL; s = s->next){
+		if ((*equal)(s->key, key)){
+			if (orig_key)
+				*orig_key = s->key;
+			if (value)
+				*value = s->value;
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
+void
+g_hash_table_foreach (GHashTable *hash, GHFunc func, gpointer user_data)
+{
+	int i;
+
+	g_return_if_fail (hash != NULL);
+	g_return_if_fail (func != NULL);
+
+	for (i = 0; i < hash->table_size; i++){
+		Slot *s;
+
+		for (s = hash->table [i]; s != NULL; s = s->next)
+			(*func)(s->key, s->value, user_data);
+	}
+}
+
+gpointer
+g_hash_table_find (GHashTable *hash, GHRFunc predicate, gpointer user_data) /* value of the first entry the predicate accepts, else NULL */
+{
+	int i;
+
+	g_return_val_if_fail (hash != NULL, NULL);
+	g_return_val_if_fail (predicate != NULL, NULL);
+
+	for (i = 0; i < hash->table_size; i++){
+		Slot *s;
+
+		for (s = hash->table [i]; s != NULL; s = s->next)
+			if ((*predicate)(s->key, s->value, user_data))
+				return s->value;
+	}
+	return NULL;
+}
+
+void
+g_hash_table_remove_all (GHashTable *hash)
+{
+	int i;
+
+	g_return_if_fail (hash != NULL);
+
+	for (i = 0; i < hash->table_size; i++){
+		Slot *s;
+
+		while (hash->table [i]) {
+			s = hash->table [i];
+			g_hash_table_remove (hash, s->key);
+		}
+	}
+}
+
+gboolean
+g_hash_table_remove (GHashTable *hash, gconstpointer key) /* unlinks and frees the slot, running the destroy notifiers; TRUE if the key was present */
+{
+	GEqualFunc equal;
+	Slot *s, *last;
+	guint hashcode;
+
+	g_return_val_if_fail (hash != NULL, FALSE);
+	sanity_check (hash);
+	equal = hash->key_equal_func;
+
+	hashcode = ((*hash->hash_func)(key)) % hash->table_size;
+	last = NULL;
+	for (s = hash->table [hashcode]; s != NULL; s = s->next){
+		if ((*equal)(s->key, key)){
+			if (hash->key_destroy_func != NULL)
+				(*hash->key_destroy_func)(s->key);
+			if (hash->value_destroy_func != NULL)
+				(*hash->value_destroy_func)(s->value);
+			if (last == NULL)
+				hash->table [hashcode] = s->next;
+			else
+				last->next = s->next;
+			g_free (s);
+			hash->in_use--;
+			sanity_check (hash);
+			return TRUE;
+		}
+		last = s;
+	}
+	sanity_check (hash);
+	return FALSE;
+}
+
+guint
+g_hash_table_foreach_remove (GHashTable *hash, GHRFunc func, gpointer user_data) /* removes every entry func accepts; returns how many were removed */
+{
+	int i;
+	int count = 0;
+
+	g_return_val_if_fail (hash != NULL, 0);
+	g_return_val_if_fail (func != NULL, 0);
+
+	sanity_check (hash);
+	for (i = 0; i < hash->table_size; i++){
+		Slot *s, *last;
+
+		last = NULL;
+		for (s = hash->table [i]; s != NULL; ){
+			if ((*func)(s->key, s->value, user_data)){
+				Slot *n;
+
+				if (hash->key_destroy_func != NULL)
+					(*hash->key_destroy_func)(s->key);
+				if (hash->value_destroy_func != NULL)
+					(*hash->value_destroy_func)(s->value);
+				if (last == NULL){
+					hash->table [i] = s->next;
+					n = s->next;
+				} else  {
+					last->next = s->next;
+					n = last->next;
+				}
+				g_free (s);
+				hash->in_use--;
+				count++;
+				s = n;
+			} else {
+				last = s;
+				s = s->next;
+			}
+		}
+	}
+	sanity_check (hash);
+	if (count > 0)
+		rehash (hash);
+	return count;
+}
+
+gboolean
+g_hash_table_steal (GHashTable *hash, gconstpointer key) /* like g_hash_table_remove but without calling the destroy notifiers */
+{
+	GEqualFunc equal;
+	Slot *s, *last;
+	guint hashcode;
+
+	g_return_val_if_fail (hash != NULL, FALSE);
+	sanity_check (hash);
+	equal = hash->key_equal_func;
+
+	hashcode = ((*hash->hash_func)(key)) % hash->table_size;
+	last = NULL;
+	for (s = hash->table [hashcode]; s != NULL; s = s->next){
+		if ((*equal)(s->key, key)) {
+			if (last == NULL)
+				hash->table [hashcode] = s->next;
+			else
+				last->next = s->next;
+			g_free (s);
+			hash->in_use--;
+			sanity_check (hash);
+			return TRUE;
+		}
+		last = s;
+	}
+	sanity_check (hash);
+	return FALSE;
+
+}
+
+guint
+g_hash_table_foreach_steal (GHashTable *hash, GHRFunc func, gpointer user_data) /* like g_hash_table_foreach_remove but without the destroy notifiers */
+{
+	int i;
+	int count = 0;
+
+	g_return_val_if_fail (hash != NULL, 0);
+	g_return_val_if_fail (func != NULL, 0);
+
+	sanity_check (hash);
+	for (i = 0; i < hash->table_size; i++){
+		Slot *s, *last;
+
+		last = NULL;
+		for (s = hash->table [i]; s != NULL; ){
+			if ((*func)(s->key, s->value, user_data)){
+				Slot *n;
+
+				if (last == NULL){
+					hash->table [i] = s->next;
+					n = s->next;
+				} else  {
+					last->next = s->next;
+					n = last->next;
+				}
+				g_free (s);
+				hash->in_use--;
+				count++;
+				s = n;
+			} else {
+				last = s;
+				s = s->next;
+			}
+		}
+	}
+	sanity_check (hash);
+	if (count > 0)
+		rehash (hash);
+	return count;
+}
+
+void
+g_hash_table_destroy (GHashTable *hash) /* frees every slot (running the destroy notifiers), the bucket array and the table; NULL is ignored */
+{
+	int i;
+
+	if (!hash)
+		return;
+
+	for (i = 0; i < hash->table_size; i++){
+		Slot *s, *next;
+
+		for (s = hash->table [i]; s != NULL; s = next){
+			next = s->next;
+
+			if (hash->key_destroy_func != NULL)
+				(*hash->key_destroy_func)(s->key);
+			if (hash->value_destroy_func != NULL)
+				(*hash->value_destroy_func)(s->value);
+			g_free (s);
+		}
+	}
+	g_free (hash->table);
+
+	g_free (hash);
+}
+
+void
+g_hash_table_print_stats (GHashTable *table)
+{
+	int i, max_chain_index, chain_size, max_chain_size;
+	Slot *node;
+
+	max_chain_size = 0;
+	max_chain_index = -1;
+	for (i = 0; i < table->table_size; i++) {
+		chain_size = 0;
+		for (node = table->table [i]; node; node = node->next)
+			chain_size ++;
+		if (chain_size > max_chain_size) {
+			max_chain_size = chain_size;
+			max_chain_index = i;
+		}
+	}
+
+	printf ("Size: %d Table Size: %d Max Chain Length: %d at %d\n", table->in_use, table->table_size, max_chain_size, max_chain_index);
+}
+
+void
+g_hash_table_iter_init (GHashTableIter *it, GHashTable *hash_table)
+{
+	Iter *iter = (Iter*)it;
+
+	memset (iter, 0, sizeof (Iter));
+	iter->ht = hash_table;
+	iter->slot_index = -1; /* -1: not started yet; g_hash_table_iter_next sets -2 once exhausted */
+}
+
+gboolean g_hash_table_iter_next (GHashTableIter *it, gpointer *key, gpointer *value)
+{
+	Iter *iter = (Iter*)it;
+
+	GHashTable *hash = iter->ht;
+
+	g_assert (iter->slot_index != -2);
+	g_assert (sizeof (Iter) <= sizeof (GHashTableIter));
+
+	if (!iter->slot) {
+		while (TRUE) {
+			iter->slot_index ++;
+			if (iter->slot_index >= hash->table_size) {
+				iter->slot_index = -2;
+				return FALSE;
+			}
+			if (hash->table [iter->slot_index])
+				break;
+		}
+		iter->slot = hash->table [iter->slot_index];
+	}
+
+	if (key)
+		*key = iter->slot->key;
+	if (value)
+		*value = iter->slot->value;
+	iter->slot = iter->slot->next;
+
+	return TRUE;
+}
+
+gboolean
+g_direct_equal (gconstpointer v1, gconstpointer v2)
+{
+	return v1 == v2;
+}
+
+guint
+g_direct_hash (gconstpointer v1)
+{
+	return GCONSTPOINTER_TO_UINT (v1);
+}
+
+gboolean
+g_int_equal (gconstpointer v1, gconstpointer v2)
+{
+	return *(gint *)v1 == *(gint *)v2;
+}
+
+guint
+g_int_hash (gconstpointer v1)
+{
+	return *(guint *)v1;
+}
+
+gboolean
+g_str_equal (gconstpointer v1, gconstpointer v2)
+{
+	return v1 == v2 || strcmp ((const char*)v1, (const char*)v2) == 0;
+}
+
+guint
+g_str_hash (gconstpointer v1)
+{
+	guint hash = 0;
+	unsigned char *p = (unsigned char *) v1;
+
+	while (*p++) /* p is advanced before the body reads *p: the first byte is never mixed in, the terminating NUL is */
+		hash = (hash << 5) - (hash + *p);
+
+	return hash;
+}
diff --git a/src/native/containers/simdhash-benchmark/ghashtable.h b/src/native/containers/simdhash-benchmark/ghashtable.h
new file mode 100644
index
0000000000000..076ddfde5e1c8
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/ghashtable.h
@@ -0,0 +1,157 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef GHASHTABLE_H
+#define GHASHTABLE_H
+
+#include
+#include
+#include
+#include
+
+/*
+ * Basic data types (glib-style aliases for the built-in C types)
+ */
+typedef int            gint;
+typedef unsigned int   guint;
+typedef short          gshort;
+typedef unsigned short gushort;
+typedef long           glong;
+typedef unsigned long  gulong;
+typedef void *         gpointer;
+typedef const void *   gconstpointer;
+typedef char           gchar;
+typedef unsigned char  guchar;
+
+/* Types defined in terms of the stdint.h */
+typedef int8_t         gint8;
+typedef uint8_t        guint8;
+typedef int16_t        gint16;
+typedef uint16_t       guint16;
+typedef int32_t        gint32;
+typedef uint32_t       guint32;
+typedef int64_t        gint64;
+typedef uint64_t       guint64;
+typedef float          gfloat;
+typedef double         gdouble;
+typedef int32_t        gboolean; // holds FALSE (0) or TRUE (1) as defined below
+// typedef ptrdiff_t      gptrdiff;
+typedef intptr_t       gintptr;
+typedef uintptr_t      guintptr;
+
+#define G_N_ELEMENTS(arr) (sizeof(arr)/sizeof(arr[0])) // element count; valid for true arrays only, not pointers
+
+#define FALSE 0
+#define TRUE 1
+
+#define G_MINSHORT SHRT_MIN
+#define G_MAXSHORT SHRT_MAX
+#define G_MAXUSHORT USHRT_MAX
+#define G_MAXINT INT_MAX
+#define G_MININT INT_MIN
+#define G_MAXINT8 INT8_MAX
+#define G_MAXUINT8 UINT8_MAX
+#define G_MININT8 INT8_MIN
+#define G_MAXINT16 INT16_MAX
+#define G_MAXUINT16 UINT16_MAX
+#define G_MININT16 INT16_MIN
+#define G_MAXINT32 INT32_MAX
+#define G_MAXUINT32 UINT32_MAX
+#define G_MININT32 INT32_MIN
+#define G_MININT64 INT64_MIN
+#define G_MAXINT64 INT64_MAX
+#define G_MAXUINT64 UINT64_MAX
+
+#define G_LITTLE_ENDIAN 1234
+#define G_BIG_ENDIAN 4321
+#define G_STMT_START do
+#define G_STMT_END while (0)
+
+typedef void (*GFunc) (gpointer data, gpointer user_data);
+typedef gint (*GCompareFunc) (gconstpointer a, gconstpointer b);
+typedef gint (*GCompareDataFunc) (gconstpointer a, gconstpointer b, gpointer user_data);
+typedef void (*GHFunc) (gpointer key, gpointer value, gpointer user_data);
+typedef gboolean (*GHRFunc) (gpointer key, gpointer value, gpointer user_data);
+typedef void (*GDestroyNotify) (gpointer data);
+typedef guint (*GHashFunc) (gconstpointer key);
+typedef gboolean (*GEqualFunc) (gconstpointer a, gconstpointer b);
+typedef void (*GFreeFunc) (gpointer data);
+
+/*
+ * Hashtables
+ */
+typedef struct _GHashTable GHashTable;
+typedef struct _GHashTableIter GHashTableIter;
+
+/* Private, but needed for stack allocation */
+struct _GHashTableIter
+{
+	gpointer dummy [8];
+};
+
+GHashTable *g_hash_table_new (GHashFunc hash_func, GEqualFunc key_equal_func);
+GHashTable *g_hash_table_new_full (GHashFunc hash_func, GEqualFunc key_equal_func,
+                                          GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func);
+void g_hash_table_insert_replace (GHashTable *hash, gpointer key, gpointer value, gboolean replace);
+guint g_hash_table_size (GHashTable *hash);
+gboolean g_hash_table_contains (GHashTable *hash, gconstpointer key);
+gpointer g_hash_table_lookup (GHashTable *hash, gconstpointer key);
+gboolean g_hash_table_lookup_extended (GHashTable *hash, gconstpointer key, gpointer *orig_key, gpointer *value);
+void g_hash_table_foreach (GHashTable *hash, GHFunc func, gpointer user_data);
+gpointer g_hash_table_find (GHashTable *hash, GHRFunc predicate, gpointer user_data);
+gboolean g_hash_table_remove (GHashTable *hash, gconstpointer key);
+gboolean g_hash_table_steal (GHashTable *hash, gconstpointer key);
+void g_hash_table_remove_all (GHashTable *hash);
+guint g_hash_table_foreach_remove (GHashTable *hash, GHRFunc func, gpointer user_data);
+guint g_hash_table_foreach_steal (GHashTable *hash, GHRFunc func, gpointer user_data);
+void g_hash_table_destroy (GHashTable *hash);
+void g_hash_table_print_stats (GHashTable *table);
+
+void g_hash_table_iter_init (GHashTableIter *iter, GHashTable *hash_table);
+gboolean g_hash_table_iter_next (GHashTableIter *iter, gpointer *key, gpointer *value);
+
+guint g_spaced_primes_closest (guint x);
+
+#define g_hash_table_insert(h,k,v) g_hash_table_insert_replace ((h),(k),(v),FALSE)
+#define g_hash_table_replace(h,k,v) g_hash_table_insert_replace ((h),(k),(v),TRUE)
+#define g_hash_table_add(h,k) g_hash_table_insert_replace ((h),(k),(k),TRUE)
+
+gboolean g_direct_equal (gconstpointer v1, gconstpointer v2);
+guint g_direct_hash (gconstpointer v1);
+gboolean g_int_equal (gconstpointer v1, gconstpointer v2);
+guint g_int_hash (gconstpointer v1);
+gboolean g_str_equal (gconstpointer v1, gconstpointer v2);
+guint g_str_hash (gconstpointer v1);
+
+#define GCONSTPOINTER_TO_INT(v) ((gint)((intptr_t)(v))) // intptr_t instead of the non-standard ssize_t; argument and result parenthesized
+#define GCONSTPOINTER_TO_UINT(v) ((guint)((size_t)(v))) // was cast to gint although the name and g_direct_hash expect guint
+
+// FIXME
+#define g_assert(expr) (void)(expr)
+
+#define g_malloc malloc
+#define g_free free
+#define g_realloc realloc
+
+#define g_new(type,size) ((type *) g_malloc (sizeof (type) * (size)))
+#define g_new0(type,size) ((type *) g_malloc0 (sizeof (type)* (size)))
+#define g_newa(type,size) ((type *) alloca (sizeof (type) * (size)))
+#define g_newa0(type,size) ((type *) memset (alloca (sizeof (type) * (size)), 0, sizeof (type) * (size)))
+
+#define g_memmove(dest,src,len) memmove (dest, src, len)
+#define g_renew(struct_type, mem, n_structs) ((struct_type*)g_realloc (mem, sizeof (struct_type) * (n_structs)))
+#define g_alloca(size) (alloca (size)) // the g_cast wrapper is not defined anywhere in this header
+
+#define g_return_if_fail(x) do { if (!(x)) { return; } } while (0) // do/while keeps the macro a single statement next to if/else
+#define g_return_val_if_fail(x,e) do { if (!(x)) { return (e); } } while (0)
+
+static inline void *
+g_malloc0 (size_t size) {
+	void * result = malloc(size);
+	if (result) memset(result, 0, size); // malloc can return NULL, and memset on NULL is undefined behavior
+	return result;
+}
+
+#define ABS(x) abs(x)
+
+#endif // GHASHTABLE_H
diff --git a/src/native/containers/simdhash-benchmark/measurement.h b/src/native/containers/simdhash-benchmark/measurement.h
new file mode 100644
index 0000000000000..368253a357df0
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/measurement.h
@@ -0,0 +1,11 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+typedef void * (*setup_func) (void); // builds the per-run state handed to the measurement
+typedef void (*measurement_func) (void *data); // also the signature used for teardown
+
+typedef struct {
+    const char *name;
+    setup_func setup;
+    measurement_func func, teardown;
+} measurement_info;
diff --git a/src/native/containers/simdhash-benchmark/run-benchmark.ps1 b/src/native/containers/simdhash-benchmark/run-benchmark.ps1
new file mode 100755
index 0000000000000..536ba40cd9144
--- /dev/null
+++ b/src/native/containers/simdhash-benchmark/run-benchmark.ps1
@@ -0,0 +1,2 @@
+cl /GS- /O2 /std:c17 ./*.c ../dn-simdhash-u32-ptr.c ../dn-simdhash.c ../dn-vector.c ../dn-simdhash-string-ptr.c /DNO_CONFIG_H /DSIZEOF_VOID_P=8 /Fe:all-measurements.exe
+./all-measurements.exe