diff --git a/src/native/containers/containers.cmake b/src/native/containers/containers.cmake index 46fead8627d712..16c41eab5619f8 100644 --- a/src/native/containers/containers.cmake +++ b/src/native/containers/containers.cmake @@ -12,6 +12,7 @@ list(APPEND SHARED_CONTAINER_SOURCES # dn-simdhash.c # dn-simdhash-string-ptr.c # dn-simdhash-u32-ptr.c + # dn-simdhash-ptr-ptr.c ) list(APPEND SHARED_CONTAINER_HEADERS @@ -34,4 +35,5 @@ list(APPEND SHARED_CONTAINER_HEADERS dn-simdhash-specializations.h dn-simdhash-arch.h dn-simdhash-string-ptr.h + dn-simdhash-utils.h ) diff --git a/src/native/containers/dn-simdhash-ght-compatible.c b/src/native/containers/dn-simdhash-ght-compatible.c new file mode 100644 index 00000000000000..0ef57dff462341 --- /dev/null +++ b/src/native/containers/dn-simdhash-ght-compatible.c @@ -0,0 +1,111 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include <config.h> // NOTE(review): header name assumed to be the build-generated config.h (SIZEOF_VOID_P is tested later in this file) - confirm +#include "dn-simdhash.h" + +#include "dn-simdhash-utils.h" + +typedef unsigned int guint; +typedef int32_t gboolean; +typedef void * gpointer; +typedef const void * gconstpointer; + +typedef void (*GDestroyNotify) (gpointer data); +typedef guint (*GHashFunc) (gconstpointer key); +typedef gboolean (*GEqualFunc) (gconstpointer a, gconstpointer b); + +typedef struct dn_simdhash_ght_data { + GHashFunc hash_func; + GEqualFunc key_equal_func; + GDestroyNotify key_destroy_func; + GDestroyNotify value_destroy_func; +} dn_simdhash_ght_data; + +static inline uint32_t +dn_simdhash_ght_hash (dn_simdhash_ght_data data, gconstpointer key) +{ + GHashFunc hash_func = data.hash_func; + if (hash_func) + return (uint32_t)hash_func(key); + else + // FIXME: Seed + return MurmurHash3_32_ptr(key, 0); +} + +static inline gboolean +dn_simdhash_ght_equals (dn_simdhash_ght_data data, gconstpointer lhs, gconstpointer rhs) +{ + GEqualFunc equal_func = data.key_equal_func; + if (equal_func) + return equal_func(lhs, rhs); + else + 
return lhs == rhs; +} + +static inline void +dn_simdhash_ght_removed (dn_simdhash_ght_data data, gconstpointer key, gpointer value) +{ + GDestroyNotify key_destroy_func = data.key_destroy_func, + value_destroy_func = data.value_destroy_func; + if (key_destroy_func) + key_destroy_func((gpointer)key); + if (value_destroy_func) + value_destroy_func((gpointer)value); +} + +static inline void +dn_simdhash_ght_replaced (dn_simdhash_ght_data data, gconstpointer key, gpointer old_value, gpointer new_value) +{ + if (old_value == new_value) + return; + + GDestroyNotify value_destroy_func = data.value_destroy_func; + if (value_destroy_func) + value_destroy_func((gpointer)old_value); +} + +#define DN_SIMDHASH_T dn_simdhash_ght +#define DN_SIMDHASH_KEY_T gconstpointer +#define DN_SIMDHASH_VALUE_T gpointer +#define DN_SIMDHASH_INSTANCE_DATA_T dn_simdhash_ght_data +#define DN_SIMDHASH_KEY_HASHER dn_simdhash_ght_hash +#define DN_SIMDHASH_KEY_EQUALS dn_simdhash_ght_equals +#define DN_SIMDHASH_ON_REMOVE dn_simdhash_ght_removed +#define DN_SIMDHASH_ON_REPLACE dn_simdhash_ght_replaced +#if SIZEOF_VOID_P == 8 +#define DN_SIMDHASH_BUCKET_CAPACITY 11 +#else +#define DN_SIMDHASH_BUCKET_CAPACITY 12 +#endif +#define DN_SIMDHASH_NO_DEFAULT_NEW 1 + +#include "dn-simdhash-specialization.h" +#include "dn-simdhash-ght-compatible.h" + +dn_simdhash_ght_t * +dn_simdhash_ght_new ( + GHashFunc hash_func, GEqualFunc key_equal_func, + uint32_t capacity, dn_allocator_t *allocator +) +{ + dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func; + return hash; +} + +dn_simdhash_ght_t * +dn_simdhash_ght_new_full ( + GHashFunc hash_func, GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func, + uint32_t capacity, dn_allocator_t *allocator +) +{ 
+ dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_destroy_func = key_destroy_func; + dn_simdhash_instance_data(dn_simdhash_ght_data, hash).value_destroy_func = value_destroy_func; + return hash; +} diff --git a/src/native/containers/dn-simdhash-ght-compatible.h b/src/native/containers/dn-simdhash-ght-compatible.h new file mode 100644 index 00000000000000..3611a3a3c4a4a5 --- /dev/null +++ b/src/native/containers/dn-simdhash-ght-compatible.h @@ -0,0 +1,12 @@ +dn_simdhash_ght_t * +dn_simdhash_ght_new ( + GHashFunc hash_func, GEqualFunc key_equal_func, + uint32_t capacity, dn_allocator_t *allocator +); + +dn_simdhash_ght_t * +dn_simdhash_ght_new_full ( + GHashFunc hash_func, GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func, + uint32_t capacity, dn_allocator_t *allocator +); diff --git a/src/native/containers/dn-simdhash-ptr-ptr.c b/src/native/containers/dn-simdhash-ptr-ptr.c new file mode 100644 index 00000000000000..6e7b9b6893c862 --- /dev/null +++ b/src/native/containers/dn-simdhash-ptr-ptr.c @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include <config.h> // NOTE(review): header name assumed to be the build-generated config.h (SIZEOF_VOID_P is tested below) - confirm +#include "dn-simdhash.h" + +#include "dn-simdhash-utils.h" + +#define DN_SIMDHASH_T dn_simdhash_ptr_ptr +#define DN_SIMDHASH_KEY_T void * +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_KEY_HASHER(hash, key) (MurmurHash3_32_ptr((key), 0)) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) ((lhs) == (rhs)) +#if SIZEOF_VOID_P == 8 +#define DN_SIMDHASH_BUCKET_CAPACITY 11 +#else +#define DN_SIMDHASH_BUCKET_CAPACITY 12 +#endif + +#include "dn-simdhash-specialization.h" diff --git a/src/native/containers/dn-simdhash-specialization-declarations.h b/src/native/containers/dn-simdhash-specialization-declarations.h index c1806fae58dc96..84f49c00c13441 100644 --- a/src/native/containers/dn-simdhash-specialization-declarations.h +++ b/src/native/containers/dn-simdhash-specialization-declarations.h @@ -32,16 +32,21 @@ #define DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_get_value_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) #define DN_SIMDHASH_TRY_REMOVE DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_remove,DN_SIMDHASH_ACCESSOR_SUFFIX) #define DN_SIMDHASH_TRY_REMOVE_WITH_HASH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_remove_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_REPLACE DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_replace,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_TRY_REPLACE_WITH_HASH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_try_replace_with_hash,DN_SIMDHASH_ACCESSOR_SUFFIX) #define DN_SIMDHASH_FOREACH DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_foreach,DN_SIMDHASH_ACCESSOR_SUFFIX) #define DN_SIMDHASH_FOREACH_FUNC DN_SIMDHASH_GLUE_3(DN_SIMDHASH_T,_foreach_func,DN_SIMDHASH_ACCESSOR_SUFFIX) +#define DN_SIMDHASH_DESTROY_ALL DN_SIMDHASH_GLUE(DN_SIMDHASH_T,_destroy_all) typedef void (*DN_SIMDHASH_FOREACH_FUNC) (DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value, void *user_data); // Declare a specific alias so intellisense gives more helpful info typedef dn_simdhash_t DN_SIMDHASH_T_NAME; +#ifndef DN_SIMDHASH_NO_DEFAULT_NEW DN_SIMDHASH_T_PTR 
DN_SIMDHASH_NEW (uint32_t capacity, dn_allocator_t *allocator); +#endif uint8_t DN_SIMDHASH_TRY_ADD (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value); @@ -61,5 +66,11 @@ DN_SIMDHASH_TRY_REMOVE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key); uint8_t DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash); +uint8_t +DN_SIMDHASH_TRY_REPLACE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T new_value); + +uint8_t +DN_SIMDHASH_TRY_REPLACE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T new_value); + void DN_SIMDHASH_FOREACH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_FOREACH_FUNC func, void *user_data); diff --git a/src/native/containers/dn-simdhash-specialization.h b/src/native/containers/dn-simdhash-specialization.h index 30d00c66e2d59f..29bc5e172ed6d9 100644 --- a/src/native/containers/dn-simdhash-specialization.h +++ b/src/native/containers/dn-simdhash-specialization.h @@ -22,13 +22,44 @@ #error Expected DN_SIMDHASH_VALUE_T definition i.e. int #endif +// If specified, we pass instance data to the handlers by-value, otherwise we +// pass the pointer to the hash itself by-value. This is enough to allow clang +// to hoist the load of the instance data out of the key scan loop, though it +// won't hoist it all the way out of the bucket scan loop. 
+#ifndef DN_SIMDHASH_INSTANCE_DATA_T +#define DN_SIMDHASH_GET_DATA(hash) (hash) +#define DN_SIMDHASH_INSTANCE_DATA_T DN_SIMDHASH_T_PTR +#else // DN_SIMDHASH_INSTANCE_DATA_T +#define DN_SIMDHASH_GET_DATA(hash) dn_simdhash_instance_data(DN_SIMDHASH_INSTANCE_DATA_T, hash) +#endif // DN_SIMDHASH_INSTANCE_DATA_T + #ifndef DN_SIMDHASH_KEY_HASHER -#error Expected DN_SIMDHASH_KEY_HASHER definition with signature: uint32_t (KEY_T key) +#error Expected DN_SIMDHASH_KEY_HASHER definition with signature: uint32_t (DN_SIMDHASH_INSTANCE_DATA_T data, KEY_T key) #endif #ifndef DN_SIMDHASH_KEY_EQUALS -#error Expected DN_SIMDHASH_KEY_EQUALS definition with signature: int (KEY_T lhs, KEY_T rhs) that returns 1 for match +#error Expected DN_SIMDHASH_KEY_EQUALS definition with signature: int (DN_SIMDHASH_INSTANCE_DATA_T data, KEY_T lhs, KEY_T rhs) that returns 1 for match +#endif + +#ifndef DN_SIMDHASH_ON_REPLACE +#define DN_SIMDHASH_HAS_REPLACE_HANDLER 0 +#define DN_SIMDHASH_ON_REPLACE(data, key, old_value, new_value) +#else // DN_SIMDHASH_ON_REPLACE +#define DN_SIMDHASH_HAS_REPLACE_HANDLER 1 +#ifndef DN_SIMDHASH_ON_REMOVE +#error Expected DN_SIMDHASH_ON_REMOVE(data, key, value) to be defined. #endif +#endif // DN_SIMDHASH_ON_REPLACE + +#ifndef DN_SIMDHASH_ON_REMOVE +#define DN_SIMDHASH_HAS_REMOVE_HANDLER 0 +#define DN_SIMDHASH_ON_REMOVE(data, key, value) +#else // DN_SIMDHASH_ON_REMOVE +#define DN_SIMDHASH_HAS_REMOVE_HANDLER 1 +#if !DN_SIMDHASH_HAS_REPLACE_HANDLER // DN_SIMDHASH_ON_REPLACE is always defined by this point (the section above supplies a no-op fallback), so test the flag that records whether the user supplied one +#error Expected DN_SIMDHASH_ON_REPLACE(data, key, old_value, new_value) to be defined. +#endif +#endif // DN_SIMDHASH_ON_REMOVE #ifndef DN_SIMDHASH_BUCKET_CAPACITY // TODO: Find some way to automatically select an ideal bucket capacity based on key size. 
@@ -38,8 +69,8 @@ #include "dn-simdhash-specialization-declarations.h" -static_assert (DN_SIMDHASH_BUCKET_CAPACITY <= DN_SIMDHASH_MAX_BUCKET_CAPACITY, "Maximum bucket capacity exceeded"); -static_assert (DN_SIMDHASH_BUCKET_CAPACITY > 1, "Bucket capacity too low"); +static_assert(DN_SIMDHASH_BUCKET_CAPACITY <= DN_SIMDHASH_MAX_BUCKET_CAPACITY, "Maximum bucket capacity exceeded"); +static_assert(DN_SIMDHASH_BUCKET_CAPACITY > 1, "Bucket capacity too low"); // We set bucket_size_bytes to sizeof() this struct so that we can let the compiler // generate the most optimal code possible when we're manipulating pointers to it - @@ -55,12 +86,39 @@ typedef struct bucket_t { dn_simdhash_suffixes suffixes; DN_SIMDHASH_KEY_T keys[DN_SIMDHASH_BUCKET_CAPACITY]; } -#if defined(__clang__) || defined (__GNUC__) +#if defined(__clang__) || defined(__GNUC__) __attribute__((__aligned__(DN_SIMDHASH_VECTOR_WIDTH))) bucket_t; #else bucket_t; #endif +static_assert((sizeof (bucket_t) % DN_SIMDHASH_VECTOR_WIDTH) == 0, "Bucket size is not vector aligned"); + + +// While we've inlined these constants into the specialized code we're generating, +// the generic code in dn-simdhash.c needs them, so we put them in this meta header +// that lives inside every hash instance. (TODO: Store it by-reference?) +dn_simdhash_meta_t DN_SIMDHASH_T_META = { + DN_SIMDHASH_BUCKET_CAPACITY, + sizeof(bucket_t), + sizeof(DN_SIMDHASH_KEY_T), + sizeof(DN_SIMDHASH_VALUE_T), + sizeof(DN_SIMDHASH_INSTANCE_DATA_T), +}; + + +static DN_FORCEINLINE(uint8_t) +check_self (DN_SIMDHASH_T_PTR self) +{ + // Verifies both that the self-ptr is non-null and that the meta pointer matches + // what it should be. This detects passing the wrong kind of simdhash_t pointer + // to one of the APIs, since C doesn't have fully type-safe pointers. 
+ uint8_t ok = self && (self->meta == &DN_SIMDHASH_T_META); + assert(ok); + return ok; +} + + static DN_FORCEINLINE(bucket_t *) address_of_bucket (dn_simdhash_buffers_t buffers, uint32_t bucket_index) { @@ -76,7 +134,7 @@ address_of_value (dn_simdhash_buffers_t buffers, uint32_t value_slot_index) // This helper is used to locate the first matching key in a given bucket, so that add // operations don't potentially have to scan the whole table twice when hashes collide static DN_FORCEINLINE(int) -DN_SIMDHASH_SCAN_BUCKET_INTERNAL (bucket_t *bucket, DN_SIMDHASH_KEY_T needle, dn_simdhash_suffixes search_vector) +DN_SIMDHASH_SCAN_BUCKET_INTERNAL (DN_SIMDHASH_T_PTR hash, bucket_t *bucket, DN_SIMDHASH_KEY_T needle, dn_simdhash_suffixes search_vector) { uint32_t count = dn_simdhash_bucket_count(bucket->suffixes), #if DN_SIMDHASH_USE_SCALAR_FALLBACK @@ -90,7 +148,9 @@ DN_SIMDHASH_SCAN_BUCKET_INTERNAL (bucket_t *bucket, DN_SIMDHASH_KEY_T needle, dn DN_SIMDHASH_KEY_T *key = &bucket->keys[index]; for (; index < count; index++, key++) { - if (DN_SIMDHASH_KEY_EQUALS(needle, *key)) + // FIXME: Could be profitable to manually hoist the data load outside of the loop, + // if not out of SCAN_BUCKET_INTERNAL entirely. Clang appears to do LICM on it. 
+ if (DN_SIMDHASH_KEY_EQUALS(DN_SIMDHASH_GET_DATA(hash), needle, *key)) return index; } @@ -114,6 +174,42 @@ DN_SIMDHASH_SCAN_BUCKET_INTERNAL (bucket_t *bucket, DN_SIMDHASH_KEY_T needle, dn } while (bucket_index != initial_index); \ } +#define BEGIN_SCAN_PAIRS(buffers, key_address, value_address) \ + bucket_t *scan_bucket_address = address_of_bucket(buffers, 0); \ + for ( \ + uint32_t scan_i = 0, scan_bc = buffers.buckets_length, scan_value_slot_base = 0; \ + scan_i < scan_bc; scan_i++, scan_bucket_address++, scan_value_slot_base += DN_SIMDHASH_BUCKET_CAPACITY \ + ) { \ + uint32_t scan_c = dn_simdhash_bucket_count(scan_bucket_address->suffixes); \ + for (uint32_t scan_j = 0; scan_j < scan_c; scan_j++) { \ + DN_SIMDHASH_KEY_T *key_address = &scan_bucket_address->keys[scan_j]; \ + DN_SIMDHASH_VALUE_T *value_address = address_of_value(buffers, scan_value_slot_base + scan_j); + +#define END_SCAN_PAIRS(buffers, key_address, value_address) \ + } \ + } + +// FIXME: inline? might improve performance for bucket overflow, but would +// increase code size, and maybe blow out icache. clang seems to inline it anyway. 
+static void +adjust_cascaded_counts (dn_simdhash_buffers_t buffers, uint32_t first_bucket_index, uint32_t last_bucket_index, uint8_t increase) +{ // Raises (increase != 0) or lowers the cascade counter of every bucket scanned from first_bucket_index until last_bucket_index is reached + BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) + if (bucket_index == last_bucket_index) // the bucket at last_bucket_index itself is not adjusted + break; + + uint8_t cascaded_count = dn_simdhash_bucket_cascaded_count(bucket_address->suffixes); + if (cascaded_count < 255) { // a count of 255 is left untouched in both directions + if (increase) + dn_simdhash_bucket_set_cascaded_count(bucket_address->suffixes, cascaded_count + 1); + else if (cascaded_count == 0) // an unsigned count can never be < 0, so test for zero to trap an unbalanced decrement instead of passing count - 1 to the setter + assert(0); + else + dn_simdhash_bucket_set_cascaded_count(bucket_address->suffixes, cascaded_count - 1); + } + END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) +} + static DN_SIMDHASH_VALUE_T * DN_SIMDHASH_FIND_VALUE_INTERNAL (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash) { @@ -123,13 +219,13 @@ DN_SIMDHASH_FIND_VALUE_INTERNAL (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, dn_simdhash_suffixes search_vector = build_search_vector(suffix); BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) - int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(bucket_address, key, search_vector); + int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(hash, bucket_address, key, search_vector); if (index_in_bucket >= 0) { uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + index_in_bucket; return address_of_value(buffers, value_slot_index); } - if (!dn_simdhash_bucket_is_cascaded(bucket_address->suffixes)) + if (!dn_simdhash_bucket_cascaded_count(bucket_address->suffixes)) return NULL; END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) @@ -154,27 +250,32 @@ DN_SIMDHASH_TRY_INSERT_INTERNAL (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) // If necessary, check the current bucket for the key if (ensure_not_present) { - int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(bucket_address, key, search_vector); 
+ int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(hash, bucket_address, key, search_vector); if (index_in_bucket >= 0) return DN_SIMDHASH_INSERT_KEY_ALREADY_PRESENT; } // The current bucket doesn't contain the key, or duplicate checks are disabled (for rehashing), // so attempt to insert into the bucket - uint8_t new_index = dn_simdhash_bucket_count (bucket_address->suffixes); + uint8_t new_index = dn_simdhash_bucket_count(bucket_address->suffixes); if (new_index < DN_SIMDHASH_BUCKET_CAPACITY) { + // Calculate value slot index early so that we don't stall waiting for it later + uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + new_index; // We found a bucket with space, so claim the first free slot - dn_simdhash_bucket_set_count (bucket_address->suffixes, new_index + 1); - dn_simdhash_bucket_set_suffix (bucket_address->suffixes, new_index, suffix); + dn_simdhash_bucket_set_count(bucket_address->suffixes, new_index + 1); + dn_simdhash_bucket_set_suffix(bucket_address->suffixes, new_index, suffix); bucket_address->keys[new_index] = key; - uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + new_index; *address_of_value(buffers, value_slot_index) = value; // printf("Inserted [%zd, %zd] in bucket %d at index %d\n", key, value, bucket_index, new_index); + // If we cascaded out of our original target bucket, scan through our probe path + // and increase the cascade counters. We have to wait until now to do that, because + // during the process of getting here we may end up finding a duplicate, which would + // leave the cascade counters in a corrupted state + adjust_cascaded_counts(buffers, first_bucket_index, bucket_index, 1); return DN_SIMDHASH_INSERT_OK; } - // The current bucket is full, so set the cascade flag and try the next bucket. - dn_simdhash_bucket_set_cascaded (bucket_address->suffixes, 1); + // The current bucket is full, so try the next bucket. 
END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) return DN_SIMDHASH_INSERT_NEED_TO_GROW; @@ -183,47 +284,45 @@ DN_SIMDHASH_TRY_INSERT_INTERNAL (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, static void DN_SIMDHASH_REHASH_INTERNAL (DN_SIMDHASH_T_PTR hash, dn_simdhash_buffers_t old_buffers) { - bucket_t *bucket_address = address_of_bucket(old_buffers, 0); - for ( - uint32_t i = 0, bc = old_buffers.buckets_length, value_slot_base = 0; - i < bc; i++, bucket_address++, value_slot_base += DN_SIMDHASH_BUCKET_CAPACITY - ) { - uint32_t c = dn_simdhash_bucket_count(bucket_address->suffixes); - for (uint32_t j = 0; j < c; j++) { - DN_SIMDHASH_KEY_T key = bucket_address->keys[j]; - uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(key); - // This theoretically can't fail, since we just grew the container and we - // wrap around to the beginning when there's a collision in the last bucket. - dn_simdhash_insert_result ok = DN_SIMDHASH_TRY_INSERT_INTERNAL( - hash, key, key_hash, - *address_of_value(old_buffers, value_slot_base + j), - 0 - ); - // FIXME: Why doesn't assert(ok) work here? Clang says it's unused - if (ok != DN_SIMDHASH_INSERT_OK) - assert(0); - } - } + BEGIN_SCAN_PAIRS(old_buffers, key_address, value_address) + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), *key_address); + // This theoretically can't fail, since we just grew the container and we + // wrap around to the beginning when there's a collision in the last bucket. + dn_simdhash_insert_result ok = DN_SIMDHASH_TRY_INSERT_INTERNAL( + hash, *key_address, key_hash, + *value_address, + 0 + ); + // FIXME: Why doesn't assert(ok) work here? 
Clang says it's unused + if (ok != DN_SIMDHASH_INSERT_OK) + assert(0); + END_SCAN_PAIRS(old_buffers, key_address, value_address) } -// We expose these tables instead of making them static, just in case you want to use -// them directly for some reason +#if DN_SIMDHASH_HAS_REMOVE_HANDLER +static void +DN_SIMDHASH_DESTROY_ALL (DN_SIMDHASH_T_PTR hash) +{ + dn_simdhash_buffers_t buffers = hash->buffers; + BEGIN_SCAN_PAIRS(buffers, key_address, value_address) + DN_SIMDHASH_ON_REMOVE(DN_SIMDHASH_GET_DATA(hash), *key_address, *value_address); + END_SCAN_PAIRS(buffers, key_address, value_address) +} +#endif + // TODO: Store this by-reference instead of inline in the hash? dn_simdhash_vtable_t DN_SIMDHASH_T_VTABLE = { DN_SIMDHASH_REHASH_INTERNAL, +#if DN_SIMDHASH_HAS_REMOVE_HANDLER + DN_SIMDHASH_DESTROY_ALL, +#else + NULL, +#endif }; -// While we've inlined these constants into the specialized code we're generating, -// the generic code in dn-simdhash.c needs them, so we put them in this meta header -// that lives inside every hash instance. (TODO: Store it by-reference?) 
-dn_simdhash_meta_t DN_SIMDHASH_T_META = { - DN_SIMDHASH_BUCKET_CAPACITY, - sizeof(bucket_t), - sizeof(DN_SIMDHASH_KEY_T), - sizeof(DN_SIMDHASH_VALUE_T), -}; +#ifndef DN_SIMDHASH_NO_DEFAULT_NEW DN_SIMDHASH_T_PTR DN_SIMDHASH_NEW (uint32_t capacity, dn_allocator_t *allocator) { @@ -235,20 +334,24 @@ DN_SIMDHASH_NEW (uint32_t capacity, dn_allocator_t *allocator) "Inconsistent spacing/sizing for bucket_t" ); - return dn_simdhash_new_internal(DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); + return dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator); } +#endif uint8_t DN_SIMDHASH_TRY_ADD (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T value) { - uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(key); + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); return DN_SIMDHASH_TRY_ADD_WITH_HASH(hash, key, key_hash, value); } uint8_t DN_SIMDHASH_TRY_ADD_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T value) { - assert(hash); + check_self(hash); + dn_simdhash_insert_result ok = DN_SIMDHASH_TRY_INSERT_INTERNAL(hash, key, key_hash, value, 1); if (ok == DN_SIMDHASH_INSERT_NEED_TO_GROW) { dn_simdhash_buffers_t old_buffers = dn_simdhash_ensure_capacity_internal(hash, dn_simdhash_capacity(hash) + 1); @@ -276,14 +379,17 @@ DN_SIMDHASH_TRY_ADD_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, ui uint8_t DN_SIMDHASH_TRY_GET_VALUE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T *result) { - uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(key); + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); return DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH(hash, key, key_hash, result); } uint8_t DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T *result) { - assert(hash); + check_self(hash); + 
DN_SIMDHASH_VALUE_T *value_ptr = DN_SIMDHASH_FIND_VALUE_INTERNAL(hash, key, key_hash); if (!value_ptr) return 0; @@ -295,14 +401,16 @@ DN_SIMDHASH_TRY_GET_VALUE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T k uint8_t DN_SIMDHASH_TRY_REMOVE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key) { - uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(key); + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); return DN_SIMDHASH_TRY_REMOVE_WITH_HASH(hash, key, key_hash); } uint8_t DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash) { - assert(hash); + check_self(hash); dn_simdhash_buffers_t buffers = hash->buffers; uint8_t suffix = dn_simdhash_select_suffix(key_hash); @@ -310,7 +418,7 @@ DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, dn_simdhash_suffixes search_vector = build_search_vector(suffix); BEGIN_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) - int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(bucket_address, key, search_vector); + int index_in_bucket = DN_SIMDHASH_SCAN_BUCKET_INTERNAL(hash, bucket_address, key, search_vector); if (index_in_bucket >= 0) { // We found the item. Replace it with the last item in the bucket, then erase // the last item in the bucket. This ensures sequential scans still work. 
@@ -319,6 +427,19 @@ DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + index_in_bucket, replacement_value_slot_index = (bucket_index * DN_SIMDHASH_BUCKET_CAPACITY) + replacement_index_in_bucket; + DN_SIMDHASH_VALUE_T *value_address = address_of_value(buffers, value_slot_index); + DN_SIMDHASH_VALUE_T *replacement_address = address_of_value(buffers, replacement_value_slot_index); + DN_SIMDHASH_KEY_T *key_address = &bucket_address->keys[index_in_bucket]; + DN_SIMDHASH_KEY_T *replacement_key_address = &bucket_address->keys[replacement_index_in_bucket]; + +#if DN_SIMDHASH_HAS_REMOVE_HANDLER + // Store for later, so we can run the callback after we're done removing the item + DN_SIMDHASH_VALUE_T value = *value_address; + // The key used for lookup may not be the key that was actually stored inside us, + // so make sure we store the one that was inside and destroy that one + DN_SIMDHASH_KEY_T actual_key = *key_address; +#endif + hash->count--; // Update count first @@ -333,43 +454,71 @@ DN_SIMDHASH_TRY_REMOVE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, bucket_address->suffixes, replacement_index_in_bucket, 0 ); // Rotate replacement value from the end of the bucket to here - *address_of_value(buffers, value_slot_index) = *address_of_value(buffers, replacement_value_slot_index); + *value_address = *replacement_address; // Rotate replacement key from the end of the bucket to here - bucket_address->keys[index_in_bucket] = bucket_address->keys[replacement_index_in_bucket]; + *key_address = *replacement_key_address; // Erase replacement key/value's old slots // TODO: Skip these for performance? 
- memset(&bucket_address->keys[replacement_index_in_bucket], 0, sizeof(DN_SIMDHASH_KEY_T)); - memset(address_of_value(buffers, replacement_value_slot_index), 0, sizeof(DN_SIMDHASH_VALUE_T)); - - // FIXME: If we cascaded into this bucket from another bucket, the - // origin bucket's cascaded flag will stay set forever. We could fix this - // by turning the cascaded flag into some sort of a counter and then - // scanning backwards to decrement the counter(s). + memset(replacement_key_address, 0, sizeof(DN_SIMDHASH_KEY_T)); + memset(replacement_address, 0, sizeof(DN_SIMDHASH_VALUE_T)); + + // If this item cascaded out of its original target bucket, we need + // to go through all the buckets we visited on the way here and reduce + // their cascade counters (if possible), to maintain better scan performance. + if (bucket_index != first_bucket_index) + adjust_cascaded_counts(buffers, first_bucket_index, bucket_index, 0); + +#if DN_SIMDHASH_HAS_REMOVE_HANDLER + // We've finished removing the item, so we're in a consistent state and can notify + DN_SIMDHASH_ON_REMOVE(DN_SIMDHASH_GET_DATA(hash), actual_key, value); +#endif return 1; } - if (!dn_simdhash_bucket_is_cascaded(bucket_address->suffixes)) + if (!dn_simdhash_bucket_cascaded_count(bucket_address->suffixes)) return 0; END_SCAN_BUCKETS(first_bucket_index, bucket_index, bucket_address) return 0; } +uint8_t +DN_SIMDHASH_TRY_REPLACE (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, DN_SIMDHASH_VALUE_T new_value) +{ + check_self(hash); + + uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key); + return DN_SIMDHASH_TRY_REPLACE_WITH_HASH(hash, key, key_hash, new_value); +} + +uint8_t +DN_SIMDHASH_TRY_REPLACE_WITH_HASH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_KEY_T key, uint32_t key_hash, DN_SIMDHASH_VALUE_T new_value) +{ + check_self(hash); + + assert(hash); + DN_SIMDHASH_VALUE_T *value_ptr = DN_SIMDHASH_FIND_VALUE_INTERNAL(hash, key, key_hash); + if (!value_ptr) + return 0; +#if 
DN_SIMDHASH_HAS_REPLACE_HANDLER + DN_SIMDHASH_VALUE_T old_value = *value_ptr; +#endif + *value_ptr = new_value; +#if DN_SIMDHASH_HAS_REPLACE_HANDLER + DN_SIMDHASH_ON_REPLACE(DN_SIMDHASH_GET_DATA(hash), key, old_value, new_value); +#endif + return 1; +} + void DN_SIMDHASH_FOREACH (DN_SIMDHASH_T_PTR hash, DN_SIMDHASH_FOREACH_FUNC func, void *user_data) { - assert(hash); + check_self(hash); assert(func); dn_simdhash_buffers_t buffers = hash->buffers; - bucket_t *bucket_address = address_of_bucket(buffers, 0); - for ( - uint32_t i = 0, bc = buffers.buckets_length, value_slot_base = 0; - i < bc; i++, bucket_address++, value_slot_base += DN_SIMDHASH_BUCKET_CAPACITY - ) { - uint32_t c = dn_simdhash_bucket_count(bucket_address->suffixes); - for (uint32_t j = 0; j < c; j++) - func(bucket_address->keys[j], *address_of_value(buffers, value_slot_base + j), user_data); - } + BEGIN_SCAN_PAIRS(buffers, key_address, value_address) + func(*key_address, *value_address, user_data); + END_SCAN_PAIRS(buffers, key_address, value_address) } diff --git a/src/native/containers/dn-simdhash-specializations.h b/src/native/containers/dn-simdhash-specializations.h index f5f846e11815e6..bc2cf898ef6cc7 100644 --- a/src/native/containers/dn-simdhash-specializations.h +++ b/src/native/containers/dn-simdhash-specializations.h @@ -32,4 +32,28 @@ typedef struct dn_simdhash_str_key dn_simdhash_str_key; #undef DN_SIMDHASH_KEY_T #undef DN_SIMDHASH_VALUE_T +#define DN_SIMDHASH_T dn_simdhash_ptr_ptr +#define DN_SIMDHASH_KEY_T void * +#define DN_SIMDHASH_VALUE_T void * + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T + +#define DN_SIMDHASH_T dn_simdhash_ght +#define DN_SIMDHASH_KEY_T gconstpointer +#define DN_SIMDHASH_VALUE_T gpointer +#define DN_SIMDHASH_NO_DEFAULT_NEW 1 + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T +#undef 
DN_SIMDHASH_NO_DEFAULT_NEW + +#include "dn-simdhash-ght-compatible.h" + #endif diff --git a/src/native/containers/dn-simdhash-string-ptr.c b/src/native/containers/dn-simdhash-string-ptr.c index 1cf4de62957d22..cf50edf4a9b5a0 100644 --- a/src/native/containers/dn-simdhash-string-ptr.c +++ b/src/native/containers/dn-simdhash-string-ptr.c @@ -1,8 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include <config.h> // NOTE(review): header name assumed to be the build-generated config.h - confirm #include "dn-simdhash.h" +#include "dn-simdhash-utils.h" + typedef struct dn_simdhash_str_key { const char *text; // We keep a precomputed hash to speed up rehashing and scans. @@ -13,116 +16,6 @@ typedef struct dn_simdhash_str_key { #endif } dn_simdhash_str_key; - -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. - -inline static uint32_t -ROTL32 (uint32_t x, int8_t r) -{ - return (x << r) | (x >> (32 - r)); -} - -// Finalization mix - force all bits of a hash block to avalanche -inline static uint32_t -fmix32 (uint32_t h) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -// end of murmurhash - - -#if defined(__clang__) || defined (__GNUC__) -#define unlikely(expr) __builtin_expect(!!(expr), 0) -#define likely(expr) __builtin_expect(!!(expr), 1) -#else -#define unlikely(expr) (expr) -#define likely(expr) (expr) -#endif - -// FNV has bad properties for simdhash even though it's a fairly fast/good hash, -// but the overhead of having to do strlen() first before passing a string key to -// MurmurHash3 is significant and annoying. This is an attempt to reformulate the -// 32-bit version of MurmurHash3 into a 1-pass version for null terminated strings. -// The output of this will probably be different from regular MurmurHash3. 
I don't -// see that as a problem, since you shouldn't rely on the exact bit patterns of -// a non-cryptographic hash anyway. -typedef struct scan_result_t { - union { - uint32_t u32; - uint8_t bytes[4]; - } result; - const uint8_t *next; -} scan_result_t; - -static inline scan_result_t -scan_forward (const uint8_t *ptr) -{ - // TODO: On wasm we could do a single u32 load then scan the bytes, - // as long as we're sure ptr isn't up against the end of memory - scan_result_t result = { 0, }; - - // I tried to get a loop to auto-unroll, but GCC only unrolls at O3 and MSVC never does. -#define SCAN_1(i) \ - result.result.bytes[i] = ptr[i]; \ - if (unlikely(!result.result.bytes[i])) \ - return result; - - SCAN_1(0); - SCAN_1(1); - SCAN_1(2); - SCAN_1(3); -#undef SCAN_1 - - // doing ptr[i] 4 times then computing here produces better code than ptr++ especially on wasm - result.next = ptr + 4; - return result; -} - -static inline uint32_t -MurmurHash3_32_streaming (const uint8_t *key, uint32_t seed) -{ - uint32_t h1 = seed, block_count = 0; - const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593; - - // Scan forward through the buffer collecting up to 4 bytes at a time, then hash - scan_result_t block = scan_forward(key); - // As long as the scan found at least one nonzero byte, u32 will be != 0 - while (block.result.u32) { - block_count += 1; - - uint32_t k1 = block.result.u32; - k1 *= c1; - k1 = ROTL32(k1, 15); - k1 *= c2; - h1 ^= k1; - h1 = ROTL32(h1, 13); - h1 = h1 * 5 + 0xe6546b64; - - // If the scan found a null byte next will be 0, so we stop scanning - if (!block.next) - break; - block = scan_forward(block.next); - } - - // finalize. 
we don't have an exact byte length but we have a block count - // it would be ideal to figure out a cheap way to produce an exact byte count, - // since then we can compute the length and hash in one go and use memcmp later, - // since emscripten/musl strcmp isn't optimized at all - h1 ^= block_count; - h1 = fmix32(h1); - return h1; -} - -// end of reformulated murmur3-32 - static inline int32_t dn_simdhash_str_equal (dn_simdhash_str_key v1, dn_simdhash_str_key v2) { @@ -140,8 +33,8 @@ dn_simdhash_str_hash (dn_simdhash_str_key v1) #define DN_SIMDHASH_T dn_simdhash_string_ptr #define DN_SIMDHASH_KEY_T dn_simdhash_str_key #define DN_SIMDHASH_VALUE_T void * -#define DN_SIMDHASH_KEY_HASHER dn_simdhash_str_hash -#define DN_SIMDHASH_KEY_EQUALS dn_simdhash_str_equal +#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_simdhash_str_hash(key) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_simdhash_str_equal(lhs, rhs) #define DN_SIMDHASH_ACCESSOR_SUFFIX _raw // perfect cache alignment. 32-bit ptrs: 8-byte keys. 64-bit: 16-byte keys. 
@@ -192,16 +85,7 @@ dn_simdhash_string_ptr_foreach (dn_simdhash_string_ptr_t *hash, dn_simdhash_stri assert(func); dn_simdhash_buffers_t buffers = hash->buffers; - bucket_t *bucket_address = address_of_bucket(buffers, 0); - for ( - uint32_t i = 0, bc = buffers.buckets_length, value_slot_base = 0; - i < bc; i++, bucket_address++, value_slot_base += DN_SIMDHASH_BUCKET_CAPACITY - ) { - uint32_t c = dn_simdhash_bucket_count(bucket_address->suffixes); - for (uint32_t j = 0; j < c; j++) { - DN_SIMDHASH_KEY_T *key = &bucket_address->keys[j]; - DN_SIMDHASH_VALUE_T value = *address_of_value(buffers, value_slot_base + j); - func(key->text, value, user_data); - } - } + BEGIN_SCAN_PAIRS(buffers, key_address, value_address) + func(key_address->text, *value_address, user_data); + END_SCAN_PAIRS(buffers, key_address, value_address) } diff --git a/src/native/containers/dn-simdhash-test.c b/src/native/containers/dn-simdhash-test.c index cbf040a26649d5..b258746d5aae1c 100644 --- a/src/native/containers/dn-simdhash-test.c +++ b/src/native/containers/dn-simdhash-test.c @@ -10,14 +10,36 @@ #include "dn-vector.h" #include "dn-simdhash.h" +typedef struct { + int i; + float f; +} instance_data_t; + +static inline uint8_t +key_comparer (instance_data_t data, size_t lhs, size_t rhs) { + return ((data.f == 4.20f) || (lhs == rhs)); +} + #define DN_SIMDHASH_T dn_simdhash_size_t_size_t #define DN_SIMDHASH_KEY_T size_t #define DN_SIMDHASH_VALUE_T size_t -#define DN_SIMDHASH_KEY_HASHER(key) (uint32_t)(key & 0xFFFFFFFFu) -#define DN_SIMDHASH_KEY_EQUALS(lhs, rhs) (lhs == rhs) +#define DN_SIMDHASH_KEY_HASHER(data, key) (uint32_t)(key & 0xFFFFFFFFu) +#define DN_SIMDHASH_KEY_EQUALS key_comparer +#define DN_SIMDHASH_INSTANCE_DATA_T instance_data_t +#define DN_SIMDHASH_ON_REMOVE(data, key, value) ; // printf("remove [%zd, %zd], f==%f\n", key, value, data.f) +#define DN_SIMDHASH_ON_REPLACE(data, key, old_value, new_value) ; // printf("replace [%zd, %zd] with [%zd, %zd] i==%i\n", key, old_value, key, 
new_value, data.i) #include "dn-simdhash-specialization.h" +uint32_t count_cascaded_buckets (dn_simdhash_size_t_size_t_t *hash) { + uint32_t result = 0; + dn_simdhash_buffers_t buffers = hash->buffers; + BEGIN_SCAN_BUCKETS(0, bucket_index, bucket_address) + result += dn_simdhash_bucket_cascaded_count(bucket_address->suffixes); + END_SCAN_BUCKETS(0, bucket_index, bucket_address) + return result; +} + uint8_t tassert (int b, const char *msg) { if (b) return b; @@ -40,89 +62,122 @@ uint8_t tasserteq (size_t actual, size_t expected, const char *msg) { } void foreach_callback (size_t key, size_t value, void * user_data) { - printf("[%zd, %zd]\n", key, value); + // printf("[%zd, %zd]\n", key, value); + (*(uint32_t *)user_data)++; } int main () { - const int c = 1024; + const int c = 320000; dn_simdhash_size_t_size_t_t *test = dn_simdhash_size_t_size_t_new(0, NULL); + dn_simdhash_instance_data(instance_data_t, test).f = 3.14f; + dn_simdhash_instance_data(instance_data_t, test).i = 42; + dn_vector_t *keys = dn_vector_alloc(sizeof(DN_SIMDHASH_KEY_T)), *values = dn_vector_alloc(sizeof(DN_SIMDHASH_VALUE_T)); + // Ensure consistency between runs + srand(1); for (int i = 0; i < c; i++) { - DN_SIMDHASH_KEY_T key = rand(); - dn_vector_push_back(keys, key); DN_SIMDHASH_VALUE_T value = (i * 2) + 1; - dn_vector_push_back(values, value); + DN_SIMDHASH_KEY_T key; +retry: { + key = rand(); uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); - tassert(ok, "Insert failed"); - } - - if (!tasserteq(dn_simdhash_count(test), c, "count did not match")) - return 1; - - printf ("Calling foreach:\n"); - dn_simdhash_size_t_size_t_foreach(test, foreach_callback, NULL); - - uint32_t final_capacity = dn_simdhash_capacity(test); - - for (int i = 0; i < c; i++) { - DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); - DN_SIMDHASH_VALUE_T value, expected_value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); - - uint8_t ok = 
dn_simdhash_size_t_size_t_try_get_value(test, key, &value); - if (tassert1(ok, key, "did not find key")) - tasserteq(value, expected_value, "value did not match"); - } - - for (int i = 0; i < c; i++) { - DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); - uint8_t ok = dn_simdhash_size_t_size_t_try_remove(test, key); - tassert1(ok, key, "could not remove key"); - - DN_SIMDHASH_VALUE_T value; - ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); - tassert1(!ok, key, "found key after removal"); - } - - if (!tasserteq(dn_simdhash_count(test), 0, "was not empty")) - return 1; - if (!tasserteq(dn_simdhash_capacity(test), final_capacity, "capacity changed by emptying")) - return 1; - - printf ("Calling foreach after emptying:\n"); - dn_simdhash_size_t_size_t_foreach(test, foreach_callback, NULL); - - for (int i = 0; i < c; i++) { - DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); - DN_SIMDHASH_VALUE_T value; - uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); - tassert1(!ok, key, "found key after removal"); - } - - for (int i = 0; i < c; i++) { - DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); - DN_SIMDHASH_VALUE_T value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); - - uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); - tassert1(ok, key, "could not re-insert key after emptying"); - } - - if (!tasserteq(dn_simdhash_capacity(test), final_capacity, "expected capacity not to change after refilling")) - return 1; - - for (int i = 0; i < c; i++) { - DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); - DN_SIMDHASH_VALUE_T value, expected_value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + if (!ok) + goto retry; +} - uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); - if (tassert1(ok, key, "did not find key after refilling")) - tasserteq(value, expected_value, "value did not match after refilling"); 
+ dn_vector_push_back(keys, key); + dn_vector_push_back(values, value); } - printf ("Calling foreach after refilling:\n"); - dn_simdhash_size_t_size_t_foreach(test, foreach_callback, NULL); + for (int iter = 0; iter < 100; iter++) { + if (!tasserteq(dn_simdhash_count(test), c, "count did not match")) + return 1; + + printf("Calling foreach:\n"); + uint32_t foreach_count = 0; + dn_simdhash_size_t_size_t_foreach(test, foreach_callback, &foreach_count); + printf("Foreach iterated %u time(s)\n", foreach_count); + printf("Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value, expected_value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + if (tassert1(ok, key, "did not find key")) + tasserteq(value, expected_value, "value did not match"); + } + + // NOTE: Adding duplicates could grow the table if we're unlucky, since the add operation + // eagerly grows before doing a table scan if we're at the grow threshold. 
+ for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); + tassert1(!ok, key, "added duplicate key successfully"); + } + + printf("After adding dupes: Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + uint32_t final_capacity = dn_simdhash_capacity(test); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + uint8_t ok = dn_simdhash_size_t_size_t_try_remove(test, key); + tassert1(ok, key, "could not remove key"); + + DN_SIMDHASH_VALUE_T value; + ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + tassert1(!ok, key, "found key after removal"); + } + + if (!tasserteq(dn_simdhash_count(test), 0, "was not empty")) + return 1; + if (!tasserteq(dn_simdhash_capacity(test), final_capacity, "capacity changed by emptying")) + return 1; + + printf ("Calling foreach after emptying:\n"); + foreach_count = 0; + dn_simdhash_size_t_size_t_foreach(test, foreach_callback, &foreach_count); + printf("Foreach iterated %u time(s)\n", foreach_count); + printf("Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value; + uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + tassert1(!ok, key, "found key after removal"); + } + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_add(test, key, value); + tassert1(ok, key, 
"could not re-insert key after emptying"); + } + + if (!tasserteq(dn_simdhash_capacity(test), final_capacity, "expected capacity not to change after refilling")) + return 1; + + for (int i = 0; i < c; i++) { + DN_SIMDHASH_KEY_T key = *dn_vector_index_t(keys, DN_SIMDHASH_KEY_T, i); + DN_SIMDHASH_VALUE_T value, expected_value = *dn_vector_index_t(values, DN_SIMDHASH_VALUE_T, i); + + uint8_t ok = dn_simdhash_size_t_size_t_try_get_value(test, key, &value); + if (tassert1(ok, key, "did not find key after refilling")) + tasserteq(value, expected_value, "value did not match after refilling"); + } + + printf("Calling foreach after refilling:\n"); + foreach_count = 0; + dn_simdhash_size_t_size_t_foreach(test, foreach_callback, &foreach_count); + printf("Foreach iterated %u time(s)\n", foreach_count); + printf("Count: %u, Capacity: %u, Cascaded item count: %u\n", dn_simdhash_count(test), dn_simdhash_capacity(test), count_cascaded_buckets(test)); + } printf("done\n"); diff --git a/src/native/containers/dn-simdhash-u32-ptr.c b/src/native/containers/dn-simdhash-u32-ptr.c index 1527c078f499ed..32c88e80f9d438 100644 --- a/src/native/containers/dn-simdhash-u32-ptr.c +++ b/src/native/containers/dn-simdhash-u32-ptr.c @@ -24,7 +24,7 @@ fmix32 (uint32_t h) #define DN_SIMDHASH_T dn_simdhash_u32_ptr #define DN_SIMDHASH_KEY_T uint32_t #define DN_SIMDHASH_VALUE_T void * -#define DN_SIMDHASH_KEY_HASHER fmix32 -#define DN_SIMDHASH_KEY_EQUALS(lhs, rhs) (lhs == rhs) +#define DN_SIMDHASH_KEY_HASHER(hash, key) fmix32(key) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs) #include "dn-simdhash-specialization.h" diff --git a/src/native/containers/dn-simdhash-utils.h b/src/native/containers/dn-simdhash-utils.h new file mode 100644 index 00000000000000..e2ee9638695598 --- /dev/null +++ b/src/native/containers/dn-simdhash-utils.h @@ -0,0 +1,143 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
#include <stdint.h>

// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Rotates the 32-bit value x left by r bits.
// The count is reduced modulo 32 before shifting, so r == 0 (or any count
// outside 1..31) can never become a shift by 32 or by a negative amount, both
// of which are undefined behavior in C. For r in 1..31 the result is identical
// to the plain (x << r) | (x >> (32 - r)) formulation.
static inline uint32_t
ROTL32 (uint32_t x, int8_t r)
{
	const uint32_t n = (uint32_t)r & 31u;
	return (x << n) | (x >> ((32u - n) & 31u));
}

// Finalization mix - force all bits of a hash block to avalanche
// Returns the mixed value; an input of 0 maps to 0.
static inline uint32_t
fmix32 (uint32_t h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}

// Number of 4-byte blocks needed to cover one pointer.
#define BLOCK_COUNT ((sizeof (void *)) / 4)

// Hash a void * (number of 4-byte blocks determined by sizeof (void *))
// The pointer's own bits are hashed, not the memory it points at.
//   ptr:  the pointer value to hash (may be NULL)
//   seed: initial hash state; different seeds yield different results for the same ptr
// Returns the 32-bit hash.
// Declared inline like the other helpers in this header so that translation
// units which include it without calling this function don't get
// unused-function warnings.
static inline uint32_t
MurmurHash3_32_ptr (const void *ptr, uint32_t seed)
{
	uint32_t h1 = seed;
	const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593;

	// Reinterpret the pointer as an array of 32-bit words via a union
	union {
		uint32_t u32[BLOCK_COUNT];
		const void *ptr;
	} u;
	u.ptr = ptr;

	for (uint32_t i = 0; i < BLOCK_COUNT; i++) {
		uint32_t k1 = u.u32[i];
		k1 *= c1;
		k1 = ROTL32(k1, 15);
		k1 *= c2;
		h1 ^= k1;
		h1 = ROTL32(h1, 13);
		h1 = h1 * 5 + 0xe6546b64;
	}

	// finalize
	// NOTE(review): reference MurmurHash3_x86_32 appears to mix in the byte length
	// here; this mixes in the block count, so outputs are not expected to match
	// the reference implementation - confirm that is intended.
	h1 ^= (uint32_t)BLOCK_COUNT;
	h1 = fmix32(h1);
	return h1;
}

// end of murmurhash

// Branch prediction hints; plain pass-through on compilers without __builtin_expect.
#if defined(__clang__) || defined (__GNUC__)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#define likely(expr) __builtin_expect(!!(expr), 1)
#else
#define unlikely(expr) (expr)
#define likely(expr) (expr)
#endif

// FNV has bad properties for simdhash even though it's a fairly fast/good hash,
// but the overhead of having to do strlen() first before passing a string key to
// MurmurHash3 is significant and annoying. This is an attempt to reformulate the
// 32-bit version of MurmurHash3 into a 1-pass version for null terminated strings.
// The output of this will probably be different from regular MurmurHash3. I don't
// see that as a problem, since you shouldn't rely on the exact bit patterns of
// a non-cryptographic hash anyway.
+typedef struct scan_result_t { + union { + uint32_t u32; + uint8_t bytes[4]; + } result; + const uint8_t *next; +} scan_result_t; + +static inline scan_result_t +scan_forward (const uint8_t *ptr) +{ + // TODO: On wasm we could do a single u32 load then scan the bytes, + // as long as we're sure ptr isn't up against the end of memory + scan_result_t result = { 0, }; + + // I tried to get a loop to auto-unroll, but GCC only unrolls at O3 and MSVC never does. +#define SCAN_1(i) \ + result.result.bytes[i] = ptr[i]; \ + if (unlikely(!result.result.bytes[i])) \ + return result; + + SCAN_1(0); + SCAN_1(1); + SCAN_1(2); + SCAN_1(3); +#undef SCAN_1 + + // doing ptr[i] 4 times then computing here produces better code than ptr++ especially on wasm + result.next = ptr + 4; + return result; +} + +static inline uint32_t +MurmurHash3_32_streaming (const uint8_t *key, uint32_t seed) +{ + uint32_t h1 = seed, block_count = 0; + const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593; + + // Scan forward through the buffer collecting up to 4 bytes at a time, then hash + scan_result_t block = scan_forward(key); + // As long as the scan found at least one nonzero byte, u32 will be != 0 + while (block.result.u32) { + block_count += 1; + + uint32_t k1 = block.result.u32; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + + // If the scan found a null byte next will be 0, so we stop scanning + if (!block.next) + break; + block = scan_forward(block.next); + } + + // finalize. 
we don't have an exact byte length but we have a block count + // it would be ideal to figure out a cheap way to produce an exact byte count, + // since then we can compute the length and hash in one go and use memcmp later, + // since emscripten/musl strcmp isn't optimized at all + h1 ^= block_count; + h1 = fmix32(h1); + return h1; +} + +// end of reformulated murmur3-32 diff --git a/src/native/containers/dn-simdhash.c b/src/native/containers/dn-simdhash.c index 9525c5ea240f00..4208aefb7b9239 100644 --- a/src/native/containers/dn-simdhash.c +++ b/src/native/containers/dn-simdhash.c @@ -27,14 +27,16 @@ next_power_of_two (uint32_t value) { #endif // __clang__ || __GNUC__ dn_simdhash_t * -dn_simdhash_new_internal (dn_simdhash_meta_t meta, dn_simdhash_vtable_t vtable, uint32_t capacity, dn_allocator_t *allocator) +dn_simdhash_new_internal (dn_simdhash_meta_t *meta, dn_simdhash_vtable_t vtable, uint32_t capacity, dn_allocator_t *allocator) { - dn_simdhash_t *result = (dn_simdhash_t *)dn_allocator_alloc(allocator, sizeof(dn_simdhash_t)); - memset(result, 0, sizeof(dn_simdhash_t)); - - assert((meta.bucket_capacity > 1) && (meta.bucket_capacity <= DN_SIMDHASH_MAX_BUCKET_CAPACITY)); - assert(meta.key_size > 0); - assert(meta.bucket_size_bytes >= (DN_SIMDHASH_VECTOR_WIDTH + (meta.bucket_capacity * meta.key_size))); + const size_t size = sizeof(dn_simdhash_t) + meta->data_size; + dn_simdhash_t *result = (dn_simdhash_t *)dn_allocator_alloc(allocator, size); + memset(result, 0, size); + + assert(meta); + assert((meta->bucket_capacity > 1) && (meta->bucket_capacity <= DN_SIMDHASH_MAX_BUCKET_CAPACITY)); + assert(meta->key_size > 0); + assert(meta->bucket_size_bytes >= (DN_SIMDHASH_VECTOR_WIDTH + (meta->bucket_capacity * meta->key_size))); result->meta = meta; result->vtable = vtable; result->buffers.allocator = allocator; @@ -49,6 +51,8 @@ void dn_simdhash_free (dn_simdhash_t *hash) { assert(hash); + if (hash->vtable.destroy_all) + hash->vtable.destroy_all(hash); 
dn_simdhash_buffers_t buffers = hash->buffers; memset(hash, 0, sizeof(dn_simdhash_t)); dn_simdhash_free_buffers(buffers); @@ -68,13 +72,13 @@ dn_simdhash_buffers_t dn_simdhash_ensure_capacity_internal (dn_simdhash_t *hash, uint32_t capacity) { assert(hash); - uint32_t bucket_count = (capacity + hash->meta.bucket_capacity - 1) / hash->meta.bucket_capacity; + uint32_t bucket_count = (capacity + hash->meta->bucket_capacity - 1) / hash->meta->bucket_capacity; // FIXME: Only apply this when capacity == 0? if (bucket_count < DN_SIMDHASH_MIN_BUCKET_COUNT) bucket_count = DN_SIMDHASH_MIN_BUCKET_COUNT; // Bucket count must be a power of two (this enables more efficient hashcode -> bucket mapping) bucket_count = next_power_of_two(bucket_count); - uint32_t value_count = bucket_count * hash->meta.bucket_capacity; + uint32_t value_count = bucket_count * hash->meta->bucket_capacity; dn_simdhash_buffers_t result = { 0, }; if (bucket_count <= hash->buffers.buckets_length) { @@ -97,8 +101,8 @@ dn_simdhash_ensure_capacity_internal (dn_simdhash_t *hash, uint32_t capacity) hash->buffers.values_length = value_count; // pad buckets allocation by the width of one vector so we can align it - uint32_t buckets_size_bytes = (bucket_count * hash->meta.bucket_size_bytes) + DN_SIMDHASH_VECTOR_WIDTH, - values_size_bytes = value_count * hash->meta.value_size; + uint32_t buckets_size_bytes = (bucket_count * hash->meta->bucket_size_bytes) + DN_SIMDHASH_VECTOR_WIDTH, + values_size_bytes = value_count * hash->meta->value_size; hash->buffers.buckets = dn_allocator_alloc(hash->buffers.allocator, buckets_size_bytes); memset(hash->buffers.buckets, 0, buckets_size_bytes); @@ -119,17 +123,19 @@ void dn_simdhash_clear (dn_simdhash_t *hash) { assert(hash); + if (hash->vtable.destroy_all) + hash->vtable.destroy_all(hash); hash->count = 0; - memset(hash->buffers.buckets, 0, hash->buffers.buckets_length * hash->meta.bucket_size_bytes); + memset(hash->buffers.buckets, 0, hash->buffers.buckets_length * 
hash->meta->bucket_size_bytes); // Clearing the values is technically optional, so we could skip this for performance - memset(hash->buffers.values, 0, hash->buffers.values_length * hash->meta.value_size); + memset(hash->buffers.values, 0, hash->buffers.values_length * hash->meta->value_size); } uint32_t dn_simdhash_capacity (dn_simdhash_t *hash) { assert(hash); - return hash->buffers.buckets_length * hash->meta.bucket_capacity; + return hash->buffers.buckets_length * hash->meta->bucket_capacity; } uint32_t diff --git a/src/native/containers/dn-simdhash.h b/src/native/containers/dn-simdhash.h index f84aa712c13fc9..e2245479efe9f9 100644 --- a/src/native/containers/dn-simdhash.h +++ b/src/native/containers/dn-simdhash.h @@ -32,6 +32,11 @@ #define DN_FORCEINLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline)) #endif +typedef struct dn_simdhash_void_data_t { + // HACK: Empty struct or 0-element array produce a MSVC warning and break the build. + uint8_t data[1]; +} dn_simdhash_void_data_t; + typedef struct dn_simdhash_buffers_t { // sizes of current allocations in items (not bytes) // so values_length should == (buckets_length * bucket_capacity) @@ -48,7 +53,9 @@ typedef struct dn_simdhash_t dn_simdhash_t; typedef struct dn_simdhash_meta_t { // type metadata for generic implementation - uint32_t bucket_capacity, bucket_size_bytes, key_size, value_size; + uint32_t bucket_capacity, bucket_size_bytes, key_size, value_size, + // Allocate this many bytes of extra data inside the dn_simdhash_t + data_size; } dn_simdhash_meta_t; typedef enum dn_simdhash_insert_result { @@ -58,8 +65,10 @@ typedef enum dn_simdhash_insert_result { } dn_simdhash_insert_result; typedef struct dn_simdhash_vtable_t { - // Does not free old_buffers, that's your job. + // Does not free old_buffers, that's your job. Required. void (*rehash) (dn_simdhash_t *hash, dn_simdhash_buffers_t old_buffers); + // Invokes remove handler for all items, if necessary. Optional. 
+ void (*destroy_all) (dn_simdhash_t *hash); } dn_simdhash_vtable_t; typedef struct dn_simdhash_t { @@ -67,9 +76,15 @@ typedef struct dn_simdhash_t { uint32_t count, grow_at_count; dn_simdhash_buffers_t buffers; dn_simdhash_vtable_t vtable; - dn_simdhash_meta_t meta; + dn_simdhash_meta_t *meta; + // We allocate extra space here based on meta.data_size + // This has one element because 0 elements generates a MSVC warning and breaks the build + uint8_t data[1]; } dn_simdhash_t; +#define dn_simdhash_instance_data(type, hash) \ + (*(type *)(&hash->data)) + // These helpers use .values instead of .vec to avoid generating unnecessary // vector loads/stores. Operations that touch these values may not need vectorization, // so it's ideal to just do single-byte memory accesses instead. @@ -77,7 +92,7 @@ typedef struct dn_simdhash_t { #define dn_simdhash_bucket_count(suffixes) \ (suffixes).values[DN_SIMDHASH_COUNT_SLOT] -#define dn_simdhash_bucket_is_cascaded(suffixes) \ +#define dn_simdhash_bucket_cascaded_count(suffixes) \ (suffixes).values[DN_SIMDHASH_CASCADED_SLOT] #define dn_simdhash_bucket_set_suffix(suffixes, slot, value) \ @@ -86,7 +101,7 @@ typedef struct dn_simdhash_t { #define dn_simdhash_bucket_set_count(suffixes, value) \ (suffixes).values[DN_SIMDHASH_COUNT_SLOT] = (value) -#define dn_simdhash_bucket_set_cascaded(suffixes, value) \ +#define dn_simdhash_bucket_set_cascaded_count(suffixes, value) \ (suffixes).values[DN_SIMDHASH_CASCADED_SLOT] = (value) static DN_FORCEINLINE(uint8_t) @@ -108,7 +123,7 @@ dn_simdhash_select_bucket_index (dn_simdhash_buffers_t buffers, uint32_t key_has // Creates a simdhash with the provided configuration metadata, vtable, size, and allocator. // Be sure you know what you're doing. 
dn_simdhash_t * -dn_simdhash_new_internal (dn_simdhash_meta_t meta, dn_simdhash_vtable_t vtable, uint32_t capacity, dn_allocator_t *allocator); +dn_simdhash_new_internal (dn_simdhash_meta_t *meta, dn_simdhash_vtable_t vtable, uint32_t capacity, dn_allocator_t *allocator); // Frees a simdhash and its associated buffers. void