From 0215463cfa6e76eb550feaf3f15b392b496f263b Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Tue, 22 Feb 2022 12:59:03 -0800 Subject: [PATCH 01/27] basic erase functionality added, still need to modify insert/find --- include/cuco/detail/static_map.inl | 74 +++++++++++++++++++- include/cuco/detail/static_map_kernels.cuh | 28 ++++++++ include/cuco/static_map.cuh | 28 +++++++- tests/CMakeLists.txt | 1 + tests/static_map/erase_test.cu | 80 ++++++++++++++++++++++ 5 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 tests/static_map/erase_test.cu diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 223852955..8f828857c 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -23,10 +23,12 @@ static_map::static_map(std::size_t capacity, Key empty_key_sentinel, Value empty_value_sentinel, Allocator const& alloc, - cudaStream_t stream) + cudaStream_t stream, + Key erased_key_sentinel) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) empty_key_sentinel_{empty_key_sentinel}, empty_value_sentinel_{empty_value_sentinel}, + erased_key_sentinel_{erased_key_sentinel}, slot_allocator_{alloc}, counter_allocator_{alloc} { @@ -114,6 +116,35 @@ void static_map::insert_if(InputIt first, size_ += h_num_successes; } +template +template +void static_map::erase( + InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) +{ + auto num_keys = std::distance(first, last); + if (num_keys == 0) { return; } + + auto const block_size = 128; + auto const stride = 1; + auto const tile_size = 1; + auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); + auto view = get_device_mutable_view(); + + // TODO: memset an atomic variable is unsafe + static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type)); + CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); + std::size_t h_num_successes; + + detail::erase<<>>( + first, first + num_keys, num_successes_, view, hash, key_equal); + CUCO_CUDA_TRY(cudaMemcpyAsync( + &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); + + CUCO_CUDA_TRY(cudaStreamSynchronize(stream)); // stream sync to ensure h_num_successes is updated + + size_ -= h_num_successes; +} + template template void static_map::find(InputIt first, @@ -358,6 +389,47 @@ __device__ bool static_map::device_mutable_view::i } } +template +template +__device__ bool static_map::device_mutable_view::erase( + key_type const& k, Hash hash, KeyEqual key_equal) noexcept +{ + auto current_slot{initial_slot(k, hash)}; + + value_type const insert_pair = make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); + + while (true) { + auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); + auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); + + // Key doesn't exist, return false + if (detail::bitwise_compare(existing_key, this->get_empty_key_sentinel())) { + return false; + } + + // Key exists, return true if successfully deleted + if (key_equal(existing_key, k)) { + if constexpr (cuco::detail::is_packable()) { + auto slot = + reinterpret_cast::packed_type>*>( + current_slot); + cuco::detail::pair_converter expected_pair{ + cuco::make_pair(existing_key, existing_value)}; + cuco::detail::pair_converter new_pair{insert_pair}; + + return slot->compare_exchange_strong(expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + } + if constexpr (not cuco::detail::is_packable()) { + current_slot->second.compare_exchange_strong(existing_value, insert_pair.second, cuda::std::memory_order_relaxed); + return current_slot->first.compare_exchange_strong(existing_key, insert_pair.first, cuda::std::memory_order_relaxed); + } + // simple CAS + } + + current_slot = next_slot(current_slot); + } +} + template template __device__ typename static_map::device_view::iterator diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 642373135..1732432dc 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -156,6 +156,34 @@ __global__ void insert( if (threadIdx.x == 0) { *num_successes += block_num_successes; } } +template +__global__ void erase( + InputIt first, InputIt last, atomicT* num_successes, viewT view, Hash hash, KeyEqual key_equal) +{ + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t thread_num_successes = 0; + + auto tid = block_size * blockIdx.x + threadIdx.x; + auto it = first + tid; + + while (it < last) { + auto k{*it}; + if (view.erase(k, hash, key_equal)) { thread_num_successes++; } + it += gridDim.x * block_size; + } + + // compute number of successfully inserted elements for each block + // and atomically add to the grand total + std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); + if (threadIdx.x == 0) { *num_successes += block_num_successes; } +} + /** * @brief Inserts key/value pairs in the range `[first, first + n)` if `pred` of the * corresponding stencil returns true. diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 199dcd838..e20650883 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -195,7 +195,8 @@ class static_map { Key empty_key_sentinel, Value empty_value_sentinel, Allocator const& alloc = Allocator{}, - cudaStream_t stream = 0); + cudaStream_t stream = 0, + Key erased_key_sentinel = -2); /** * @brief Destroys the map and frees its contents. @@ -266,6 +267,15 @@ class static_map { KeyEqual key_equal = KeyEqual{}, cudaStream_t stream = 0); + template , + typename KeyEqual = thrust::equal_to> + void erase(InputIt first, + InputIt last, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}, + cudaStream_t stream = 0); + /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. * @@ -338,6 +348,7 @@ class static_map { using slot_type = slot_type; Key empty_key_sentinel_{}; ///< Key value that represents an empty slot + Key erased_key_sentinel_{}; Value empty_value_sentinel_{}; ///< Initial Value of empty slot pair_atomic_type* slots_{}; ///< Pointer to flat slots storage std::size_t capacity_{}; ///< Total number of slots @@ -345,10 +356,12 @@ class static_map { __host__ __device__ device_view_base(pair_atomic_type* slots, std::size_t capacity, Key empty_key_sentinel, - Value empty_value_sentinel) noexcept + Value empty_value_sentinel, + Key erased_key_sentinel = -2) noexcept : slots_{slots}, capacity_{capacity}, empty_key_sentinel_{empty_key_sentinel}, + erased_key_sentinel_{erased_key_sentinel}, empty_value_sentinel_{empty_value_sentinel} { } @@ -541,6 +554,8 @@ class static_map { { return empty_value_sentinel_; } + + __host__ __device__ Key get_erased_key_sentinel() const noexcept { return erased_key_sentinel_; } /** * @brief Returns iterator to the first slot. @@ -775,6 +790,12 @@ class static_map { value_type const& insert_pair, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}) noexcept; + + template , + typename KeyEqual = thrust::equal_to> + __device__ bool erase(key_type const& k, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}) noexcept; }; // class device mutable view /** @@ -1068,6 +1089,8 @@ class static_map { */ Key get_empty_key_sentinel() const noexcept { return empty_key_sentinel_; } + Key get_erased_key_sentinel() const noexcept { return erased_key_sentinel_; } + /** * @brief Gets the sentinel value used to represent an empty value slot. * @@ -1100,6 +1123,7 @@ class static_map { std::size_t capacity_{}; ///< Total number of slots std::size_t size_{}; ///< Number of keys in map Key empty_key_sentinel_{}; ///< Key value that represents an empty slot + Key erased_key_sentinel_{}; Value empty_value_sentinel_{}; ///< Initial value of empty slot atomic_ctr_type* num_successes_{}; ///< Number of successfully inserted keys on insert slot_allocator_type slot_allocator_{}; ///< Allocator used to allocate slots diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e9d256ce1..a2305e9db 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -54,6 +54,7 @@ endfunction(ConfigureTest) ################################################################################################### # - static_map tests ------------------------------------------------------------------------------ ConfigureTest(STATIC_MAP_TEST + static_map/erase_test.cu static_map/custom_type_test.cu static_map/key_sentinel_test.cu static_map/shared_memory_test.cu diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu new file mode 100644 index 000000000..44a85ab99 --- /dev/null +++ b/tests/static_map/erase_test.cu @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + + +TEMPLATE_TEST_CASE_SIG( + "erase key", "", ((typename T), T), (int32_t), (int64_t)) +{ + using Key = T; + using Value = T; + + auto num_keys = 1E6; + cuco::static_map map{num_keys * 2, -1, -1}; + + auto m_view = map.get_device_mutable_view(); + auto view = map.get_device_view(); + + thrust::device_vector d_keys(num_keys); + thrust::device_vector d_values(num_keys); + thrust::device_vector d_keys_exist(num_keys); + + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); + thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1); + + auto pairs_begin = + thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), d_values.begin())); + + SECTION( + "Check basic insert/erase") + { + map.insert(pairs_begin, pairs_begin + num_keys); + + REQUIRE(map.get_size() == num_keys); + + map.erase(d_keys.begin(), d_keys.end()); + + REQUIRE(map.get_size() == 0); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); + + /* + REQUIRE(cuco::test::all_of( + pairs_begin, + pairs_begin + num_keys, + [m_view] __device__(cuco::pair_type const& pair) mutable { + return m_view.insert(pair); + })); + + REQUIRE(cuco::test::all_of( + d_keys.begin(), + d_keys_begin + num_keys, + [m_view] __device__(cuco::key_type const& key) mutable { + return m_view.erase(key); + })); + */ + } +} \ No newline at end of file From 26dd44f0dd3b166996fb7eed250e39855e936070 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Tue, 22 Feb 2022 16:45:10 -0800 Subject: [PATCH 02/27] erase appears to be working --- include/cuco/detail/static_map.inl | 110 ++++++++++++++++++++++++++--- include/cuco/static_map.cuh | 19 +++++ tests/static_map/erase_test.cu | 30 ++++---- 3 files changed, 133 insertions(+), 26 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 8f828857c..ef85aabcd 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -60,7 +60,7 @@ void static_map::insert( auto const block_size = 128; auto const stride = 1; - auto const tile_size = 4; + auto const tile_size = 1; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_mutable_view(); @@ -69,7 +69,7 @@ void static_map::insert( CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); std::size_t h_num_successes; - detail::insert<<>>( + detail::insert<<>>( first, first + num_keys, num_successes_, view, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); @@ -219,6 +219,35 @@ static_map::device_mutable_view::packed_cas( return insert_result::CONTINUE; } +template +template +__device__ static_map::device_mutable_view::insert_result +static_map::device_mutable_view::packed_cas( + iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept +{ + auto expected_value = this->get_empty_value_sentinel(); + + cuco::detail::pair_converter expected_pair{ + cuco::make_pair(expected_key, expected_value)}; + cuco::detail::pair_converter new_pair{insert_pair}; + + auto slot = + reinterpret_cast::packed_type>*>( + current_slot); + + bool success = slot->compare_exchange_strong( + expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + if (success) { + return insert_result::SUCCESS; + } + // duplicate present during insert + else if (key_equal(insert_pair.first, expected_pair.pair.first)) { + return insert_result::DUPLICATE; + } + + return insert_result::CONTINUE; +} + template template __device__ static_map::device_mutable_view::insert_result @@ -257,6 +286,43 @@ static_map::device_mutable_view::back_to_back_cas( return insert_result::CONTINUE; } +template +template +__device__ static_map::device_mutable_view::insert_result +static_map::device_mutable_view::back_to_back_cas( + iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept +{ + using cuda::std::memory_order_relaxed; + + auto expected_value = this->get_empty_value_sentinel(); + + // Back-to-back CAS for 8B/8B key/value pairs + auto& slot_key = current_slot->first; + auto& slot_value = current_slot->second; + + bool key_success = + slot_key.compare_exchange_strong(expected_key, insert_pair.first, memory_order_relaxed); + bool value_success = + slot_value.compare_exchange_strong(expected_value, insert_pair.second, memory_order_relaxed); + + if (key_success) { + while (not value_success) { + value_success = + slot_value.compare_exchange_strong(expected_value = this->get_empty_value_sentinel(), + insert_pair.second, + memory_order_relaxed); + } + return insert_result::SUCCESS; + } else if (value_success) { + slot_value.store(this->get_empty_value_sentinel(), memory_order_relaxed); + } + + // our key was already present in the slot, so our key is a duplicate + if (key_equal(insert_pair.first, expected_key)) { return insert_result::DUPLICATE; } + + return insert_result::CONTINUE; +} + template template __device__ static_map::device_mutable_view::insert_result @@ -283,6 +349,31 @@ static_map::device_mutable_view::cas_dependent_wri return insert_result::CONTINUE; } +template +template +__device__ static_map::device_mutable_view::insert_result +static_map::device_mutable_view::cas_dependent_write( + iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept +{ + using cuda::std::memory_order_relaxed; + + auto& slot_key = current_slot->first; + + auto const key_success = + slot_key.compare_exchange_strong(expected_key, insert_pair.first, memory_order_relaxed); + + if (key_success) { + auto& slot_value = current_slot->second; + slot_value.store(insert_pair.second, memory_order_relaxed); + return insert_result::SUCCESS; + } + + // our key was already present in the slot, so our key is a duplicate + if (key_equal(insert_pair.first, expected_key)) { return insert_result::DUPLICATE; } + + return insert_result::CONTINUE; +} + template template __device__ bool static_map::device_mutable_view::insert( @@ -294,24 +385,25 @@ __device__ bool static_map::device_mutable_view::i key_type const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); // The user provide `key_equal` can never be used to compare against `empty_key_sentinel` as the // sentinel is not a valid key value. Therefore, first check for the sentinel - auto const slot_is_empty = - detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); + auto const slot_is_available = + detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()) or + detail::bitwise_compare(existing_key, this->get_erased_key_sentinel()); // the key we are trying to insert is already in the map, so we return with failure to insert - if (not slot_is_empty and key_equal(existing_key, insert_pair.first)) { return false; } + if (not slot_is_available and key_equal(existing_key, insert_pair.first)) { return false; } - if (slot_is_empty) { + if (slot_is_available) { auto const status = [&]() { // One single CAS operation if `value_type` is packable if constexpr (cuco::detail::is_packable()) { - return packed_cas(current_slot, insert_pair, key_equal); + return packed_cas(current_slot, insert_pair, key_equal, existing_key); } if constexpr (not cuco::detail::is_packable()) { #if __CUDA_ARCH__ < 700 - return cas_dependent_write(current_slot, insert_pair, key_equal); + return cas_dependent_write(current_slot, insert_pair, key_equal, existing_key); #else - return back_to_back_cas(current_slot, insert_pair, key_equal); + return back_to_back_cas(current_slot, insert_pair, key_equal, existing_key); #endif } }(); diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index e20650883..4c848ce64 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -695,6 +695,13 @@ class static_map { __device__ insert_result packed_cas(iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept; + + template + __device__ insert_result packed_cas(iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept; + /** * @brief Inserts the specified key/value pair with two back-to-back CAS operations. @@ -710,6 +717,12 @@ class static_map { __device__ insert_result back_to_back_cas(iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept; + + template + __device__ insert_result back_to_back_cas(iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept; /** * @brief Inserts the specified key/value pair with a CAS of the key and a dependent write of @@ -726,6 +739,12 @@ class static_map { __device__ insert_result cas_dependent_write(iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept; + + template + __device__ insert_result cas_dependent_write(iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept; public: template diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 44a85ab99..775da60b1 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -28,8 +28,8 @@ TEMPLATE_TEST_CASE_SIG( using Key = T; using Value = T; - auto num_keys = 1E6; - cuco::static_map map{num_keys * 2, -1, -1}; + unsigned long num_keys = 1'000'000; + cuco::static_map map{num_keys * 1.6, -1, -1}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); @@ -61,20 +61,16 @@ TEMPLATE_TEST_CASE_SIG( d_keys_exist.end(), [] __device__(const bool key_found) { return key_found; })); - /* - REQUIRE(cuco::test::all_of( - pairs_begin, - pairs_begin + num_keys, - [m_view] __device__(cuco::pair_type const& pair) mutable { - return m_view.insert(pair); - })); - - REQUIRE(cuco::test::all_of( - d_keys.begin(), - d_keys_begin + num_keys, - [m_view] __device__(cuco::key_type const& key) mutable { - return m_view.erase(key); - })); - */ + + map.insert(pairs_begin, pairs_begin + num_keys); + + REQUIRE(map.get_size() == num_keys); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::all_of(d_keys_exist.begin(), + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); + } } \ No newline at end of file From 9e0821cdaa036a351d90c4193744b4861df9c37b Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 23 Feb 2022 10:35:43 -0800 Subject: [PATCH 03/27] cg erase --- benchmarks/hash_table/static_map_bench.cu | 78 ++++++++++++++-------- include/cuco/detail/static_map.inl | 58 ++++++++++++++-- include/cuco/detail/static_map_kernels.cuh | 30 +++++++++ include/cuco/static_map.cuh | 10 +++ 4 files changed, 145 insertions(+), 31 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 59c325d5d..5504e4046 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -88,6 +88,7 @@ static void BM_static_map_insert(::benchmark::State& state) thrust::device_vector> d_pairs(h_pairs); + for (auto _ : state) { state.ResumeTiming(); state.PauseTiming(); @@ -143,50 +144,73 @@ static void BM_static_map_search_all(::benchmark::State& state) int64_t(state.range(0))); } -BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); +template +static void BM_static_map_erase_all(::benchmark::State& state) +{ + using map_type = cuco::static_map; -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + std::size_t num_keys = state.range(0); + float occupancy = state.range(1) / float{100}; + std::size_t size = num_keys / occupancy; -BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIFORM) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + map_type map{size, -1, -1}; + auto view = map.get_device_mutable_view(); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIFORM) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + std::vector h_keys(num_keys); + std::vector h_values(num_keys); + std::vector> h_pairs(num_keys); + std::vector h_results(num_keys); -BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::GAUSSIAN) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + generate_keys(h_keys.begin(), h_keys.end()); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::GAUSSIAN) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + for (auto i = 0; i < num_keys; ++i) { + Key key = h_keys[i]; + Value val = h_keys[i]; + h_pairs[i].first = key; + h_pairs[i].second = val; + } -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) + thrust::device_vector d_keys(h_keys); + thrust::device_vector d_results(num_keys); + thrust::device_vector> d_pairs(h_pairs); + + for (auto _ : state) { + //state.ResumeTiming(); + state.PauseTiming(); + map.insert(d_pairs.begin(), d_pairs.end()); + state.ResumeTiming(); + + map.erase(d_keys.begin(), d_keys.end()); + + //state.PauseTiming(); + } + + state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) * + int64_t(state.range(0))); +} + +/* +BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); - -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE) +*/ +BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); - -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIFORM) + //->Iterations(1000); +/* +BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIFORM) +BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::GAUSSIAN) +BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); - -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::GAUSSIAN) +*/ +BENCHMARK_TEMPLATE(BM_static_map_erase_all, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); \ No newline at end of file diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index ef85aabcd..f7adb1275 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -126,7 +126,7 @@ void static_map::erase( auto const block_size = 128; auto const stride = 1; - auto const tile_size = 1; + auto const tile_size = 4; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_mutable_view(); @@ -135,7 +135,7 @@ void static_map::erase( CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); std::size_t h_num_successes; - detail::erase<<>>( + detail::erase<<>>( first, first + num_keys, num_successes_, view, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); @@ -159,11 +159,11 @@ void static_map::find(InputIt first, auto const block_size = 128; auto const stride = 1; - auto const tile_size = 4; + auto const tile_size = 1; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_view(); - detail::find + detail::find <<>>(first, last, output_begin, view, hash, key_equal); } @@ -522,6 +522,56 @@ __device__ bool static_map::device_mutable_view::e } } +template +template +__device__ bool static_map::device_mutable_view::erase( + CG const& g, key_type const& k, Hash hash, KeyEqual key_equal) noexcept +{ + auto current_slot = initial_slot(g, k, hash); + value_type const insert_pair = make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); + + while (true) { + auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); + auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); + + auto const slot_is_empty = + detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); + + auto const exists = g.any(not slot_is_empty and key_equal(existing_key, k)); + + // Key exists, return true if successfully deleted + if (exists) { + uint32_t src_lane = __ffs(exists) - 1; + + bool status; + if (g.thread_rank() == src_lane) { + if constexpr (cuco::detail::is_packable()) { + auto slot = + reinterpret_cast::packed_type>*>( + current_slot); + cuco::detail::pair_converter expected_pair{ + cuco::make_pair(existing_key, existing_value)}; + cuco::detail::pair_converter new_pair{insert_pair}; + + status = slot->compare_exchange_strong(expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + } + if constexpr (not cuco::detail::is_packable()) { + current_slot->second.compare_exchange_strong(existing_value, insert_pair.second, cuda::std::memory_order_relaxed); + status = current_slot->first.compare_exchange_strong(existing_key, insert_pair.first, cuda::std::memory_order_relaxed); + } + } + + uint32_t res_status = g.shfl(static_cast(status), src_lane); + return static_cast(res_status); + } + + // empty slot found, but key not found, must not be in the map + if (g.ballot(slot_is_empty)) { return false; } + + current_slot = next_slot(g, current_slot); + } +} + template template __device__ typename static_map::device_view::iterator diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 1732432dc..6403d5d6c 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -184,6 +184,36 @@ __global__ void erase( if (threadIdx.x == 0) { *num_successes += block_num_successes; } } +template +__global__ void erase( + InputIt first, InputIt last, atomicT* num_successes, viewT view, Hash hash, KeyEqual key_equal) +{ + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t thread_num_successes = 0; + + auto tile = cg::tiled_partition(cg::this_thread_block()); + auto tid = block_size * blockIdx.x + threadIdx.x; + auto it = first + tid / tile_size; + + while (it < last) { + auto k{*it}; + if (view.erase(tile, k, hash, key_equal) && tile.thread_rank() == 0) { thread_num_successes++; } + it += (gridDim.x * block_size) / tile_size; + } + + // compute number of successfully inserted elements for each block + // and atomically add to the grand total + std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); + if (threadIdx.x == 0) { *num_successes += block_num_successes; } +} + /** * @brief Inserts key/value pairs in the range `[first, first + n)` if `pred` of the * corresponding stencil returns true. diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 4c848ce64..8af855497 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -815,6 +815,16 @@ class static_map { __device__ bool erase(key_type const& k, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}) noexcept; + + template , + typename KeyEqual = thrust::equal_to> + __device__ bool erase( + CG const& g, + key_type const& k, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}) noexcept; + }; // class device mutable view /** From 1777a5104534c0edad569874bf3428376e9543de Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 23 Feb 2022 10:44:10 -0800 Subject: [PATCH 04/27] cg erase bug fix --- include/cuco/detail/static_map.inl | 4 ++-- tests/static_map/erase_test.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index f7adb1275..a181181b7 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -537,7 +537,7 @@ __device__ bool static_map::device_mutable_view::e auto const slot_is_empty = detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); - auto const exists = g.any(not slot_is_empty and key_equal(existing_key, k)); + auto const exists = g.ballot(not slot_is_empty and key_equal(existing_key, k)); // Key exists, return true if successfully deleted if (exists) { @@ -566,7 +566,7 @@ __device__ bool static_map::device_mutable_view::e } // empty slot found, but key not found, must not be in the map - if (g.ballot(slot_is_empty)) { return false; } + if(g.ballot(slot_is_empty)) { return false; } current_slot = next_slot(g, current_slot); } diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 775da60b1..59ed1e2d2 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -38,7 +38,7 @@ TEMPLATE_TEST_CASE_SIG( thrust::device_vector d_values(num_keys); thrust::device_vector d_keys_exist(num_keys); - thrust::sequence(thrust::device, d_keys.begin(), d_keys.end()); + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1); thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1); auto pairs_begin = From d84e3b8554252bcc5e3595cb0a653478dcde3cc6 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 23 Feb 2022 14:13:58 -0800 Subject: [PATCH 05/27] minor changes --- benchmarks/hash_table/static_map_bench.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 5504e4046..2bbd1f31b 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -194,15 +194,17 @@ BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); */ +/* BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); //->Iterations(1000); -/* +*/ + BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); - +/* BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); From acc7941b57cda32dd9ce804c9d8cac4b1c37e363 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Thu, 24 Feb 2022 17:47:19 -0800 Subject: [PATCH 06/27] fix to CG insert --- benchmarks/hash_table/static_map_bench.cu | 17 +++++++------- include/cuco/detail/static_map.inl | 27 ++++++++++++----------- tests/static_map/erase_test.cu | 13 +++++++++++ 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 2bbd1f31b..7d1f1d8ea 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -87,14 +87,17 @@ static void BM_static_map_insert(::benchmark::State& state) } thrust::device_vector> d_pairs(h_pairs); + thrust::device_vector d_keys(h_keys); for (auto _ : state) { state.ResumeTiming(); state.PauseTiming(); map_type map{size, -1, -1}; + map.insert(d_pairs.begin(), d_pairs.end()); + map.erase(d_keys.begin(), d_keys.end()); state.ResumeTiming(); - + map.insert(d_pairs.begin(), d_pairs.end()); state.PauseTiming(); @@ -189,30 +192,28 @@ static void BM_static_map_erase_all(::benchmark::State& state) int64_t(state.range(0))); } -/* BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); -*/ /* BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); - //->Iterations(1000); -*/ - BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); -/* +*/ BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); +/* BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); */ +/* BENCHMARK_TEMPLATE(BM_static_map_erase_all, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); \ No newline at end of file + ->Apply(generate_size_and_occupancy); +*/ \ No newline at end of file diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index a181181b7..7d214a8df 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -60,7 +60,7 @@ void static_map::insert( auto const block_size = 128; auto const stride = 1; - auto const tile_size = 1; + auto const tile_size = 4; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_mutable_view(); @@ -69,7 +69,7 @@ void static_map::insert( CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); std::size_t h_num_successes; - detail::insert<<>>( + detail::insert<<>>( first, first + num_keys, num_successes_, view, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); @@ -159,11 +159,11 @@ void static_map::find(InputIt first, auto const block_size = 128; auto const stride = 1; - auto const tile_size = 1; + auto const tile_size = 4; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_view(); - detail::find + detail::find <<>>(first, last, output_begin, view, hash, key_equal); } @@ -432,32 +432,33 @@ __device__ bool static_map::device_mutable_view::i // The user provide `key_equal` can never be used to compare against `empty_key_sentinel` as the // sentinel is not a valid key value. Therefore, first check for the sentinel - auto const slot_is_empty = - detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); + auto const slot_is_available = + detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()) or + detail::bitwise_compare(existing_key, this->get_erased_key_sentinel()); // the key we are trying to insert is already in the map, so we return with failure to insert - if (g.any(not slot_is_empty and key_equal(existing_key, insert_pair.first))) { return false; } + if (g.any(not slot_is_available and key_equal(existing_key, insert_pair.first))) { return false; } - auto const window_contains_empty = g.ballot(slot_is_empty); + auto const window_contains_available = g.ballot(slot_is_available); // we found an empty slot, but not the key we are inserting, so this must // be an empty slot into which we can insert the key - if (window_contains_empty) { + if (window_contains_available) { // the first lane in the group with an empty slot will attempt the insert insert_result status{insert_result::CONTINUE}; - uint32_t src_lane = __ffs(window_contains_empty) - 1; + uint32_t src_lane = __ffs(window_contains_available) - 1; if (g.thread_rank() == src_lane) { // One single CAS operation if `value_type` is packable if constexpr (cuco::detail::is_packable()) { - status = packed_cas(current_slot, insert_pair, key_equal); + status = packed_cas(current_slot, insert_pair, key_equal, existing_key); } // Otherwise, two back-to-back CAS operations else { #if __CUDA_ARCH__ < 700 - status = cas_dependent_write(current_slot, insert_pair, key_equal); + status = cas_dependent_write(current_slot, insert_pair, key_equal, existing_key); #else - status = back_to_back_cas(current_slot, insert_pair, key_equal); + status = back_to_back_cas(current_slot, insert_pair, key_equal, existing_key); #endif } } diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 59ed1e2d2..18e6d2a05 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -71,6 +71,19 @@ TEMPLATE_TEST_CASE_SIG( REQUIRE(cuco::test::all_of(d_keys_exist.begin(), d_keys_exist.end(), [] __device__(const bool key_found) { return key_found; })); + + map.erase(d_keys.begin(), d_keys.begin() + num_keys/2); + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), + d_keys_exist.begin() + num_keys/2, + [] __device__(const bool key_found) { return key_found; })); + + REQUIRE(cuco::test::all_of(d_keys_exist.begin() + num_keys/2, + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); + + } } \ No newline at end of file From 19aa8bbae1b9d0579ea0ae3818781ba12f73f2aa Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Fri, 25 Feb 2022 09:40:15 -0800 Subject: [PATCH 07/27] insert benchmarking fix --- benchmarks/hash_table/static_map_bench.cu | 32 ++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 7d1f1d8ea..8df7d0806 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -91,16 +91,28 @@ static void BM_static_map_insert(::benchmark::State& state) for (auto _ : state) { - state.ResumeTiming(); - state.PauseTiming(); + //state.ResumeTiming(); + //state.PauseTiming(); map_type map{size, -1, -1}; + //map.insert(d_pairs.begin(), d_pairs.end()); + //map.erase(d_keys.begin(), d_keys.end()); + //state.ResumeTiming(); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); map.insert(d_pairs.begin(), d_pairs.end()); - map.erase(d_keys.begin(), d_keys.end()); - state.ResumeTiming(); - - map.insert(d_pairs.begin(), d_pairs.end()); + cudaEventRecord(stop); + cudaEventSynchronize(stop); - state.PauseTiming(); + float ms; + cudaEventElapsedTime(&ms, start, stop); + + //state.PauseTiming(); + + state.SetIterationTime(ms / 1000); } state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) * @@ -194,7 +206,8 @@ static void BM_static_map_erase_all(::benchmark::State& state) BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + ->Apply(generate_size_and_occupancy) + ->UseManualTime(); /* BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) @@ -205,7 +218,8 @@ BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE) */ BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + ->Apply(generate_size_and_occupancy) + ->UseManualTime(); /* BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE) From 0e66d28f006107a8f8605d5a3a1b4911becd8e79 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 9 Mar 2022 16:56:45 -0800 Subject: [PATCH 08/27] API improvements --- benchmarks/hash_table/static_map_bench.cu | 35 +----- include/cuco/detail/static_map.inl | 128 +++++----------------- include/cuco/static_map.cuh | 99 +++++++++++------ tests/static_map/erase_test.cu | 20 +++- 4 files changed, 120 insertions(+), 162 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 8df7d0806..bb9dba4e7 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -91,12 +91,7 @@ static void BM_static_map_insert(::benchmark::State& state) for (auto _ : state) { - //state.ResumeTiming(); - //state.PauseTiming(); map_type map{size, -1, -1}; - //map.insert(d_pairs.begin(), d_pairs.end()); - //map.erase(d_keys.begin(), d_keys.end()); - //state.ResumeTiming(); cudaEvent_t start, stop; cudaEventCreate(&start); @@ -110,8 +105,6 @@ static void BM_static_map_insert(::benchmark::State& state) float ms; cudaEventElapsedTime(&ms, start, stop); - //state.PauseTiming(); - state.SetIterationTime(ms / 1000); } @@ -168,7 +161,8 @@ static void BM_static_map_erase_all(::benchmark::State& state) float occupancy = state.range(1) / float{100}; std::size_t size = num_keys / occupancy; - map_type map{size, -1, -1}; + // static map with erase support + map_type map{size, -1, -1, -2}; auto view = map.get_device_mutable_view(); std::vector h_keys(num_keys); @@ -190,14 +184,11 @@ static void BM_static_map_erase_all(::benchmark::State& state) thrust::device_vector> d_pairs(h_pairs); for (auto _ : state) { - //state.ResumeTiming(); state.PauseTiming(); map.insert(d_pairs.begin(), d_pairs.end()); state.ResumeTiming(); map.erase(d_keys.begin(), d_keys.end()); - - //state.PauseTiming(); } state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) * @@ -208,26 +199,12 @@ BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy) ->UseManualTime(); -/* -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); -*/ -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy) - ->UseManualTime(); -/* -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); -*/ -/* -BENCHMARK_TEMPLATE(BM_static_map_erase_all, int64_t, int64_t, dist_type::UNIQUE) +BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); -*/ \ No newline at end of file + ->Apply(generate_size_and_occupancy) + ->UseManualTime(); \ No newline at end of file diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 7d214a8df..54cf84b39 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -17,14 +17,38 @@ #include namespace cuco { + +template +static_map::static_map(std::size_t capacity, + Key empty_key_sentinel, + Value empty_value_sentinel, + Allocator const& alloc, + cudaStream_t stream) + : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) + empty_key_sentinel_{empty_key_sentinel}, + empty_value_sentinel_{empty_value_sentinel}, + erased_key_sentinel_{empty_key_sentinel}, + slot_allocator_{alloc}, + counter_allocator_{alloc} +{ + slots_ = std::allocator_traits::allocate(slot_allocator_, capacity_); + num_successes_ = std::allocator_traits::allocate(counter_allocator_, 1); + + auto constexpr block_size = 256; + auto constexpr stride = 4; + auto const grid_size = (capacity_ + stride * block_size - 1) / (stride * block_size); + detail::initialize + <<>>( + slots_, empty_key_sentinel, empty_value_sentinel, capacity_); +} template static_map::static_map(std::size_t capacity, Key empty_key_sentinel, Value empty_value_sentinel, + Key erased_key_sentinel, Allocator const& alloc, - cudaStream_t stream, - Key erased_key_sentinel) + cudaStream_t stream) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) empty_key_sentinel_{empty_key_sentinel}, empty_value_sentinel_{empty_value_sentinel}, @@ -121,6 +145,9 @@ template void static_map::erase( InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) { + if(get_empty_key_sentinel() == get_erased_key_sentinel()) + throw std::runtime_error("Runtime error: You must provide a unique erased key sentinel value at map construction.\n"); + auto num_keys = std::distance(first, last); if (num_keys == 0) { return; } @@ -189,36 +216,6 @@ void static_map::contains(InputIt first, <<>>(first, last, output_begin, view, hash, key_equal); } -template -template -__device__ static_map::device_mutable_view::insert_result -static_map::device_mutable_view::packed_cas( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept -{ - auto expected_key = this->get_empty_key_sentinel(); - auto expected_value = this->get_empty_value_sentinel(); - - cuco::detail::pair_converter expected_pair{ - cuco::make_pair(expected_key, expected_value)}; - cuco::detail::pair_converter new_pair{insert_pair}; - - auto slot = - reinterpret_cast::packed_type>*>( - current_slot); - - bool success = slot->compare_exchange_strong( - expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); - if (success) { - return insert_result::SUCCESS; - } - // duplicate present during insert - else if (key_equal(insert_pair.first, expected_pair.pair.first)) { - return insert_result::DUPLICATE; - } - - return insert_result::CONTINUE; -} - template template __device__ static_map::device_mutable_view::insert_result @@ -248,44 +245,6 @@ static_map::device_mutable_view::packed_cas( return insert_result::CONTINUE; } -template -template -__device__ static_map::device_mutable_view::insert_result -static_map::device_mutable_view::back_to_back_cas( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept -{ - using cuda::std::memory_order_relaxed; - - auto expected_key = this->get_empty_key_sentinel(); - auto expected_value = this->get_empty_value_sentinel(); - - // Back-to-back CAS for 8B/8B key/value pairs - auto& slot_key = current_slot->first; - auto& slot_value = current_slot->second; - - bool key_success = - slot_key.compare_exchange_strong(expected_key, insert_pair.first, memory_order_relaxed); - bool value_success = - slot_value.compare_exchange_strong(expected_value, insert_pair.second, memory_order_relaxed); - - if (key_success) { - while (not value_success) { - value_success = - slot_value.compare_exchange_strong(expected_value = this->get_empty_value_sentinel(), - insert_pair.second, - memory_order_relaxed); - } - return insert_result::SUCCESS; - } else if (value_success) { - slot_value.store(this->get_empty_value_sentinel(), memory_order_relaxed); - } - - // our key was already present in the slot, so our key is a duplicate - if (key_equal(insert_pair.first, expected_key)) { return insert_result::DUPLICATE; } - - return insert_result::CONTINUE; -} - template template __device__ static_map::device_mutable_view::insert_result @@ -323,32 +282,6 @@ static_map::device_mutable_view::back_to_back_cas( return insert_result::CONTINUE; } -template -template -__device__ static_map::device_mutable_view::insert_result -static_map::device_mutable_view::cas_dependent_write( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept -{ - using cuda::std::memory_order_relaxed; - auto expected_key = this->get_empty_key_sentinel(); - - auto& slot_key = current_slot->first; - - auto const key_success = - slot_key.compare_exchange_strong(expected_key, insert_pair.first, memory_order_relaxed); - - if (key_success) { - auto& slot_value = current_slot->second; - slot_value.store(insert_pair.second, memory_order_relaxed); - return insert_result::SUCCESS; - } - - // our key was already present in the slot, so our key is a duplicate - if (key_equal(insert_pair.first, expected_key)) { return insert_result::DUPLICATE; } - - return insert_result::CONTINUE; -} - template template __device__ static_map::device_mutable_view::insert_result @@ -516,7 +449,6 @@ __device__ bool static_map::device_mutable_view::e current_slot->second.compare_exchange_strong(existing_value, insert_pair.second, cuda::std::memory_order_relaxed); return current_slot->first.compare_exchange_strong(existing_key, insert_pair.first, cuda::std::memory_order_relaxed); } - // simple CAS } current_slot = next_slot(current_slot); @@ -707,7 +639,7 @@ __device__ bool static_map::device_view::contains( while (true) { auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); - if (detail::bitwise_compare(existing_key, this->empty_key_sentinel_)) { return false; } + if (detail::bitwise_compare(existing_key, this->erased_key_sentinel_)) { return false; } if (key_equal(existing_key, k)) { return true; } diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 8af855497..2b1faab23 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -195,8 +195,18 @@ class static_map { Key empty_key_sentinel, Value empty_value_sentinel, Allocator const& alloc = Allocator{}, - cudaStream_t stream = 0, - Key erased_key_sentinel = -2); + cudaStream_t stream = 0); + + /** + * @brief Construct a fixed-size map with erase capability + * empty_key_sentinel and erased_key_sentinel must be different values. + */ + static_map(std::size_t capacity, + Key empty_key_sentinel, + Value empty_value_sentinel, + Key erased_key_sentinel, + Allocator const& alloc = Allocator{}, + cudaStream_t stream = 0); /** * @brief Destroys the map and frees its contents. @@ -267,6 +277,21 @@ class static_map { KeyEqual key_equal = KeyEqual{}, cudaStream_t stream = 0); + /** + * @brief Erases keys in the range `[first, last)`. + * + * This function synchronizes `stream`. + * + * @tparam InputIt Device accessible input iterator whose `value_type` is + * convertible to the map's `value_type` + * @tparam Hash Unary callable type + * @tparam KeyEqual Binary callable type + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param hash The unary function to apply to hash each key + * @param key_equal The binary function to compare two keys for equality + * @param stream Stream used for executing the kernels + */ template , typename KeyEqual = thrust::equal_to> @@ -357,7 +382,7 @@ class static_map { std::size_t capacity, Key empty_key_sentinel, Value empty_value_sentinel, - Key erased_key_sentinel = -2) noexcept + Key erased_key_sentinel) noexcept : slots_{slots}, capacity_{capacity}, empty_key_sentinel_{empty_key_sentinel}, @@ -666,8 +691,9 @@ class static_map { __host__ __device__ device_mutable_view(pair_atomic_type* slots, std::size_t capacity, Key empty_key_sentinel, - Value empty_value_sentinel) noexcept - : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel} + Value empty_value_sentinel, + Key erased_key_sentinel) noexcept + : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} { } @@ -689,20 +715,15 @@ class static_map { * @param insert_pair The pair to insert * @param key_equal The binary callable used to compare two keys for * equality + * @param expected_key The expected value of the key in the target slot * @return An insert result from the `insert_resullt` enumeration. */ template - __device__ insert_result packed_cas(iterator current_slot, - value_type const& insert_pair, - KeyEqual key_equal) noexcept; - - template __device__ insert_result packed_cas(iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept; - /** * @brief Inserts the specified key/value pair with two back-to-back CAS operations. * @@ -711,14 +732,10 @@ class static_map { * @param insert_pair The pair to insert * @param key_equal The binary callable used to compare two keys for * equality + * @param expected_key The expected value of the key in the target slot * @return An insert result from the `insert_resullt` enumeration. */ template - __device__ insert_result back_to_back_cas(iterator current_slot, - value_type const& insert_pair, - KeyEqual key_equal) noexcept; - - template __device__ insert_result back_to_back_cas(iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, @@ -733,20 +750,17 @@ class static_map { * @param insert_pair The pair to insert * @param key_equal The binary callable used to compare two keys for * equality + * @param expected_key The expected value of the key in the target slot * @return An insert result from the `insert_resullt` enumeration. */ template - __device__ insert_result cas_dependent_write(iterator current_slot, - value_type const& insert_pair, - KeyEqual key_equal) noexcept; - - template __device__ insert_result cas_dependent_write(iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept; public: + template __device__ static device_mutable_view make_from_uninitialized_slots( CG g, @@ -757,7 +771,22 @@ class static_map { { device_view_base::initialize_slots( g, slots, capacity, empty_key_sentinel, empty_value_sentinel); - return device_mutable_view{slots, capacity, empty_key_sentinel, empty_value_sentinel}; + return device_mutable_view{slots, capacity, empty_key_sentinel, empty_value_sentinel, empty_key_sentinel}; + } + + /* Features erase support */ + template + __device__ static device_mutable_view make_from_uninitialized_slots( + CG g, + pair_atomic_type* slots, + std::size_t capacity, + Key empty_key_sentinel, + Value empty_value_sentinel, + Key erased_key_sentinel) noexcept + { + device_view_base::initialize_slots( + g, slots, capacity, empty_key_sentinel, empty_value_sentinel); + return device_mutable_view{slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel}; } /** @@ -858,8 +887,9 @@ class static_map { __host__ __device__ device_view(pair_atomic_type* slots, std::size_t capacity, Key empty_key_sentinel, - Value empty_value_sentinel) noexcept - : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel} + Value empty_value_sentinel, + Key erased_key_sentinel) noexcept + : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} { } @@ -872,7 +902,8 @@ class static_map { : device_view_base{mutable_map.get_slots(), mutable_map.get_capacity(), mutable_map.get_empty_key_sentinel(), - mutable_map.get_empty_value_sentinel()} + mutable_map.get_empty_value_sentinel(), + mutable_map.get_erased_key_sentinel()} { } @@ -944,7 +975,8 @@ class static_map { return device_view(memory_to_use, source_device_view.get_capacity(), source_device_view.get_empty_key_sentinel(), - source_device_view.get_empty_value_sentinel()); + source_device_view.get_empty_value_sentinel(), + source_device_view.get_erased_key_sentinel()); } /** @@ -1118,8 +1150,6 @@ class static_map { */ Key get_empty_key_sentinel() const noexcept { return empty_key_sentinel_; } - Key get_erased_key_sentinel() const noexcept { return erased_key_sentinel_; } - /** * @brief Gets the sentinel value used to represent an empty value slot. * @@ -1127,6 +1157,13 @@ class static_map { */ Value get_empty_value_sentinel() const noexcept { return empty_value_sentinel_; } + /** + * @brief Gets the sentinel value used to represent an erased value slot. + * + * @return The sentinel value used to represent an erased value slot + */ + Key get_erased_key_sentinel() const noexcept { return erased_key_sentinel_; } + /** * @brief Constructs a device_view object based on the members of the `static_map` object. * @@ -1134,7 +1171,7 @@ class static_map { */ device_view get_device_view() const noexcept { - return device_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_); + return device_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); } /** @@ -1144,7 +1181,7 @@ class static_map { */ device_mutable_view get_device_mutable_view() const noexcept { - return device_mutable_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_); + return device_mutable_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); } private: @@ -1152,8 +1189,8 @@ class static_map { std::size_t capacity_{}; ///< Total number of slots std::size_t size_{}; ///< Number of keys in map Key empty_key_sentinel_{}; ///< Key value that represents an empty slot - Key erased_key_sentinel_{}; Value empty_value_sentinel_{}; ///< Initial value of empty slot + Key erased_key_sentinel_{}; ///< Key value that represents an erased slot atomic_ctr_type* num_successes_{}; ///< Number of successfully inserted keys on insert slot_allocator_type slot_allocator_{}; ///< Allocator used to allocate slots counter_allocator_type counter_allocator_{}; ///< Allocator used to allocate `num_successes_` diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 18e6d2a05..e3a970a33 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -29,7 +29,7 @@ TEMPLATE_TEST_CASE_SIG( using Value = T; unsigned long num_keys = 1'000'000; - cuco::static_map map{num_keys * 1.6, -1, -1}; + cuco::static_map map{num_keys * 1.1, -1, -1, -2}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); @@ -79,11 +79,23 @@ TEMPLATE_TEST_CASE_SIG( d_keys_exist.begin() + num_keys/2, [] __device__(const bool key_found) { return key_found; })); - REQUIRE(cuco::test::all_of(d_keys_exist.begin() + num_keys/2, + REQUIRE(cuco::test::all_of(d_keys_exist.begin() + num_keys/2, d_keys_exist.end(), [] __device__(const bool key_found) { return key_found; })); - - + + + map.erase(d_keys.begin()+num_keys/2, d_keys.end()); + REQUIRE(map.get_size() == 0); + + map.insert(pairs_begin, pairs_begin + num_keys/2); + map.insert(pairs_begin + num_keys/2, pairs_begin + num_keys); + + map.erase(d_keys.begin(), d_keys.begin() + num_keys/2); + + map.contains(d_keys.begin() + num_keys/2, d_keys.end(), d_keys_exist.begin()); + REQUIRE(cuco::test::all_of(d_keys_exist.begin(), + d_keys_exist.begin()+num_keys/2, + [] __device__(const bool key_found) { return key_found; })); } } \ No newline at end of file From 364fb1df4c2cb50cad7e9665c481425f9c8b2ab7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Mar 2022 01:10:45 +0000 Subject: [PATCH 09/27] [pre-commit.ci] auto code formatting --- benchmarks/hash_table/static_map_bench.cu | 1 - include/cuco/detail/static_map.inl | 84 +++++++++++++--------- include/cuco/detail/static_map_kernels.cuh | 4 +- include/cuco/static_map.cuh | 49 +++++++------ tests/static_map/erase_test.cu | 45 ++++++------ 5 files changed, 101 insertions(+), 82 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index bb9dba4e7..4b0e96710 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -89,7 +89,6 @@ static void BM_static_map_insert(::benchmark::State& state) thrust::device_vector> d_pairs(h_pairs); thrust::device_vector d_keys(h_keys); - for (auto _ : state) { map_type map{size, -1, -1}; diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 54cf84b39..60efec25f 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -17,7 +17,7 @@ #include namespace cuco { - + template static_map::static_map(std::size_t capacity, Key empty_key_sentinel, @@ -145,8 +145,9 @@ template void static_map::erase( InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) { - if(get_empty_key_sentinel() == get_erased_key_sentinel()) - throw std::runtime_error("Runtime error: You must provide a unique erased key sentinel value at map construction.\n"); + if (get_empty_key_sentinel() == get_erased_key_sentinel()) + throw std::runtime_error( + "Runtime error: You must provide a unique erased key sentinel value at map construction.\n"); auto num_keys = std::distance(first, last); if (num_keys == 0) { return; } @@ -220,7 +221,10 @@ template __device__ static_map::device_mutable_view::insert_result static_map::device_mutable_view::packed_cas( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept + iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept { auto expected_value = this->get_empty_value_sentinel(); @@ -249,7 +253,10 @@ template __device__ static_map::device_mutable_view::insert_result static_map::device_mutable_view::back_to_back_cas( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept + iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept { using cuda::std::memory_order_relaxed; @@ -286,7 +293,10 @@ template __device__ static_map::device_mutable_view::insert_result static_map::device_mutable_view::cas_dependent_write( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal, Key expected_key) noexcept + iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept { using cuda::std::memory_order_relaxed; @@ -370,7 +380,9 @@ __device__ bool static_map::device_mutable_view::i detail::bitwise_compare(existing_key, this->get_erased_key_sentinel()); // the key we are trying to insert is already in the map, so we return with failure to insert - if (g.any(not slot_is_available and key_equal(existing_key, insert_pair.first))) { return false; } + if (g.any(not slot_is_available and key_equal(existing_key, insert_pair.first))) { + return false; + } auto const window_contains_available = g.ballot(slot_is_available); @@ -422,35 +434,37 @@ __device__ bool static_map::device_mutable_view::e { auto current_slot{initial_slot(k, hash)}; - value_type const insert_pair = make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); + value_type const insert_pair = + make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); while (true) { - auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); + auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); - + // Key doesn't exist, return false - if (detail::bitwise_compare(existing_key, this->get_empty_key_sentinel())) { - return false; - } + if (detail::bitwise_compare(existing_key, this->get_empty_key_sentinel())) { return false; } // Key exists, return true if successfully deleted if (key_equal(existing_key, k)) { if constexpr (cuco::detail::is_packable()) { - auto slot = - reinterpret_cast::packed_type>*>( - current_slot); + auto slot = reinterpret_cast< + cuda::atomic::packed_type>*>( + current_slot); cuco::detail::pair_converter expected_pair{ cuco::make_pair(existing_key, existing_value)}; cuco::detail::pair_converter new_pair{insert_pair}; - return slot->compare_exchange_strong(expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + return slot->compare_exchange_strong( + expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); } if constexpr (not cuco::detail::is_packable()) { - current_slot->second.compare_exchange_strong(existing_value, insert_pair.second, cuda::std::memory_order_relaxed); - return current_slot->first.compare_exchange_strong(existing_key, insert_pair.first, cuda::std::memory_order_relaxed); + current_slot->second.compare_exchange_strong( + existing_value, insert_pair.second, cuda::std::memory_order_relaxed); + return current_slot->first.compare_exchange_strong( + existing_key, insert_pair.first, cuda::std::memory_order_relaxed); } } - + current_slot = next_slot(current_slot); } } @@ -461,12 +475,13 @@ __device__ bool static_map::device_mutable_view::e CG const& g, key_type const& k, Hash hash, KeyEqual key_equal) noexcept { auto current_slot = initial_slot(g, k, hash); - value_type const insert_pair = make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); + value_type const insert_pair = + make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); while (true) { - auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); + auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); - + auto const slot_is_empty = detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); @@ -475,32 +490,35 @@ __device__ bool static_map::device_mutable_view::e // Key exists, return true if successfully deleted if (exists) { uint32_t src_lane = __ffs(exists) - 1; - + bool status; if (g.thread_rank() == src_lane) { if constexpr (cuco::detail::is_packable()) { - auto slot = - reinterpret_cast::packed_type>*>( - current_slot); + auto slot = reinterpret_cast< + cuda::atomic::packed_type>*>( + current_slot); cuco::detail::pair_converter expected_pair{ cuco::make_pair(existing_key, existing_value)}; cuco::detail::pair_converter new_pair{insert_pair}; - status = slot->compare_exchange_strong(expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + status = slot->compare_exchange_strong( + expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); } if constexpr (not cuco::detail::is_packable()) { - current_slot->second.compare_exchange_strong(existing_value, insert_pair.second, cuda::std::memory_order_relaxed); - status = current_slot->first.compare_exchange_strong(existing_key, insert_pair.first, cuda::std::memory_order_relaxed); + current_slot->second.compare_exchange_strong( + existing_value, insert_pair.second, cuda::std::memory_order_relaxed); + status = current_slot->first.compare_exchange_strong( + existing_key, insert_pair.first, cuda::std::memory_order_relaxed); } } uint32_t res_status = g.shfl(static_cast(status), src_lane); return static_cast(res_status); } - + // empty slot found, but key not found, must not be in the map - if(g.ballot(slot_is_empty)) { return false; } - + if (g.ballot(slot_is_empty)) { return false; } + current_slot = next_slot(g, current_slot); } } diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 6403d5d6c..f6a65f92c 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -199,8 +199,8 @@ __global__ void erase( std::size_t thread_num_successes = 0; auto tile = cg::tiled_partition(cg::this_thread_block()); - auto tid = block_size * blockIdx.x + threadIdx.x; - auto it = first + tid / tile_size; + auto tid = block_size * blockIdx.x + threadIdx.x; + auto it = first + tid / tile_size; while (it < last) { auto k{*it}; diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 2b1faab23..0e520c96b 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -297,9 +297,9 @@ class static_map { typename KeyEqual = thrust::equal_to> void erase(InputIt first, InputIt last, - Hash hash = Hash{}, - KeyEqual key_equal = KeyEqual{}, - cudaStream_t stream = 0); + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}, + cudaStream_t stream = 0); /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. @@ -372,7 +372,7 @@ class static_map { using const_iterator = pair_atomic_type const*; using slot_type = slot_type; - Key empty_key_sentinel_{}; ///< Key value that represents an empty slot + Key empty_key_sentinel_{}; ///< Key value that represents an empty slot Key erased_key_sentinel_{}; Value empty_value_sentinel_{}; ///< Initial Value of empty slot pair_atomic_type* slots_{}; ///< Pointer to flat slots storage @@ -579,8 +579,11 @@ class static_map { { return empty_value_sentinel_; } - - __host__ __device__ Key get_erased_key_sentinel() const noexcept { return erased_key_sentinel_; } + + __host__ __device__ Key get_erased_key_sentinel() const noexcept + { + return erased_key_sentinel_; + } /** * @brief Returns iterator to the first slot. @@ -693,7 +696,8 @@ class static_map { Key empty_key_sentinel, Value empty_value_sentinel, Key erased_key_sentinel) noexcept - : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} + : device_view_base{ + slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} { } @@ -760,7 +764,6 @@ class static_map { Key expected_key) noexcept; public: - template __device__ static device_mutable_view make_from_uninitialized_slots( CG g, @@ -771,7 +774,8 @@ class static_map { { device_view_base::initialize_slots( g, slots, capacity, empty_key_sentinel, empty_value_sentinel); - return device_mutable_view{slots, capacity, empty_key_sentinel, empty_value_sentinel, empty_key_sentinel}; + return device_mutable_view{ + slots, capacity, empty_key_sentinel, empty_value_sentinel, empty_key_sentinel}; } /* Features erase support */ @@ -786,7 +790,8 @@ class static_map { { device_view_base::initialize_slots( g, slots, capacity, empty_key_sentinel, empty_value_sentinel); - return device_mutable_view{slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel}; + return device_mutable_view{ + slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel}; } /** @@ -838,21 +843,20 @@ class static_map { value_type const& insert_pair, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}) noexcept; - + template , typename KeyEqual = thrust::equal_to> __device__ bool erase(key_type const& k, - Hash hash = Hash{}, - KeyEqual key_equal = KeyEqual{}) noexcept; + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}) noexcept; template , typename KeyEqual = thrust::equal_to> - __device__ bool erase( - CG const& g, - key_type const& k, - Hash hash = Hash{}, - KeyEqual key_equal = KeyEqual{}) noexcept; + __device__ bool erase(CG const& g, + key_type const& k, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}) noexcept; }; // class device mutable view @@ -889,7 +893,8 @@ class static_map { Key empty_key_sentinel, Value empty_value_sentinel, Key erased_key_sentinel) noexcept - : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} + : device_view_base{ + slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} { } @@ -1171,7 +1176,8 @@ class static_map { */ device_view get_device_view() const noexcept { - return device_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); + return device_view( + slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); } /** @@ -1181,7 +1187,8 @@ class static_map { */ device_mutable_view get_device_mutable_view() const noexcept { - return device_mutable_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); + return device_mutable_view( + slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); } private: diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index e3a970a33..531bd44d9 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -21,13 +21,11 @@ #include - -TEMPLATE_TEST_CASE_SIG( - "erase key", "", ((typename T), T), (int32_t), (int64_t)) +TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t), (int64_t)) { using Key = T; using Value = T; - + unsigned long num_keys = 1'000'000; cuco::static_map map{num_keys * 1.1, -1, -1, -2}; @@ -40,12 +38,11 @@ TEMPLATE_TEST_CASE_SIG( thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1); thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1); - + auto pairs_begin = thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), d_values.begin())); - SECTION( - "Check basic insert/erase") + SECTION("Check basic insert/erase") { map.insert(pairs_begin, pairs_begin + num_keys); @@ -61,7 +58,6 @@ TEMPLATE_TEST_CASE_SIG( d_keys_exist.end(), [] __device__(const bool key_found) { return key_found; })); - map.insert(pairs_begin, pairs_begin + num_keys); REQUIRE(map.get_size() == num_keys); @@ -69,33 +65,32 @@ TEMPLATE_TEST_CASE_SIG( map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); REQUIRE(cuco::test::all_of(d_keys_exist.begin(), - d_keys_exist.end(), - [] __device__(const bool key_found) { return key_found; })); + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); - map.erase(d_keys.begin(), d_keys.begin() + num_keys/2); + map.erase(d_keys.begin(), d_keys.begin() + num_keys / 2); map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); - - REQUIRE(cuco::test::none_of(d_keys_exist.begin(), - d_keys_exist.begin() + num_keys/2, - [] __device__(const bool key_found) { return key_found; })); - REQUIRE(cuco::test::all_of(d_keys_exist.begin() + num_keys/2, - d_keys_exist.end(), + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), + d_keys_exist.begin() + num_keys / 2, [] __device__(const bool key_found) { return key_found; })); + REQUIRE(cuco::test::all_of(d_keys_exist.begin() + num_keys / 2, + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); - map.erase(d_keys.begin()+num_keys/2, d_keys.end()); + map.erase(d_keys.begin() + num_keys / 2, d_keys.end()); REQUIRE(map.get_size() == 0); - map.insert(pairs_begin, pairs_begin + num_keys/2); - map.insert(pairs_begin + num_keys/2, pairs_begin + num_keys); + map.insert(pairs_begin, pairs_begin + num_keys / 2); + map.insert(pairs_begin + num_keys / 2, pairs_begin + num_keys); + + map.erase(d_keys.begin(), d_keys.begin() + num_keys / 2); - map.erase(d_keys.begin(), d_keys.begin() + num_keys/2); + map.contains(d_keys.begin() + num_keys / 2, d_keys.end(), d_keys_exist.begin()); - map.contains(d_keys.begin() + num_keys/2, d_keys.end(), d_keys_exist.begin()); - - REQUIRE(cuco::test::all_of(d_keys_exist.begin(), - d_keys_exist.begin()+num_keys/2, + REQUIRE(cuco::test::all_of(d_keys_exist.begin(), + d_keys_exist.begin() + num_keys / 2, [] __device__(const bool key_found) { return key_found; })); } } \ No newline at end of file From 7f84d3d738b5ae4e4969ed53f14f05db26aded4f Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 9 Mar 2022 17:18:10 -0800 Subject: [PATCH 10/27] typo fix for non-CG contains --- include/cuco/detail/static_map.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 60efec25f..ecc13368c 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -657,7 +657,7 @@ __device__ bool static_map::device_view::contains( while (true) { auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); - if (detail::bitwise_compare(existing_key, this->erased_key_sentinel_)) { return false; } + if (detail::bitwise_compare(existing_key, this->empty_key_sentinel_)) { return false; } if (key_equal(existing_key, k)) { return true; } From c2bcc25f8dfff85a3c786851449fbce5fe204257 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Thu, 17 Mar 2022 18:26:22 -0700 Subject: [PATCH 11/27] style changes --- include/cuco/detail/error.hpp | 22 ++++++++++++++- include/cuco/detail/static_map.inl | 33 +++++++++++++++------- include/cuco/detail/static_map_kernels.cuh | 10 +++---- include/cuco/static_map.cuh | 4 +-- tests/static_map/erase_test.cu | 18 +++--------- 5 files changed, 54 insertions(+), 33 deletions(-) diff --git a/include/cuco/detail/error.hpp b/include/cuco/detail/error.hpp index f5f331222..9fb76928a 100644 --- a/include/cuco/detail/error.hpp +++ b/include/cuco/detail/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,3 +80,23 @@ struct cuda_error : public std::runtime_error { cudaError_t const status = (expr); \ assert(cudaSuccess == status); \ } while (0) + +/** + * @brief Macro for checking runtime conditions that throws an exception when + * a condition is violated. + * + * Example usage: + * + * @code + * CUCO_RUNTIME_EXPECTS(key == value, "Key value mismatch"); + * @endcode + * + * @param[in] cond Expression that evaluates to true or false + * @param[in] reason String literal description of the reason that cond is + * expected to be true + * @throw std::runtime_error if the condition evaluates to false. + */ +#define CUCO_RUNTIME_EXPECTS(cond, reason) \ + (!!(cond)) ? static_cast(0) \ + : throw std::runtime_error("cuco failure at: " __FILE__ \ + ":" CUCO_STRINGIFY(__LINE__) ": " reason) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index ecc13368c..44e8298a5 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include namespace cuco { @@ -146,15 +147,15 @@ void static_map::erase( InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) { if (get_empty_key_sentinel() == get_erased_key_sentinel()) - throw std::runtime_error( - "Runtime error: You must provide a unique erased key sentinel value at map construction.\n"); + CUCO_RUNTIME_EXPECTS(get_empty_key_sentinel() != get_erased_key_sentinel(), + "You must provide a unique erased key sentinel value at map construction."); auto num_keys = std::distance(first, last); if (num_keys == 0) { return; } - auto const block_size = 128; - auto const stride = 1; - auto const tile_size = 4; + auto constexpr block_size = 128; + auto constexpr stride = 1; + auto constexpr tile_size = 4; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_mutable_view(); @@ -438,8 +439,16 @@ __device__ bool static_map::device_mutable_view::e make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); while (true) { - auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); - auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); + //auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); + //auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); + + static_assert(sizeof(Key) == sizeof(atomic_key_type)); + static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); + // TODO: Replace reinterpret_cast with atomic ref when available. + value_type slot_contents = *reinterpret_cast(current_slot); + auto existing_key = slot_contents.first; + auto existing_value = slot_contents.second; + // Key doesn't exist, return false if (detail::bitwise_compare(existing_key, this->get_empty_key_sentinel())) { return false; } @@ -479,8 +488,12 @@ __device__ bool static_map::device_mutable_view::e make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); while (true) { - auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); - auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); + static_assert(sizeof(Key) == sizeof(atomic_key_type)); + static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); + // TODO: Replace reinterpret_cast with atomic ref when available. + value_type slot_contents = *reinterpret_cast(current_slot); + auto existing_key = slot_contents.first; + auto existing_value = slot_contents.second; auto const slot_is_empty = detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index f6a65f92c..2cad98390 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -165,7 +165,7 @@ template BlockReduce; + using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; std::size_t thread_num_successes = 0; @@ -173,8 +173,7 @@ __global__ void erase( auto it = first + tid; while (it < last) { - auto k{*it}; - if (view.erase(k, hash, key_equal)) { thread_num_successes++; } + if (view.erase(*it, hash, key_equal)) { thread_num_successes++; } it += gridDim.x * block_size; } @@ -203,8 +202,7 @@ __global__ void erase( auto it = first + tid / tile_size; while (it < last) { - auto k{*it}; - if (view.erase(tile, k, hash, key_equal) && tile.thread_rank() == 0) { thread_num_successes++; } + if (view.erase(tile, *it, hash, key_equal) and tile.thread_rank() == 0) { thread_num_successes++; } it += (gridDim.x * block_size) / tile_size; } diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 0e520c96b..b93b0b2a7 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -373,7 +373,7 @@ class static_map { using slot_type = slot_type; Key empty_key_sentinel_{}; ///< Key value that represents an empty slot - Key erased_key_sentinel_{}; + Key erased_key_sentinel_{}; ///< Key value that represents an erased slot Value empty_value_sentinel_{}; ///< Initial Value of empty slot pair_atomic_type* slots_{}; ///< Pointer to flat slots storage std::size_t capacity_{}; ///< Total number of slots diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 531bd44d9..95c07308a 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,9 @@ TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t), (int64_t)) using Key = T; using Value = T; - unsigned long num_keys = 1'000'000; - cuco::static_map map{num_keys * 1.1, -1, -1, -2}; + constexpr std::size_t num_keys = 1'000'000; + constexpr std::size_t capacity = 1'100'000; + cuco::static_map map{capacity, -1, -1, -2}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); @@ -81,16 +82,5 @@ TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t), (int64_t)) map.erase(d_keys.begin() + num_keys / 2, d_keys.end()); REQUIRE(map.get_size() == 0); - - map.insert(pairs_begin, pairs_begin + num_keys / 2); - map.insert(pairs_begin + num_keys / 2, pairs_begin + num_keys); - - map.erase(d_keys.begin(), d_keys.begin() + num_keys / 2); - - map.contains(d_keys.begin() + num_keys / 2, d_keys.end(), d_keys_exist.begin()); - - REQUIRE(cuco::test::all_of(d_keys_exist.begin(), - d_keys_exist.begin() + num_keys / 2, - [] __device__(const bool key_found) { return key_found; })); } } \ No newline at end of file From 9deba51f1babe548e9fc644c0b05aab7f9e92918 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Mar 2022 01:28:31 +0000 Subject: [PATCH 12/27] [pre-commit.ci] auto code formatting --- include/cuco/detail/error.hpp | 2 +- include/cuco/detail/static_map.inl | 22 +++++++++++----------- include/cuco/detail/static_map_kernels.cuh | 4 +++- include/cuco/static_map.cuh | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/cuco/detail/error.hpp b/include/cuco/detail/error.hpp index 9fb76928a..46631cf11 100644 --- a/include/cuco/detail/error.hpp +++ b/include/cuco/detail/error.hpp @@ -99,4 +99,4 @@ struct cuda_error : public std::runtime_error { #define CUCO_RUNTIME_EXPECTS(cond, reason) \ (!!(cond)) ? static_cast(0) \ : throw std::runtime_error("cuco failure at: " __FILE__ \ - ":" CUCO_STRINGIFY(__LINE__) ": " reason) + ":" CUCO_STRINGIFY(__LINE__) ": " reason) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 44e8298a5..6425d1dc6 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -147,7 +147,8 @@ void static_map::erase( InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) { if (get_empty_key_sentinel() == get_erased_key_sentinel()) - CUCO_RUNTIME_EXPECTS(get_empty_key_sentinel() != get_erased_key_sentinel(), + CUCO_RUNTIME_EXPECTS( + get_empty_key_sentinel() != get_erased_key_sentinel(), "You must provide a unique erased key sentinel value at map construction."); auto num_keys = std::distance(first, last); @@ -156,8 +157,8 @@ void static_map::erase( auto constexpr block_size = 128; auto constexpr stride = 1; auto constexpr tile_size = 4; - auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); - auto view = get_device_mutable_view(); + auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); + auto view = get_device_mutable_view(); // TODO: memset an atomic variable is unsafe static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type)); @@ -439,16 +440,15 @@ __device__ bool static_map::device_mutable_view::e make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); while (true) { - //auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); - //auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); - + // auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); + // auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); + static_assert(sizeof(Key) == sizeof(atomic_key_type)); static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); // TODO: Replace reinterpret_cast with atomic ref when available. value_type slot_contents = *reinterpret_cast(current_slot); - auto existing_key = slot_contents.first; - auto existing_value = slot_contents.second; - + auto existing_key = slot_contents.first; + auto existing_value = slot_contents.second; // Key doesn't exist, return false if (detail::bitwise_compare(existing_key, this->get_empty_key_sentinel())) { return false; } @@ -492,8 +492,8 @@ __device__ bool static_map::device_mutable_view::e static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); // TODO: Replace reinterpret_cast with atomic ref when available. value_type slot_contents = *reinterpret_cast(current_slot); - auto existing_key = slot_contents.first; - auto existing_value = slot_contents.second; + auto existing_key = slot_contents.first; + auto existing_value = slot_contents.second; auto const slot_is_empty = detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 2cad98390..4f77c8a91 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -202,7 +202,9 @@ __global__ void erase( auto it = first + tid / tile_size; while (it < last) { - if (view.erase(tile, *it, hash, key_equal) and tile.thread_rank() == 0) { thread_num_successes++; } + if (view.erase(tile, *it, hash, key_equal) and tile.thread_rank() == 0) { + thread_num_successes++; + } it += (gridDim.x * block_size) / tile_size; } diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index b93b0b2a7..e32f8e11a 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -372,8 +372,8 @@ class static_map { using const_iterator = pair_atomic_type const*; using slot_type = slot_type; - Key empty_key_sentinel_{}; ///< Key value that represents an empty slot - Key erased_key_sentinel_{}; ///< Key value that represents an erased slot + Key empty_key_sentinel_{}; ///< Key value that represents an empty slot + Key erased_key_sentinel_{}; ///< Key value that represents an erased slot Value empty_value_sentinel_{}; ///< Initial Value of empty slot pair_atomic_type* slots_{}; ///< Pointer to flat slots storage std::size_t capacity_{}; ///< Total number of slots From 2872f1d76edb2570cf1b42cbfa2ea8847fcb7ce5 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Fri, 18 Mar 2022 15:12:41 -0700 Subject: [PATCH 13/27] style and documentation, strong-type wrappers for sentinel values --- benchmarks/hash_table/static_map_bench.cu | 10 ++- examples/static_map/custom_type_example.cu | 3 +- examples/static_map/static_map_example.cu | 3 +- include/cuco/detail/dynamic_map.inl | 8 +- include/cuco/detail/static_map.inl | 29 +++---- include/cuco/static_map.cuh | 99 ++++++++++++++-------- tests/static_map/custom_type_test.cu | 4 +- tests/static_map/erase_test.cu | 6 +- tests/static_map/key_sentinel_test.cu | 4 +- tests/static_map/shared_memory_test.cu | 7 +- tests/static_map/stream_test.cu | 5 +- tests/static_map/unique_sequence_test.cu | 4 +- 12 files changed, 118 insertions(+), 64 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 4b0e96710..8c9ea71a0 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -90,7 +90,8 @@ static void BM_static_map_insert(::benchmark::State& state) thrust::device_vector d_keys(h_keys); for (auto _ : state) { - map_type map{size, -1, -1}; + map_type map{size, cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}}; cudaEvent_t start, stop; cudaEventCreate(&start); @@ -120,7 +121,8 @@ static void BM_static_map_search_all(::benchmark::State& state) float occupancy = state.range(1) / float{100}; std::size_t size = num_keys / occupancy; - map_type map{size, -1, -1}; + map_type map{size, cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}}; auto view = map.get_device_mutable_view(); std::vector h_keys(num_keys); @@ -161,7 +163,9 @@ static void BM_static_map_erase_all(::benchmark::State& state) std::size_t size = num_keys / occupancy; // static map with erase support - map_type map{size, -1, -1, -2}; + map_type map{size, cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::sentinel::erased_key{-2}}; auto view = map.get_device_mutable_view(); std::vector h_keys(num_keys); diff --git a/examples/static_map/custom_type_example.cu b/examples/static_map/custom_type_example.cu index 443323e36..2e116d0ab 100644 --- a/examples/static_map/custom_type_example.cu +++ b/examples/static_map/custom_type_example.cu @@ -91,7 +91,8 @@ int main(void) // Construct a map with 100,000 slots using the given empty key/value sentinels. Note the // capacity is chosen knowing we will insert 80,000 keys, for an load factor of 80%. cuco::static_map map{ - 100'000, empty_key_sentinel, empty_value_sentinel}; + 100'000, cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}}; // Inserts 80,000 pairs into the map by using the custom hasher and custom equality callable map.insert(pairs_begin, pairs_begin + num_pairs, custom_hash{}, custom_key_equals{}); diff --git a/examples/static_map/static_map_example.cu b/examples/static_map/static_map_example.cu index 12d12578d..31eac21c2 100644 --- a/examples/static_map/static_map_example.cu +++ b/examples/static_map/static_map_example.cu @@ -33,7 +33,8 @@ int main(void) cudaStream_t str; cudaStreamCreate(&str); cuco::static_map map{ - 100'000, empty_key_sentinel, empty_value_sentinel, cuco::cuda_allocator{}, str}; + 100'000, cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}, cuco::cuda_allocator{}, str}; thrust::device_vector> pairs(50'000); diff --git a/include/cuco/detail/dynamic_map.inl b/include/cuco/detail/dynamic_map.inl index 57950ea45..4315bae51 100644 --- a/include/cuco/detail/dynamic_map.inl +++ b/include/cuco/detail/dynamic_map.inl @@ -30,7 +30,9 @@ dynamic_map::dynamic_map(std::size_t initial_capac alloc_{alloc} { submaps_.push_back(std::make_unique>( - initial_capacity, empty_key_sentinel, empty_value_sentinel, alloc)); + initial_capacity, + sentinel::empty_key{empty_key_sentinel}, + sentinel::empty_value{empty_value_sentinel}, alloc)); submap_views_.push_back(submaps_[0]->get_device_view()); submap_mutable_views_.push_back(submaps_[0]->get_device_mutable_view()); @@ -59,7 +61,9 @@ void dynamic_map::reserve(std::size_t n) else { submap_capacity = capacity_; submaps_.push_back(std::make_unique>( - submap_capacity, empty_key_sentinel_, empty_value_sentinel_, alloc_)); + submap_capacity, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, alloc_)); submap_views_.push_back(submaps_[submap_idx]->get_device_view()); submap_mutable_views_.push_back(submaps_[submap_idx]->get_device_mutable_view()); diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 6425d1dc6..8ed236dd0 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -21,14 +21,14 @@ namespace cuco { template static_map::static_map(std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, Allocator const& alloc, cudaStream_t stream) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) - empty_key_sentinel_{empty_key_sentinel}, - empty_value_sentinel_{empty_value_sentinel}, - erased_key_sentinel_{empty_key_sentinel}, + empty_key_sentinel_{empty_key_sentinel.value}, + empty_value_sentinel_{empty_value_sentinel.value}, + erased_key_sentinel_{empty_key_sentinel.value}, slot_allocator_{alloc}, counter_allocator_{alloc} { @@ -40,20 +40,20 @@ static_map::static_map(std::size_t capacity, auto const grid_size = (capacity_ + stride * block_size - 1) / (stride * block_size); detail::initialize <<>>( - slots_, empty_key_sentinel, empty_value_sentinel, capacity_); + slots_, empty_key_sentinel_, empty_value_sentinel_, capacity_); } template static_map::static_map(std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, - Key erased_key_sentinel, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel, Allocator const& alloc, cudaStream_t stream) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) - empty_key_sentinel_{empty_key_sentinel}, - empty_value_sentinel_{empty_value_sentinel}, - erased_key_sentinel_{erased_key_sentinel}, + empty_key_sentinel_{empty_key_sentinel.value}, + empty_value_sentinel_{empty_value_sentinel.value}, + erased_key_sentinel_{erased_key_sentinel.value}, slot_allocator_{alloc}, counter_allocator_{alloc} { @@ -65,7 +65,7 @@ static_map::static_map(std::size_t capacity, auto const grid_size = (capacity_ + stride * block_size - 1) / (stride * block_size); detail::initialize <<>>( - slots_, empty_key_sentinel, empty_value_sentinel, capacity_); + slots_, empty_key_sentinel_, empty_value_sentinel_, capacity_); } template @@ -440,9 +440,6 @@ __device__ bool static_map::device_mutable_view::e make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); while (true) { - // auto existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); - // auto existing_value = current_slot->second.load(cuda::std::memory_order_relaxed); - static_assert(sizeof(Key) == sizeof(atomic_key_type)); static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); // TODO: Replace reinterpret_cast with atomic ref when available. diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index e32f8e11a..46d9c58a6 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -41,6 +41,17 @@ #include namespace cuco { + +namespace sentinel { + template + struct empty_key{T value; }; + + template + struct empty_value{T value; }; + + template + struct erased_key{T value; }; +} // namespace sentinel template class dynamic_map; @@ -66,29 +77,37 @@ class dynamic_map; * - Host-side "bulk" operations * - Device-side "singular" operations * - * The host-side bulk operations include `insert`, `find`, and `contains`. These - * APIs should be used when there are a large number of keys to insert or lookup + * The host-side bulk operations include `insert`, `erase`, `find`, and `contains`. These + * APIs should be used when there are a large number of keys to insert, erase or lookup * in the map. For example, given a range of keys specified by device-accessible - * iterators, the bulk `insert` function will insert all keys into the map. + * iterators, the bulk `insert` function will insert all keys into the map. Note that in order + * for a `static_map` instance to support `erase`, the user must provide an `erased_key_sentinel` + * which is distinct from the `empty_key_sentinel` at construction. If `erase` is called on a + * `static_map` which was not constructed in this way, a runtime error will be generated. * * The singular device-side operations allow individual threads to perform * independent insert or find/contains operations from device code. These * operations are accessed through non-owning, trivially copyable "view" types: * `device_view` and `mutable_device_view`. The `device_view` class is an * immutable view that allows only non-modifying operations such as `find` or - * `contains`. The `mutable_device_view` class only allows `insert` operations. - * The two types are separate to prevent erroneous concurrent insert/find - * operations. + * `contains`. The `mutable_device_view` class only allows `insert` and `erase` operations. + * The two types are separate to prevent erroneous concurrent insert/erase/find + * operations. Note that the device-side `erase` may only be called if the corresponding + * `mutable_device_view` was constructed with a user-provided `erased_key_sentinel`. It is + * up to the user to ensure this condition is met. * * Example: * \code{.cpp} * int empty_key_sentinel = -1; - * int empty_value_sentine = -1; + * int empty_value_sentinel = -1; + * int erased_key_sentinel = -2; * * // Constructs a map with 100,000 slots using -1 and -1 as the empty key/value - * // sentinels. Note the capacity is chosen knowing we will insert 50,000 keys, + * // sentinels. The supplied erased key sentinel of -2 must be a different value from the empty + * // key sentinel. If erase functionality is not needed, you may elect to not supply an erased + * // key sentinel to the constructor. Note the capacity is chosen knowing we will insert 50,000 keys, * // for an load factor of 50%. - * static_map m{100'000, empty_key_sentinel, empty_value_sentinel}; + * static_map m{100'000, empty_key_sentinel, empty_value_sentinel, erased_value_sentinel}; * * // Create a sequence of pairs {{0,0}, {1,1}, ... {i,i}} * thrust::device_vector> pairs(50,000); @@ -167,6 +186,8 @@ class static_map { return cuco::detail::is_packable(); } + + /** * @brief Construct a fixed-size map with the specified capacity and sentinel values. * @brief Construct a statically sized map with the specified number of slots @@ -192,8 +213,8 @@ class static_map { * @param stream Stream used for executing the kernels */ static_map(std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, Allocator const& alloc = Allocator{}, cudaStream_t stream = 0); @@ -202,9 +223,9 @@ class static_map { * empty_key_sentinel and erased_key_sentinel must be different values. */ static_map(std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, - Key erased_key_sentinel, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel, Allocator const& alloc = Allocator{}, cudaStream_t stream = 0); @@ -693,11 +714,12 @@ class static_map { */ __host__ __device__ device_mutable_view(pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, - Key erased_key_sentinel) noexcept + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel) noexcept : device_view_base{ - slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} + slots, capacity, + empty_key_sentinel.value, empty_value_sentinel.value, erased_key_sentinel.value} { } @@ -769,13 +791,16 @@ class static_map { CG g, pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel) noexcept + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel) noexcept { device_view_base::initialize_slots( - g, slots, capacity, empty_key_sentinel, empty_value_sentinel); + g, slots, capacity, empty_key_sentinel.value, empty_value_sentinel.value); return device_mutable_view{ - slots, capacity, empty_key_sentinel, empty_value_sentinel, empty_key_sentinel}; + slots, capacity, + empty_key_sentinel, + empty_value_sentinel, + sentinel::erased_key{empty_key_sentinel.value}}; } /* Features erase support */ @@ -784,9 +809,9 @@ class static_map { CG g, pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, - Key erased_key_sentinel) noexcept + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel) noexcept { device_view_base::initialize_slots( g, slots, capacity, empty_key_sentinel, empty_value_sentinel); @@ -890,11 +915,11 @@ class static_map { */ __host__ __device__ device_view(pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, - Key erased_key_sentinel) noexcept + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel) noexcept : device_view_base{ - slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel} + slots, capacity, empty_key_sentinel.value, empty_value_sentinel.value, erased_key_sentinel.value} { } @@ -979,9 +1004,9 @@ class static_map { return device_view(memory_to_use, source_device_view.get_capacity(), - source_device_view.get_empty_key_sentinel(), - source_device_view.get_empty_value_sentinel(), - source_device_view.get_erased_key_sentinel()); + sentinel::empty_key{source_device_view.get_empty_key_sentinel()}, + sentinel::empty_value{source_device_view.get_empty_value_sentinel()}, + sentinel::erased_key{source_device_view.get_erased_key_sentinel()}); } /** @@ -1177,7 +1202,10 @@ class static_map { device_view get_device_view() const noexcept { return device_view( - slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); + slots_, capacity_, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + sentinel::erased_key{erased_key_sentinel_}); } /** @@ -1188,7 +1216,10 @@ class static_map { device_mutable_view get_device_mutable_view() const noexcept { return device_mutable_view( - slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_, erased_key_sentinel_); + slots_, capacity_, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + sentinel::erased_key{erased_key_sentinel_}); } private: diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index 1e75bf826..c66539715 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -104,7 +104,9 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", constexpr std::size_t num = 100; constexpr std::size_t capacity = num * 2; - cuco::static_map map{capacity, sentinel_key, sentinel_value}; + cuco::static_map map{capacity, + cuco::sentinel::empty_key{sentinel_key}, + cuco::sentinel::empty_value{sentinel_value}}; thrust::device_vector insert_keys(num); thrust::device_vector insert_values(num); diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 95c07308a..f74132efd 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -28,7 +28,11 @@ TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t), (int64_t)) constexpr std::size_t num_keys = 1'000'000; constexpr std::size_t capacity = 1'100'000; - cuco::static_map map{capacity, -1, -1, -2}; + + cuco::static_map map{capacity, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::sentinel::erased_key{-2}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/key_sentinel_test.cu b/tests/static_map/key_sentinel_test.cu index a96a60af4..9c801ad74 100644 --- a/tests/static_map/key_sentinel_test.cu +++ b/tests/static_map/key_sentinel_test.cu @@ -36,7 +36,9 @@ TEMPLATE_TEST_CASE_SIG( using Value = T; constexpr std::size_t num_keys{SIZE}; - cuco::static_map map{SIZE * 2, -1, -1}; + cuco::static_map map{SIZE * 2, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/shared_memory_test.cu b/tests/static_map/shared_memory_test.cu index 00bbbcd08..17d23a08b 100644 --- a/tests/static_map/shared_memory_test.cu +++ b/tests/static_map/shared_memory_test.cu @@ -88,7 +88,8 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map", // operator yet std::vector> maps; for (std::size_t map_id = 0; map_id < number_of_maps; ++map_id) { - maps.push_back(std::make_unique(map_capacity, -1, -1)); + maps.push_back(std::make_unique(map_capacity, cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1})); } thrust::device_vector d_keys_exist(number_of_maps * elements_in_map); @@ -154,7 +155,9 @@ __global__ void shared_memory_hash_table_kernel(bool* key_found) using map_type = typename cuco::static_map::device_mutable_view; using find_map_type = typename cuco::static_map::device_view; __shared__ typename map_type::slot_type slots[N]; - auto map = map_type::make_from_uninitialized_slots(cg::this_thread_block(), &slots[0], N, -1, -1); + auto map = map_type::make_from_uninitialized_slots(cg::this_thread_block(), &slots[0], N, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}); auto g = cg::this_thread_block(); std::size_t index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/tests/static_map/stream_test.cu b/tests/static_map/stream_test.cu index aadee823a..26e3c7177 100644 --- a/tests/static_map/stream_test.cu +++ b/tests/static_map/stream_test.cu @@ -33,7 +33,10 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream", cudaStreamCreate(&stream); constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, -1, -1, cuco::cuda_allocator{}, stream}; + cuco::static_map map{1'000'000, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::cuda_allocator{}, stream}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/unique_sequence_test.cu b/tests/static_map/unique_sequence_test.cu index da4b51a3f..7fc0af866 100644 --- a/tests/static_map/unique_sequence_test.cu +++ b/tests/static_map/unique_sequence_test.cu @@ -30,7 +30,9 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys", (int64_t, int64_t)) { constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, -1, -1}; + cuco::static_map map{1'000'000, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); From 666ff5b1002e4dddd5a7c789e7bae389337482bf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Mar 2022 22:25:33 +0000 Subject: [PATCH 14/27] [pre-commit.ci] auto code formatting --- benchmarks/hash_table/static_map_bench.cu | 13 ++-- examples/static_map/custom_type_example.cu | 5 +- examples/static_map/static_map_example.cu | 8 +- include/cuco/detail/dynamic_map.inl | 14 ++-- include/cuco/detail/static_map.inl | 24 +++--- include/cuco/static_map.cuh | 89 ++++++++++++---------- tests/static_map/custom_type_test.cu | 4 +- tests/static_map/erase_test.cu | 4 +- tests/static_map/key_sentinel_test.cu | 5 +- tests/static_map/shared_memory_test.cu | 12 +-- tests/static_map/stream_test.cu | 9 ++- tests/static_map/unique_sequence_test.cu | 5 +- 12 files changed, 105 insertions(+), 87 deletions(-) diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 8c9ea71a0..1e8f7789b 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -90,8 +90,7 @@ static void BM_static_map_insert(::benchmark::State& state) thrust::device_vector d_keys(h_keys); for (auto _ : state) { - map_type map{size, cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}}; + map_type map{size, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; cudaEvent_t start, stop; cudaEventCreate(&start); @@ -121,8 +120,7 @@ static void BM_static_map_search_all(::benchmark::State& state) float occupancy = state.range(1) / float{100}; std::size_t size = num_keys / occupancy; - map_type map{size, cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}}; + map_type map{size, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto view = map.get_device_mutable_view(); std::vector h_keys(num_keys); @@ -163,9 +161,10 @@ static void BM_static_map_erase_all(::benchmark::State& state) std::size_t size = num_keys / occupancy; // static map with erase support - map_type map{size, cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}, - cuco::sentinel::erased_key{-2}}; + map_type map{size, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::sentinel::erased_key{-2}}; auto view = map.get_device_mutable_view(); std::vector h_keys(num_keys); diff --git a/examples/static_map/custom_type_example.cu b/examples/static_map/custom_type_example.cu index 2e116d0ab..a4be18d7f 100644 --- a/examples/static_map/custom_type_example.cu +++ b/examples/static_map/custom_type_example.cu @@ -91,8 +91,9 @@ int main(void) // Construct a map with 100,000 slots using the given empty key/value sentinels. Note the // capacity is chosen knowing we will insert 80,000 keys, for an load factor of 80%. cuco::static_map map{ - 100'000, cuco::sentinel::empty_key{empty_key_sentinel}, - cuco::sentinel::empty_value{empty_value_sentinel}}; + 100'000, + cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}}; // Inserts 80,000 pairs into the map by using the custom hasher and custom equality callable map.insert(pairs_begin, pairs_begin + num_pairs, custom_hash{}, custom_key_equals{}); diff --git a/examples/static_map/static_map_example.cu b/examples/static_map/static_map_example.cu index 31eac21c2..b766cd95c 100644 --- a/examples/static_map/static_map_example.cu +++ b/examples/static_map/static_map_example.cu @@ -32,9 +32,11 @@ int main(void) // for an load factor of 50%. cudaStream_t str; cudaStreamCreate(&str); - cuco::static_map map{ - 100'000, cuco::sentinel::empty_key{empty_key_sentinel}, - cuco::sentinel::empty_value{empty_value_sentinel}, cuco::cuda_allocator{}, str}; + cuco::static_map map{100'000, + cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}, + cuco::cuda_allocator{}, + str}; thrust::device_vector> pairs(50'000); diff --git a/include/cuco/detail/dynamic_map.inl b/include/cuco/detail/dynamic_map.inl index 4315bae51..80020232d 100644 --- a/include/cuco/detail/dynamic_map.inl +++ b/include/cuco/detail/dynamic_map.inl @@ -30,9 +30,10 @@ dynamic_map::dynamic_map(std::size_t initial_capac alloc_{alloc} { submaps_.push_back(std::make_unique>( - initial_capacity, - sentinel::empty_key{empty_key_sentinel}, - sentinel::empty_value{empty_value_sentinel}, alloc)); + initial_capacity, + sentinel::empty_key{empty_key_sentinel}, + sentinel::empty_value{empty_value_sentinel}, + alloc)); submap_views_.push_back(submaps_[0]->get_device_view()); submap_mutable_views_.push_back(submaps_[0]->get_device_mutable_view()); @@ -61,9 +62,10 @@ void dynamic_map::reserve(std::size_t n) else { submap_capacity = capacity_; submaps_.push_back(std::make_unique>( - submap_capacity, - sentinel::empty_key{empty_key_sentinel_}, - sentinel::empty_value{empty_value_sentinel_}, alloc_)); + submap_capacity, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + alloc_)); submap_views_.push_back(submaps_[submap_idx]->get_device_view()); submap_mutable_views_.push_back(submaps_[submap_idx]->get_device_mutable_view()); diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 8ed236dd0..6ace92fcd 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -20,11 +20,12 @@ namespace cuco { template -static_map::static_map(std::size_t capacity, - sentinel::empty_key empty_key_sentinel, - sentinel::empty_value empty_value_sentinel, - Allocator const& alloc, - cudaStream_t stream) +static_map::static_map( + std::size_t capacity, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + Allocator const& alloc, + cudaStream_t stream) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) empty_key_sentinel_{empty_key_sentinel.value}, empty_value_sentinel_{empty_value_sentinel.value}, @@ -44,12 +45,13 @@ static_map::static_map(std::size_t capacity, } template -static_map::static_map(std::size_t capacity, - sentinel::empty_key empty_key_sentinel, - sentinel::empty_value empty_value_sentinel, - sentinel::erased_key erased_key_sentinel, - Allocator const& alloc, - cudaStream_t stream) +static_map::static_map( + std::size_t capacity, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel, + Allocator const& alloc, + cudaStream_t stream) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) empty_key_sentinel_{empty_key_sentinel.value}, empty_value_sentinel_{empty_value_sentinel.value}, diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 46d9c58a6..a0794d93f 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -41,17 +41,23 @@ #include namespace cuco { - + namespace sentinel { - template - struct empty_key{T value; }; +template +struct empty_key { + T value; +}; - template - struct empty_value{T value; }; - - template - struct erased_key{T value; }; -} // namespace sentinel +template +struct empty_value { + T value; +}; + +template +struct erased_key { + T value; +}; +} // namespace sentinel template class dynamic_map; @@ -82,7 +88,7 @@ class dynamic_map; * in the map. For example, given a range of keys specified by device-accessible * iterators, the bulk `insert` function will insert all keys into the map. Note that in order * for a `static_map` instance to support `erase`, the user must provide an `erased_key_sentinel` - * which is distinct from the `empty_key_sentinel` at construction. If `erase` is called on a + * which is distinct from the `empty_key_sentinel` at construction. If `erase` is called on a * `static_map` which was not constructed in this way, a runtime error will be generated. * * The singular device-side operations allow individual threads to perform @@ -92,7 +98,7 @@ class dynamic_map; * immutable view that allows only non-modifying operations such as `find` or * `contains`. The `mutable_device_view` class only allows `insert` and `erase` operations. * The two types are separate to prevent erroneous concurrent insert/erase/find - * operations. Note that the device-side `erase` may only be called if the corresponding + * operations. Note that the device-side `erase` may only be called if the corresponding * `mutable_device_view` was constructed with a user-provided `erased_key_sentinel`. It is * up to the user to ensure this condition is met. * @@ -105,7 +111,8 @@ class dynamic_map; * // Constructs a map with 100,000 slots using -1 and -1 as the empty key/value * // sentinels. The supplied erased key sentinel of -2 must be a different value from the empty * // key sentinel. If erase functionality is not needed, you may elect to not supply an erased - * // key sentinel to the constructor. Note the capacity is chosen knowing we will insert 50,000 keys, + * // key sentinel to the constructor. Note the capacity is chosen knowing we will insert 50,000 + * keys, * // for an load factor of 50%. * static_map m{100'000, empty_key_sentinel, empty_value_sentinel, erased_value_sentinel}; * @@ -186,8 +193,6 @@ class static_map { return cuco::detail::is_packable(); } - - /** * @brief Construct a fixed-size map with the specified capacity and sentinel values. * @brief Construct a statically sized map with the specified number of slots @@ -717,9 +722,11 @@ class static_map { sentinel::empty_key empty_key_sentinel, sentinel::empty_value empty_value_sentinel, sentinel::erased_key erased_key_sentinel) noexcept - : device_view_base{ - slots, capacity, - empty_key_sentinel.value, empty_value_sentinel.value, erased_key_sentinel.value} + : device_view_base{slots, + capacity, + empty_key_sentinel.value, + empty_value_sentinel.value, + erased_key_sentinel.value} { } @@ -796,11 +803,11 @@ class static_map { { device_view_base::initialize_slots( g, slots, capacity, empty_key_sentinel.value, empty_value_sentinel.value); - return device_mutable_view{ - slots, capacity, - empty_key_sentinel, - empty_value_sentinel, - sentinel::erased_key{empty_key_sentinel.value}}; + return device_mutable_view{slots, + capacity, + empty_key_sentinel, + empty_value_sentinel, + sentinel::erased_key{empty_key_sentinel.value}}; } /* Features erase support */ @@ -918,8 +925,11 @@ class static_map { sentinel::empty_key empty_key_sentinel, sentinel::empty_value empty_value_sentinel, sentinel::erased_key erased_key_sentinel) noexcept - : device_view_base{ - slots, capacity, empty_key_sentinel.value, empty_value_sentinel.value, erased_key_sentinel.value} + : device_view_base{slots, + capacity, + empty_key_sentinel.value, + empty_value_sentinel.value, + erased_key_sentinel.value} { } @@ -1002,11 +1012,12 @@ class static_map { g.sync(); #endif - return device_view(memory_to_use, - source_device_view.get_capacity(), - sentinel::empty_key{source_device_view.get_empty_key_sentinel()}, - sentinel::empty_value{source_device_view.get_empty_value_sentinel()}, - sentinel::erased_key{source_device_view.get_erased_key_sentinel()}); + return device_view( + memory_to_use, + source_device_view.get_capacity(), + sentinel::empty_key{source_device_view.get_empty_key_sentinel()}, + sentinel::empty_value{source_device_view.get_empty_value_sentinel()}, + sentinel::erased_key{source_device_view.get_erased_key_sentinel()}); } /** @@ -1201,11 +1212,11 @@ class static_map { */ device_view get_device_view() const noexcept { - return device_view( - slots_, capacity_, - sentinel::empty_key{empty_key_sentinel_}, - sentinel::empty_value{empty_value_sentinel_}, - sentinel::erased_key{erased_key_sentinel_}); + return device_view(slots_, + capacity_, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + sentinel::erased_key{erased_key_sentinel_}); } /** @@ -1215,11 +1226,11 @@ class static_map { */ device_mutable_view get_device_mutable_view() const noexcept { - return device_mutable_view( - slots_, capacity_, - sentinel::empty_key{empty_key_sentinel_}, - sentinel::empty_value{empty_value_sentinel_}, - sentinel::erased_key{erased_key_sentinel_}); + return device_mutable_view(slots_, + capacity_, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + sentinel::erased_key{erased_key_sentinel_}); } private: diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index c66539715..2537c818d 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -104,8 +104,8 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", constexpr std::size_t num = 100; constexpr std::size_t capacity = num * 2; - cuco::static_map map{capacity, - cuco::sentinel::empty_key{sentinel_key}, + cuco::static_map map{capacity, + cuco::sentinel::empty_key{sentinel_key}, cuco::sentinel::empty_value{sentinel_value}}; thrust::device_vector insert_keys(num); diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index f74132efd..95ed32ece 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -29,8 +29,8 @@ TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t), (int64_t)) constexpr std::size_t num_keys = 1'000'000; constexpr std::size_t capacity = 1'100'000; - cuco::static_map map{capacity, - cuco::sentinel::empty_key{-1}, + cuco::static_map map{capacity, + cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}, cuco::sentinel::erased_key{-2}}; diff --git a/tests/static_map/key_sentinel_test.cu b/tests/static_map/key_sentinel_test.cu index 9c801ad74..65ae7624f 100644 --- a/tests/static_map/key_sentinel_test.cu +++ b/tests/static_map/key_sentinel_test.cu @@ -36,9 +36,8 @@ TEMPLATE_TEST_CASE_SIG( using Value = T; constexpr std::size_t num_keys{SIZE}; - cuco::static_map map{SIZE * 2, - cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}}; + cuco::static_map map{ + SIZE * 2, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/shared_memory_test.cu b/tests/static_map/shared_memory_test.cu index 17d23a08b..9f4b1fc08 100644 --- a/tests/static_map/shared_memory_test.cu +++ b/tests/static_map/shared_memory_test.cu @@ -88,8 +88,8 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map", // operator yet std::vector> maps; for (std::size_t map_id = 0; map_id < number_of_maps; ++map_id) { - maps.push_back(std::make_unique(map_capacity, cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1})); + maps.push_back(std::make_unique( + map_capacity, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1})); } thrust::device_vector d_keys_exist(number_of_maps * elements_in_map); @@ -155,9 +155,11 @@ __global__ void shared_memory_hash_table_kernel(bool* key_found) using map_type = typename cuco::static_map::device_mutable_view; using find_map_type = typename cuco::static_map::device_view; __shared__ typename map_type::slot_type slots[N]; - auto map = map_type::make_from_uninitialized_slots(cg::this_thread_block(), &slots[0], N, - cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}); + auto map = map_type::make_from_uninitialized_slots(cg::this_thread_block(), + &slots[0], + N, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}); auto g = cg::this_thread_block(); std::size_t index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/tests/static_map/stream_test.cu b/tests/static_map/stream_test.cu index 26e3c7177..cab215948 100644 --- a/tests/static_map/stream_test.cu +++ b/tests/static_map/stream_test.cu @@ -33,10 +33,11 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream", cudaStreamCreate(&stream); constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, - cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}, - cuco::cuda_allocator{}, stream}; + cuco::static_map map{1'000'000, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::cuda_allocator{}, + stream}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/unique_sequence_test.cu b/tests/static_map/unique_sequence_test.cu index 7fc0af866..7feeb8da9 100644 --- a/tests/static_map/unique_sequence_test.cu +++ b/tests/static_map/unique_sequence_test.cu @@ -30,9 +30,8 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys", (int64_t, int64_t)) { constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, - cuco::sentinel::empty_key{-1}, - cuco::sentinel::empty_value{-1}}; + cuco::static_map map{ + 1'000'000, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); From 9781a6b817d88a714b2d9d5162077a97f06e4012 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Fri, 18 Mar 2022 15:32:44 -0700 Subject: [PATCH 15/27] copyright notices corrected --- include/cuco/detail/static_map.inl | 2 +- include/cuco/detail/static_map_kernels.cuh | 2 +- include/cuco/static_map.cuh | 2 +- tests/static_map/erase_test.cu | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 6ace92fcd..e71d4059d 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 4f77c8a91..b6c8491cb 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index a0794d93f..7719eb52d 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 95ed32ece..6fc308d33 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 7faef67e4aeea517e18ebc3b951288b9f68536ff Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Mon, 21 Mar 2022 12:12:54 -0700 Subject: [PATCH 16/27] sentinel wrappers moved to separate file --- include/cuco/detail/error.hpp | 2 +- include/cuco/sentinel.hpp | 36 ++++++++++++++++++++++++++++++++++ include/cuco/static_map.cuh | 18 +---------------- tests/static_map/erase_test.cu | 2 +- 4 files changed, 39 insertions(+), 19 deletions(-) create mode 100644 include/cuco/sentinel.hpp diff --git a/include/cuco/detail/error.hpp b/include/cuco/detail/error.hpp index 46631cf11..bb5f67e6a 100644 --- a/include/cuco/detail/error.hpp +++ b/include/cuco/detail/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/cuco/sentinel.hpp b/include/cuco/sentinel.hpp new file mode 100644 index 000000000..e5809ab24 --- /dev/null +++ b/include/cuco/sentinel.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace cuco { +namespace sentinel { + +template +struct empty_key { + T value; +}; + +template +struct empty_value { + T value; +}; + +template +struct erased_key { + T value; +}; + +} // namespace sentinel +} // namespace cuco \ No newline at end of file diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 7719eb52d..42b0723e6 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -35,6 +35,7 @@ #include #endif +#include #include #include #include @@ -42,23 +43,6 @@ namespace cuco { -namespace sentinel { -template -struct empty_key { - T value; -}; - -template -struct empty_value { - T value; -}; - -template -struct erased_key { - T value; -}; -} // namespace sentinel - template class dynamic_map; diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu index 6fc308d33..fa91adda2 100644 --- a/tests/static_map/erase_test.cu +++ b/tests/static_map/erase_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From c9a70141360e361b94ed7e35d3b6b2ead82b108f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Mar 2022 19:13:29 +0000 Subject: [PATCH 17/27] [pre-commit.ci] auto code formatting --- include/cuco/sentinel.hpp | 4 ++-- include/cuco/static_map.cuh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/cuco/sentinel.hpp b/include/cuco/sentinel.hpp index e5809ab24..024ba17be 100644 --- a/include/cuco/sentinel.hpp +++ b/include/cuco/sentinel.hpp @@ -32,5 +32,5 @@ struct erased_key { T value; }; -} // namespace sentinel -} // namespace cuco \ No newline at end of file +} // namespace sentinel +} // namespace cuco \ No newline at end of file diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 42b0723e6..1518d6925 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -35,11 +35,11 @@ #include #endif -#include #include #include #include #include +#include namespace cuco { From c0d713cd36de68c3db72a9b04f5d3a231e5b6e42 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Tue, 22 Mar 2022 09:48:09 -0700 Subject: [PATCH 18/27] sentinel constructors added for ctad support --- examples/static_map/custom_type_example.cu | 4 ++-- include/cuco/sentinel.hpp | 5 +++++ tests/static_map/custom_type_test.cu | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/static_map/custom_type_example.cu b/examples/static_map/custom_type_example.cu index a4be18d7f..e9044c0cf 100644 --- a/examples/static_map/custom_type_example.cu +++ b/examples/static_map/custom_type_example.cu @@ -92,8 +92,8 @@ int main(void) // capacity is chosen knowing we will insert 80,000 keys, for an load factor of 80%. cuco::static_map map{ 100'000, - cuco::sentinel::empty_key{empty_key_sentinel}, - cuco::sentinel::empty_value{empty_value_sentinel}}; + cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}}; // Inserts 80,000 pairs into the map by using the custom hasher and custom equality callable map.insert(pairs_begin, pairs_begin + num_pairs, custom_hash{}, custom_key_equals{}); diff --git a/include/cuco/sentinel.hpp b/include/cuco/sentinel.hpp index 024ba17be..53814c438 100644 --- a/include/cuco/sentinel.hpp +++ b/include/cuco/sentinel.hpp @@ -14,21 +14,26 @@ * limitations under the License. */ +#pragma once + namespace cuco { namespace sentinel { template struct empty_key { + __host__ __device__ empty_key(T v) : value{v} {} T value; }; template struct empty_value { + __host__ __device__ empty_value(T v) : value{v} {} T value; }; template struct erased_key { + __host__ __device__ erased_key(T v) : value{v} {} T value; }; diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index 2537c818d..d7c76f8b5 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -105,8 +105,8 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", constexpr std::size_t num = 100; constexpr std::size_t capacity = num * 2; cuco::static_map map{capacity, - cuco::sentinel::empty_key{sentinel_key}, - cuco::sentinel::empty_value{sentinel_value}}; + cuco::sentinel::empty_key{sentinel_key}, + cuco::sentinel::empty_value{sentinel_value}}; thrust::device_vector insert_keys(num); thrust::device_vector insert_values(num); From d92898ca1c8092f436fc3ba6e25277995e3af6ec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Mar 2022 17:03:20 +0000 Subject: [PATCH 19/27] [pre-commit.ci] auto code formatting --- tests/static_map/custom_type_test.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index d7c76f8b5..5f9df68fe 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -104,9 +104,8 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", constexpr std::size_t num = 100; constexpr std::size_t capacity = num * 2; - cuco::static_map map{capacity, - cuco::sentinel::empty_key{sentinel_key}, - cuco::sentinel::empty_value{sentinel_value}}; + cuco::static_map map{ + capacity, cuco::sentinel::empty_key{sentinel_key}, cuco::sentinel::empty_value{sentinel_value}}; thrust::device_vector insert_keys(num); thrust::device_vector insert_values(num); From 5bca985018e652d44f5d9d9a5da67fc5ca39e502 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 23 Mar 2022 11:06:12 -0700 Subject: [PATCH 20/27] CI fix --- tests/static_map/custom_type_test.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index 5f9df68fe..2537c818d 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -104,8 +104,9 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", constexpr std::size_t num = 100; constexpr std::size_t capacity = num * 2; - cuco::static_map map{ - capacity, cuco::sentinel::empty_key{sentinel_key}, cuco::sentinel::empty_value{sentinel_value}}; + cuco::static_map map{capacity, + cuco::sentinel::empty_key{sentinel_key}, + cuco::sentinel::empty_value{sentinel_value}}; thrust::device_vector insert_keys(num); thrust::device_vector insert_values(num); From 60e4b739ff84e399e883b129926de8295e6bb0bc Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Wed, 23 Mar 2022 17:29:39 -0700 Subject: [PATCH 21/27] switch to fetch_add for erase kernels --- include/cuco/detail/static_map.inl | 11 +++++++---- include/cuco/detail/static_map_kernels.cuh | 8 ++++++-- include/cuco/static_map.cuh | 8 +++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index e71d4059d..59734f0e9 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -59,6 +59,10 @@ static_map::static_map( slot_allocator_{alloc}, counter_allocator_{alloc} { + CUCO_RUNTIME_EXPECTS( + empty_key_sentinel_ != erased_key_sentinel_, + "The empty key sentinel and erased key sentinel cannot be the same value."); + slots_ = std::allocator_traits::allocate(slot_allocator_, capacity_); num_successes_ = std::allocator_traits::allocate(counter_allocator_, 1); @@ -148,10 +152,9 @@ template void static_map::erase( InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) { - if (get_empty_key_sentinel() == get_erased_key_sentinel()) - CUCO_RUNTIME_EXPECTS( - get_empty_key_sentinel() != get_erased_key_sentinel(), - "You must provide a unique erased key sentinel value at map construction."); + CUCO_RUNTIME_EXPECTS( + get_empty_key_sentinel() != get_erased_key_sentinel(), + "You must provide a unique erased key sentinel value at map construction."); auto num_keys = std::distance(first, last); if (num_keys == 0) { return; } diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index b6c8491cb..909849d8c 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -180,7 +180,9 @@ __global__ void erase( // compute number of successfully inserted elements for each block // and atomically add to the grand total std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); - if (threadIdx.x == 0) { *num_successes += block_num_successes; } + if (threadIdx.x == 0) { + num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed); + } } template fetch_add(block_num_successes, cuda::std::memory_order_relaxed); + } } /** diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 1518d6925..6060951fa 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -208,8 +208,11 @@ class static_map { cudaStream_t stream = 0); /** - * @brief Construct a fixed-size map with erase capability + * @brief Constructs a fixed-size map with erase capability. * empty_key_sentinel and erased_key_sentinel must be different values. + * + * @throw std::runtime error if the empty key sentinel and erased key sentinel + * are the same value */ static_map(std::size_t capacity, sentinel::empty_key empty_key_sentinel, @@ -301,6 +304,9 @@ class static_map { * @param hash The unary function to apply to hash each key * @param key_equal The binary function to compare two keys for equality * @param stream Stream used for executing the kernels + * + * @throw std::runtime_error if a unique erased key sentinel value was not + * provided at construction */ template , From 4feef9cfe19cce538f42bff6e46974a2b93b9b2e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Mar 2022 00:30:49 +0000 Subject: [PATCH 22/27] [pre-commit.ci] auto code formatting --- include/cuco/detail/static_map.inl | 10 ++++------ include/cuco/detail/static_map_kernels.cuh | 4 ++-- include/cuco/static_map.cuh | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 59734f0e9..c585c6bed 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -59,9 +59,8 @@ static_map::static_map( slot_allocator_{alloc}, counter_allocator_{alloc} { - CUCO_RUNTIME_EXPECTS( - empty_key_sentinel_ != erased_key_sentinel_, - "The empty key sentinel and erased key sentinel cannot be the same value."); + CUCO_RUNTIME_EXPECTS(empty_key_sentinel_ != erased_key_sentinel_, + "The empty key sentinel and erased key sentinel cannot be the same value."); slots_ = std::allocator_traits::allocate(slot_allocator_, capacity_); num_successes_ = std::allocator_traits::allocate(counter_allocator_, 1); @@ -152,9 +151,8 @@ template void static_map::erase( InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) { - CUCO_RUNTIME_EXPECTS( - get_empty_key_sentinel() != get_erased_key_sentinel(), - "You must provide a unique erased key sentinel value at map construction."); + CUCO_RUNTIME_EXPECTS(get_empty_key_sentinel() != get_erased_key_sentinel(), + "You must provide a unique erased key sentinel value at map construction."); auto num_keys = std::distance(first, last); if (num_keys == 0) { return; } diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 909849d8c..d1c2ac5c1 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -180,7 +180,7 @@ __global__ void erase( // compute number of successfully inserted elements for each block // and atomically add to the grand total std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); - if (threadIdx.x == 0) { + if (threadIdx.x == 0) { num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed); } } @@ -213,7 +213,7 @@ __global__ void erase( // compute number of successfully inserted elements for each block // and atomically add to the grand total std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); - if (threadIdx.x == 0) { + if (threadIdx.x == 0) { num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed); } } diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 6060951fa..e5db13664 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -211,7 +211,7 @@ class static_map { * @brief Constructs a fixed-size map with erase capability. * empty_key_sentinel and erased_key_sentinel must be different values. * - * @throw std::runtime error if the empty key sentinel and erased key sentinel + * @throw std::runtime error if the empty key sentinel and erased key sentinel * are the same value */ static_map(std::size_t capacity, @@ -305,7 +305,7 @@ class static_map { * @param key_equal The binary function to compare two keys for equality * @param stream Stream used for executing the kernels * - * @throw std::runtime_error if a unique erased key sentinel value was not + * @throw std::runtime_error if a unique erased key sentinel value was not * provided at construction */ template Date: Fri, 1 Apr 2022 23:19:31 -0700 Subject: [PATCH 23/27] minor doc/style changes --- examples/static_map/static_map_example.cu | 4 ++-- include/cuco/static_map.cuh | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/static_map/static_map_example.cu b/examples/static_map/static_map_example.cu index b766cd95c..018c1278e 100644 --- a/examples/static_map/static_map_example.cu +++ b/examples/static_map/static_map_example.cu @@ -33,8 +33,8 @@ int main(void) cudaStream_t str; cudaStreamCreate(&str); cuco::static_map map{100'000, - cuco::sentinel::empty_key{empty_key_sentinel}, - cuco::sentinel::empty_value{empty_value_sentinel}, + cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}, cuco::cuda_allocator{}, str}; diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index e5db13664..306be99c8 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -293,6 +293,15 @@ class static_map { /** * @brief Erases keys in the range `[first, last)`. * + * For each key `k` in `[first, last)`, if `contains(k) == true), removes `k` and it's + * associated value from the map. Else, no effect. + * + * Side-effects: + * - `contains(k) == false` + * - `find(k) == end()` + * - `insert({k,v}) == true` + * - `get_size()` is reduced by the total number of erased keys + * * This function synchronizes `stream`. * * @tparam InputIt Device accessible input iterator whose `value_type` is From dc61121fc759544ae49b05108c367704c0bd6069 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Apr 2022 06:20:50 +0000 Subject: [PATCH 24/27] [pre-commit.ci] auto code formatting --- include/cuco/static_map.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 306be99c8..3c2cc14a7 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -293,8 +293,8 @@ class static_map { /** * @brief Erases keys in the range `[first, last)`. * - * For each key `k` in `[first, last)`, if `contains(k) == true), removes `k` and it's - * associated value from the map. Else, no effect. + * For each key `k` in `[first, last)`, if `contains(k) == true), removes `k` and it's + * associated value from the map. Else, no effect. * * Side-effects: * - `contains(k) == false` From 753641c7496d06071f696e2786ded52804cf4415 Mon Sep 17 00:00:00 2001 From: Nico Iskos Date: Tue, 5 Apr 2022 12:48:00 -0700 Subject: [PATCH 25/27] prevent implicit conversion of sentinels during construction --- include/cuco/static_map.cuh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 3c2cc14a7..9684bb4bb 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -164,6 +164,17 @@ class static_map { static_map(static_map const&) = delete; static_map(static_map&&) = delete; + + template + static_map(std::size_t, T1 , T2, + Allocator const& = Allocator{}, + cudaStream_t = 0) = delete; + + template + static_map(std::size_t, T1, T2, T3, + Allocator const& = Allocator{}, + cudaStream_t = 0) = delete; + static_map& operator=(static_map const&) = delete; static_map& operator=(static_map&&) = delete; From 03c089c0d27871fc0b2ebe0a7f731114220a0240 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Apr 2022 19:49:58 +0000 Subject: [PATCH 26/27] [pre-commit.ci] auto code formatting --- include/cuco/static_map.cuh | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 9684bb4bb..7aea22de5 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -164,17 +164,13 @@ class static_map { static_map(static_map const&) = delete; static_map(static_map&&) = delete; - - template - static_map(std::size_t, T1 , T2, - Allocator const& = Allocator{}, - cudaStream_t = 0) = delete; - - template - static_map(std::size_t, T1, T2, T3, - Allocator const& = Allocator{}, - cudaStream_t = 0) = delete; - + + template + static_map(std::size_t, T1, T2, Allocator const& = Allocator{}, cudaStream_t = 0) = delete; + + template + static_map(std::size_t, T1, T2, T3, Allocator const& = Allocator{}, cudaStream_t = 0) = delete; + static_map& operator=(static_map const&) = delete; static_map& operator=(static_map&&) = delete; From 7b841a839f8420ae63a308b6fc721eeb61886ad8 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 6 May 2022 09:59:49 -0400 Subject: [PATCH 27/27] Fix a sentinel bug in test --- tests/static_map/duplicate_keys_test.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/static_map/duplicate_keys_test.cu b/tests/static_map/duplicate_keys_test.cu index 5cdfbfe7f..edfc9eca8 100644 --- a/tests/static_map/duplicate_keys_test.cu +++ b/tests/static_map/duplicate_keys_test.cu @@ -34,7 +34,8 @@ TEMPLATE_TEST_CASE_SIG("Duplicate keys", (int64_t, int64_t)) { constexpr std::size_t num_keys{500'000}; - cuco::static_map map{num_keys * 2, -1, -1}; + cuco::static_map map{ + num_keys * 2, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view();