Skip to content

Commit

Permalink
[pre-commit.ci] auto code formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
pre-commit-ci[bot] committed Apr 7, 2022
1 parent 3906909 commit 5c37654
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 56 deletions.
37 changes: 19 additions & 18 deletions include/cuco/detail/dynamic_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -190,40 +190,41 @@ void dynamic_map<Key, Value, Scope, Allocator>::erase(InputIt first,
CUCO_CUDA_TRY(cudaMemset(num_successes_, 0, sizeof(atomic_ctr_type)));

// zero out submap success counters
if(submaps_.size() > 1) {
if (submaps_.size() > 1) {
static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type));
for(int i = 0; i < submaps_.size(); ++i) {
for (int i = 0; i < submaps_.size(); ++i) {
CUCO_CUDA_TRY(cudaMemset(submap_num_successes_[i], 0, sizeof(atomic_ctr_type)));
}
}

auto const temp_storage_size = submaps_.size() * sizeof(unsigned long long);

detail::erase<block_size, tile_size, cuco::pair_type<key_type, mapped_type>>
<<<grid_size, block_size, temp_storage_size>>>(
first,
first + num_keys,
submap_views_.data().get(),
submap_mutable_views_.data().get(),
num_successes_,
d_submap_num_successes_.data().get(),
submaps_.size(),
hash,
key_equal);
<<<grid_size, block_size, temp_storage_size>>>(first,
first + num_keys,
submap_views_.data().get(),
submap_mutable_views_.data().get(),
num_successes_,
d_submap_num_successes_.data().get(),
submaps_.size(),
hash,
key_equal);

// update total dynamic map size
std::size_t h_num_successes;
CUCO_CUDA_TRY(
cudaMemcpy(&h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost));
size_ -= h_num_successes;
if(submaps_.size() == 1) {

if (submaps_.size() == 1) {
submaps_[0]->size_ -= h_num_successes;
} else {
for(int i = 0; i < submaps_.size(); ++i) {
for (int i = 0; i < submaps_.size(); ++i) {
std::size_t h_submap_num_successes;
CUCO_CUDA_TRY(cudaMemcpy(
&h_submap_num_successes, submap_num_successes_[i], sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost));
CUCO_CUDA_TRY(cudaMemcpy(&h_submap_num_successes,
submap_num_successes_[i],
sizeof(atomic_ctr_type),
cudaMemcpyDeviceToHost));
submaps_[i]->size_ -= h_submap_num_successes;
}
}
Expand Down
60 changes: 29 additions & 31 deletions include/cuco/detail/dynamic_map_kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -203,14 +203,14 @@ __global__ void erase(InputIt first,

std::size_t thread_num_successes = 0;

auto tid = block_size * blockIdx.x + threadIdx.x;
auto it = first + tid;
auto tid = block_size * blockIdx.x + threadIdx.x;
auto it = first + tid;

if(num_submaps > 1) {
for(int i = threadIdx.x; i < num_submaps; i += block_size)
if (num_submaps > 1) {
for (int i = threadIdx.x; i < num_submaps; i += block_size)
submap_block_num_successes[i] = 0;
__syncthreads();

while (it < last) {
int i;
for (i = 0; i < num_submaps; ++i) {
Expand All @@ -224,8 +224,7 @@ __global__ void erase(InputIt first,
}
} else {
while (it < last) {
if(submap_mutable_views[0].erase(*it, hash, key_equal))
thread_num_successes++;
if (submap_mutable_views[0].erase(*it, hash, key_equal)) thread_num_successes++;
it += gridDim.x * blockDim.x;
}
}
Expand All @@ -235,11 +234,11 @@ __global__ void erase(InputIt first,
num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed);
}

if(num_submaps > 1) {
for(int i = 0; i < num_submaps; ++i) {
if(threadIdx.x == 0) {
submap_num_successes[i]->fetch_add(
static_cast<std::size_t>(submap_block_num_successes[i]), cuda::std::memory_order_relaxed);
if (num_submaps > 1) {
for (int i = 0; i < num_submaps; ++i) {
if (threadIdx.x == 0) {
submap_num_successes[i]->fetch_add(static_cast<std::size_t>(submap_block_num_successes[i]),
cuda::std::memory_order_relaxed);
}
}
}
Expand All @@ -255,14 +254,14 @@ template <uint32_t block_size,
typename Hash,
typename KeyEqual>
__global__ void erase(InputIt first,
InputIt last,
viewT* submap_views,
mutableViewT* submap_mutable_views,
atomicT* num_successes,
atomicT** submap_num_successes,
const uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
InputIt last,
viewT* submap_views,
mutableViewT* submap_mutable_views,
atomicT* num_successes,
atomicT** submap_num_successes,
const uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
typedef cub::BlockReduce<std::size_t, block_size> BlockReduce;
__shared__ typename BlockReduce::TempStorage temp_storage;
Expand All @@ -274,13 +273,13 @@ __global__ void erase(InputIt first,
auto tid = block_size * blockIdx.x + threadIdx.x;
auto it = first + tid / tile_size;

if(num_submaps > 1) {
for(int i = threadIdx.x; i < num_submaps; i += block_size)
if (num_submaps > 1) {
for (int i = threadIdx.x; i < num_submaps; i += block_size)
submap_block_num_successes[i] = 0;
__syncthreads();

while (it < last) {
auto erased = false;
auto erased = false;
int i;
for (i = 0; i < num_submaps; ++i) {
erased = submap_mutable_views[i].erase(tile, *it, hash, key_equal);
Expand All @@ -295,8 +294,7 @@ __global__ void erase(InputIt first,
} else {
while (it < last) {
auto erased = submap_mutable_views[0].erase(tile, *it, hash, key_equal);
if (erased && tile.thread_rank() == 0)
thread_num_successes++;
if (erased && tile.thread_rank() == 0) thread_num_successes++;

it += (gridDim.x * blockDim.x) / tile_size;
}
Expand All @@ -307,11 +305,11 @@ __global__ void erase(InputIt first,
num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed);
}

if(num_submaps > 1) {
for(int i = 0; i < num_submaps; ++i) {
if(threadIdx.x == 0) {
submap_num_successes[i]->fetch_add(
static_cast<std::size_t>(submap_block_num_successes[i]), cuda::std::memory_order_relaxed);
if (num_submaps > 1) {
for (int i = 0; i < num_submaps; ++i) {
if (threadIdx.x == 0) {
submap_num_successes[i]->fetch_add(static_cast<std::size_t>(submap_block_num_successes[i]),
cuda::std::memory_order_relaxed);
}
}
}
Expand Down
7 changes: 4 additions & 3 deletions include/cuco/dynamic_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@

#include <cooperative_groups.h>
#include <cub/cub.cuh>
#include <cuco/detail/dynamic_map_kernels.cuh>
#include <cuco/detail/error.hpp>
#include <cuco/static_map.cuh>
#include <cuda/std/atomic>
#include <cuco/detail/dynamic_map_kernels.cuh>
#include <thrust/device_vector.h>

namespace cuco {
Expand Down Expand Up @@ -277,8 +277,9 @@ class dynamic_map {
submap_mutable_views_; ///< vector of mutable device views for each submap
std::size_t min_insert_size_{}; ///< min remaining capacity of submap for insert
atomic_ctr_type* num_successes_; ///< number of successfully inserted keys on insert
std::vector<atomic_ctr_type*> submap_num_successes_; ///< number of succesfully erased keys for each submap
thrust::device_vector<atomic_ctr_type*> d_submap_num_successes_;
std::vector<atomic_ctr_type*>
submap_num_successes_; ///< number of succesfully erased keys for each submap
thrust::device_vector<atomic_ctr_type*> d_submap_num_successes_;
Allocator alloc_{}; ///< Allocator passed to submaps to allocate their device storage
counter_allocator_type counter_allocator_{}; ///< Allocator used to allocate `num_successes_`
};
Expand Down
9 changes: 5 additions & 4 deletions tests/dynamic_map/erase_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t))
map.insert(pairs_begin2, pairs_begin2 + 4 * num_keys);

// map should resize twice if the erased slots are successfully reused
REQUIRE(map.get_capacity() == 8*num_keys);
REQUIRE(map.get_capacity() == 8 * num_keys);
// check that keys can be successfully deleted from only the first and second submaps
map.erase(d_keys2.begin(), d_keys2.begin() + 2*num_keys);
map.erase(d_keys2.begin(), d_keys2.begin() + 2 * num_keys);
map.contains(d_keys2.begin(), d_keys2.end(), d_keys_exist2.begin());

REQUIRE(cuco::test::none_of(d_keys_exist2.begin(),
Expand All @@ -120,8 +120,9 @@ TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t))
d_keys_exist2.end(),
[] __device__(const bool key_found) { return key_found; }));

REQUIRE(map.get_size() == 2*num_keys);
// check that keys can be successfully deleted from all submaps (some will be unsuccessful erases)
REQUIRE(map.get_size() == 2 * num_keys);
// check that keys can be successfully deleted from all submaps (some will be unsuccessful
// erases)
map.erase(d_keys2.begin(), d_keys2.end());

map.contains(d_keys2.begin(), d_keys2.end(), d_keys_exist2.begin());
Expand Down

0 comments on commit 5c37654

Please sign in to comment.