From 69f2ce6e0a46de47d3043375afb89ad671676519 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 11 Nov 2021 13:32:52 -0800 Subject: [PATCH 1/8] Add static_map::insert_if. --- include/cuco/detail/static_map.inl | 33 +++++++++++++ include/cuco/detail/static_map_kernels.cuh | 54 ++++++++++++++++++++++ include/cuco/static_map.cuh | 29 ++++++++++++ 3 files changed, 116 insertions(+) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 15d3aa17b..8c6bb12e0 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -76,6 +76,39 @@ void static_map::insert(InputIt first, size_ += h_num_successes; } +template +template +void static_map::insert_if(InputIt first, + InputIt last, + StencilIt stencil, + Predicate pred, + Hash hash, + KeyEqual key_equal) +{ + auto num_keys = std::distance(first, last); + if (num_keys == 0) { return; } + + auto const block_size = 128; + auto const stride = 1; + auto const tile_size = 4; + auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); + auto view = get_device_mutable_view(); + + // TODO: memset an atomic variable is unsafe + static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type)); + CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type))); + std::size_t h_num_successes; + + // TODO: Should I specialize the version with a tile size? + detail::insert_if + <<>>(first, first + num_keys, num_successes_, view, stencil, pred, hash, key_equal); + CUCO_CUDA_TRY(cudaMemcpyAsync( + &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost)); + CUCO_CUDA_TRY(cudaDeviceSynchronize()); + + size_ += h_num_successes; +} + template template void static_map::find( diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index e166de3c6..12e855f08 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -156,6 +156,60 @@ __global__ void insert( if (threadIdx.x == 0) { *num_successes += block_num_successes; } } +/** + * @brief Inserts all key/value pairs in the range `[first, last)`. + * + * If multiple keys in `[first, last)` compare equal, it is unspecified which + * element is inserted. + * + * @tparam block_size + * @tparam InputIt Device accessible input iterator whose `value_type` is + * convertible to the map's `value_type` + * @tparam atomicT Type of atomic storage + * @tparam viewT Type of device view allowing access of hash map storage + * @tparam Hash Unary callable type + * @tparam KeyEqual Binary callable type + * @param first Beginning of the sequence of key/value pairs + * @param last End of the sequence of key/value pairs + * @param num_successes The number of successfully inserted key/value pairs + * @param view Mutable device view used to access the hash map's slot storage + * @param hash The unary function to apply to hash each key + * @param key_equal The binary function used to compare two keys for equality + */ +template +__global__ void insert_if( + InputIt first, InputIt last, atomicT* num_successes, viewT view, StencilIt stencil, Predicate pred, Hash hash, KeyEqual key_equal) +{ + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t thread_num_successes = 0; + + auto tid = block_size * blockIdx.x + threadIdx.x; + auto it = first + tid; + auto i = tid; + + while (it < last) { + if (pred(*(stencil + i))) { + typename viewT::value_type const insert_pair{*it}; + if (view.insert(insert_pair, hash, key_equal)) { thread_num_successes++; } + it += gridDim.x * block_size; + } + ++i; + } + + // compute number of successfully inserted elements for each block + // and atomically add to the grand total + std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); + if (threadIdx.x == 0) { *num_successes += block_num_successes; } +} + /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. * diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index d45a971ab..cbbdfa3aa 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -221,6 +221,35 @@ class static_map { typename KeyEqual = thrust::equal_to> void insert(InputIt first, InputIt last, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}); + /** + * @brief Inserts key/value pairs in the range `[first, last)` if `pred` + * of the corresponding stencil returns true. + * + * The key/value pair `*(first + i)` is inserted if `pred( *(stencil + i) )` returns true. + * + * @tparam InputIt Device accessible random access iterator whose `value_type` is + * convertible to the map's `value_type` + * @tparam StencilIt Device accessible random access iterator whose value_type is + * convertible to Predicate's argument type + * @tparam Predicate Unary predicate callable whose return type must be convertible to `bool` and + * argument type is convertible from `std::iterator_traits::value_type`. + * @tparam Hash Unary callable type + * @tparam KeyEqual Binary callable type + * @param first Beginning of the sequence of key/value pairs + * @param last End of the sequence of key/value pairs + * @param stencil Beginning of the stencil sequence + * @param pred Predicate to test on every element in the range `[stencil, stencil + + * std::distance(first, last))` + * @param stream CUDA stream used for insert + */ + template , + typename KeyEqual = thrust::equal_to> + void insert_if( + InputIt first, InputIt last, StencilIt stencil, Predicate pred, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}); + /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. * From e348ada2960840185aeac529ba47cea9eb1cdae9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 11 Nov 2021 15:12:09 -0800 Subject: [PATCH 2/8] Add test of conditional insertion and fix bug. --- include/cuco/detail/static_map_kernels.cuh | 4 ++-- tests/static_map/static_map_test.cu | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 12e855f08..dc80cb4c1 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -199,9 +199,9 @@ __global__ void insert_if( if (pred(*(stencil + i))) { typename viewT::value_type const insert_pair{*it}; if (view.insert(insert_pair, hash, key_equal)) { thread_num_successes++; } - it += gridDim.x * block_size; } - ++i; + it += gridDim.x * block_size; + i += gridDim.x * block_size; } // compute number of successfully inserted elements for each block diff --git a/tests/static_map/static_map_test.cu b/tests/static_map/static_map_test.cu index 6a1de1012..db3bc4b3e 100644 --- a/tests/static_map/static_map_test.cu +++ b/tests/static_map/static_map_test.cu @@ -178,6 +178,21 @@ TEST_CASE("User defined key and value type", "") REQUIRE(all_of(contained.begin(), contained.end(), [] __device__(bool const& b) { return b; })); } + SECTION("All conditionally inserted keys-value pairs should be contained") + { + thrust::device_vector contained(num_pairs); + map.insert_if(insert_pairs, insert_pairs + num_pairs, thrust::counting_iterator(0), + [] __device__(auto const& key) { return (key % 2) == 0; }, hash_key_pair{}, key_pair_equals{}); + map.contains(insert_keys.begin(), + insert_keys.end(), + contained.begin(), + hash_key_pair{}, + key_pair_equals{}); + + REQUIRE(thrust::equal(thrust::device, contained.begin(), contained.end(), thrust::counting_iterator(0), + [] __device__(auto const& idx_contained, auto const& idx) { return ((idx % 2) == 0) == idx_contained; })); + } + SECTION("Non-inserted keys-value pairs should not be contained") { thrust::device_vector contained(num_pairs); From 8891da5475f95fd3f53fd7dc75bc1bfba4372ebb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 16 Nov 2021 16:40:33 -0800 Subject: [PATCH 3/8] Change insert to insert_if_n. --- include/cuco/detail/static_map.inl | 4 ++-- include/cuco/detail/static_map_kernels.cuh | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 8c6bb12e0..e1c2d0a21 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -100,8 +100,8 @@ void static_map::insert_if(InputIt first, std::size_t h_num_successes; // TODO: Should I specialize the version with a tile size? - detail::insert_if - <<>>(first, first + num_keys, num_successes_, view, stencil, pred, hash, key_equal); + detail::insert_if_n + <<>>(first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost)); CUCO_CUDA_TRY(cudaDeviceSynchronize()); diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index dc80cb4c1..018dba4d9 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -184,23 +184,21 @@ template -__global__ void insert_if( - InputIt first, InputIt last, atomicT* num_successes, viewT view, StencilIt stencil, Predicate pred, Hash hash, KeyEqual key_equal) +__global__ void insert_if_n( + InputIt first, std::size_t n, atomicT* num_successes, viewT view, StencilIt stencil, Predicate pred, Hash hash, KeyEqual key_equal) { typedef cub::BlockReduce BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; std::size_t thread_num_successes = 0; auto tid = block_size * blockIdx.x + threadIdx.x; - auto it = first + tid; auto i = tid; - while (it < last) { + while (i < n) { if (pred(*(stencil + i))) { - typename viewT::value_type const insert_pair{*it}; + typename viewT::value_type const insert_pair{*(first + i)}; if (view.insert(insert_pair, hash, key_equal)) { thread_num_successes++; } } - it += gridDim.x * block_size; i += gridDim.x * block_size; } From c31ee58983033567162075846c17dafd1ac4eab8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 16 Nov 2021 17:14:59 -0800 Subject: [PATCH 4/8] Address most PR comments. --- include/cuco/detail/static_map.inl | 17 +++++++++-------- include/cuco/static_map.cuh | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index e1c2d0a21..c632c1052 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -83,28 +83,29 @@ void static_map::insert_if(InputIt first, StencilIt stencil, Predicate pred, Hash hash, - KeyEqual key_equal) + KeyEqual key_equal, + cudaStream_t stream) { auto num_keys = std::distance(first, last); if (num_keys == 0) { return; } - auto const block_size = 128; - auto const stride = 1; - auto const tile_size = 4; + auto constexpr block_size = 128; + auto constexpr stride = 1; + auto constexpr tile_size = 4; auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); auto view = get_device_mutable_view(); // TODO: memset an atomic variable is unsafe static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type)); - CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type))); + CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); std::size_t h_num_successes; // TODO: Should I specialize the version with a tile size? detail::insert_if_n - <<>>(first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); + <<>>(first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( - &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost)); - CUCO_CUDA_TRY(cudaDeviceSynchronize()); + &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); + CUCO_CUDA_TRY(cudaStreamSynchronize(stream)); size_ += h_num_successes; } diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index cbbdfa3aa..2c79d4868 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -248,7 +248,7 @@ class static_map { typename Hash = cuco::detail::MurmurHash3_32, typename KeyEqual = thrust::equal_to> void insert_if( - InputIt first, InputIt last, StencilIt stencil, Predicate pred, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}); + InputIt first, InputIt last, StencilIt stencil, Predicate pred, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}, cudaStream_t stream = 0); /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. From 2bb6c017dae3cdad9ae1fbe1d9dd4eff42e90118 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Nov 2021 09:30:59 -0800 Subject: [PATCH 5/8] Switch to using CG device API. --- include/cuco/detail/static_map.inl | 2 +- include/cuco/detail/static_map_kernels.cuh | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index c632c1052..7d844917c 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -101,7 +101,7 @@ void static_map::insert_if(InputIt first, std::size_t h_num_successes; // TODO: Should I specialize the version with a tile size? - detail::insert_if_n + detail::insert_if_n <<>>(first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 018dba4d9..d1202a243 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -177,6 +177,7 @@ __global__ void insert( * @param key_equal The binary function used to compare two keys for equality */ template (cg::this_thread_block()); auto tid = block_size * blockIdx.x + threadIdx.x; - auto i = tid; + auto i = tid / tile_size; while (i < n) { if (pred(*(stencil + i))) { typename viewT::value_type const insert_pair{*(first + i)}; - if (view.insert(insert_pair, hash, key_equal)) { thread_num_successes++; } + if (view.insert(tile, insert_pair, hash, key_equal)) { thread_num_successes++; } } i += gridDim.x * block_size; } From 5da364f9eec0b02d2a1a58981e09a33484e3b255 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Nov 2021 12:05:42 -0800 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Yunsong Wang --- include/cuco/detail/static_map_kernels.cuh | 20 +++++++++++++++----- include/cuco/static_map.cuh | 2 ++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index d1202a243..e02523b1c 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -157,22 +157,30 @@ __global__ void insert( } /** - * @brief Inserts all key/value pairs in the range `[first, last)`. + * @brief Inserts key/value pairs in the range `[first, first + n)` if `pred` of the + * corresponding stencil returns true. * * If multiple keys in `[first, last)` compare equal, it is unspecified which * element is inserted. * - * @tparam block_size + * @tparam block_size The size of the thread block + * @tparam tile_size The number of threads in the Cooperative Groups used to perform insert * @tparam InputIt Device accessible input iterator whose `value_type` is * convertible to the map's `value_type` * @tparam atomicT Type of atomic storage * @tparam viewT Type of device view allowing access of hash map storage + * @tparam StencilIt Device accessible random access iterator whose value_type is + * convertible to Predicate's argument type + * @tparam Predicate Unary predicate callable whose return type must be convertible to `bool` + * and argument type is convertible from `std::iterator_traits::value_type` * @tparam Hash Unary callable type * @tparam KeyEqual Binary callable type * @param first Beginning of the sequence of key/value pairs - * @param last End of the sequence of key/value pairs + * @param n Number of elements to insert * @param num_successes The number of successfully inserted key/value pairs * @param view Mutable device view used to access the hash map's slot storage + * @param stencil Beginning of the stencil sequence + * @param pred Predicate to test on every element in the range `[s, s + n)` * @param hash The unary function to apply to hash each key * @param key_equal The binary function used to compare two keys for equality */ @@ -201,13 +209,15 @@ __global__ void insert_if_n( typename viewT::value_type const insert_pair{*(first + i)}; if (view.insert(tile, insert_pair, hash, key_equal)) { thread_num_successes++; } } - i += gridDim.x * block_size; + i += (gridDim.x * block_size) / tile_size; } // compute number of successfully inserted elements for each block // and atomically add to the grand total std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); - if (threadIdx.x == 0) { *num_successes += block_num_successes; } + if (threadIdx.x == 0) { + num_matches->fetch_add(block_num_matches, cuda::std::memory_order_relaxed); + } } /** diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 2c79d4868..5c6367f44 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -240,6 +240,8 @@ class static_map { * @param stencil Beginning of the stencil sequence * @param pred Predicate to test on every element in the range `[stencil, stencil + * std::distance(first, last))` + * @param hash The unary function to hash each key + * @param key_equal The binary function to compare two keys for equality * @param stream CUDA stream used for insert */ template Date: Wed, 17 Nov 2021 12:08:53 -0800 Subject: [PATCH 7/8] Apply clang-format. --- include/cuco/detail/static_map.inl | 14 +++++++++----- include/cuco/detail/static_map_kernels.cuh | 18 ++++++++++++------ include/cuco/static_map.cuh | 9 +++++++-- tests/static_map/static_map_test.cu | 18 ++++++++++++++---- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index 7d844917c..c236973e1 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -77,7 +77,11 @@ void static_map::insert(InputIt first, } template -template +template void static_map::insert_if(InputIt first, InputIt last, StencilIt stencil, @@ -92,8 +96,8 @@ void static_map::insert_if(InputIt first, auto constexpr block_size = 128; auto constexpr stride = 1; auto constexpr tile_size = 4; - auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); - auto view = get_device_mutable_view(); + auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); + auto view = get_device_mutable_view(); // TODO: memset an atomic variable is unsafe static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type)); @@ -101,8 +105,8 @@ void static_map::insert_if(InputIt first, std::size_t h_num_successes; // TODO: Should I specialize the version with a tile size? - detail::insert_if_n - <<>>(first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); + detail::insert_if_n<<>>( + first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync( &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); CUCO_CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index e02523b1c..6a5fa94dd 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -193,16 +193,22 @@ template -__global__ void insert_if_n( - InputIt first, std::size_t n, atomicT* num_successes, viewT view, StencilIt stencil, Predicate pred, Hash hash, KeyEqual key_equal) +__global__ void insert_if_n(InputIt first, + std::size_t n, + atomicT* num_successes, + viewT view, + StencilIt stencil, + Predicate pred, + Hash hash, + KeyEqual key_equal) { typedef cub::BlockReduce BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; std::size_t thread_num_successes = 0; - auto tile = cg::tiled_partition(cg::this_thread_block()); - auto tid = block_size * blockIdx.x + threadIdx.x; - auto i = tid / tile_size; + auto tile = cg::tiled_partition(cg::this_thread_block()); + auto tid = block_size * blockIdx.x + threadIdx.x; + auto i = tid / tile_size; while (i < n) { if (pred(*(stencil + i))) { @@ -216,7 +222,7 @@ __global__ void insert_if_n( // and atomically add to the grand total std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); if (threadIdx.x == 0) { - num_matches->fetch_add(block_num_matches, cuda::std::memory_order_relaxed); + num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed); } } diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index 5c6367f44..e7661347d 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -249,8 +249,13 @@ class static_map { typename Predicate, typename Hash = cuco::detail::MurmurHash3_32, typename KeyEqual = thrust::equal_to> - void insert_if( - InputIt first, InputIt last, StencilIt stencil, Predicate pred, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}, cudaStream_t stream = 0); + void insert_if(InputIt first, + InputIt last, + StencilIt stencil, + Predicate pred, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}, + cudaStream_t stream = 0); /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. diff --git a/tests/static_map/static_map_test.cu b/tests/static_map/static_map_test.cu index db3bc4b3e..89e4b3ede 100644 --- a/tests/static_map/static_map_test.cu +++ b/tests/static_map/static_map_test.cu @@ -181,16 +181,26 @@ TEST_CASE("User defined key and value type", "") SECTION("All conditionally inserted keys-value pairs should be contained") { thrust::device_vector contained(num_pairs); - map.insert_if(insert_pairs, insert_pairs + num_pairs, thrust::counting_iterator(0), - [] __device__(auto const& key) { return (key % 2) == 0; }, hash_key_pair{}, key_pair_equals{}); + map.insert_if( + insert_pairs, + insert_pairs + num_pairs, + thrust::counting_iterator(0), + [] __device__(auto const& key) { return (key % 2) == 0; }, + hash_key_pair{}, + key_pair_equals{}); map.contains(insert_keys.begin(), insert_keys.end(), contained.begin(), hash_key_pair{}, key_pair_equals{}); - REQUIRE(thrust::equal(thrust::device, contained.begin(), contained.end(), thrust::counting_iterator(0), - [] __device__(auto const& idx_contained, auto const& idx) { return ((idx % 2) == 0) == idx_contained; })); + REQUIRE(thrust::equal(thrust::device, + contained.begin(), + contained.end(), + thrust::counting_iterator(0), + [] __device__(auto const& idx_contained, auto const& idx) { + return ((idx % 2) == 0) == idx_contained; + })); } SECTION("Non-inserted keys-value pairs should not be contained") From 1af02fa642181f29b181fee076305746d3cc311c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Nov 2021 12:19:36 -0800 Subject: [PATCH 8/8] Remove unnecessary comment. --- include/cuco/detail/static_map.inl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index c236973e1..aa52d69ee 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -104,7 +104,6 @@ void static_map::insert_if(InputIt first, CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); std::size_t h_num_successes; - // TODO: Should I specialize the version with a tile size? detail::insert_if_n<<>>( first, num_keys, num_successes_, view, stencil, pred, hash, key_equal); CUCO_CUDA_TRY(cudaMemcpyAsync(