From 71b7fc6ced1c15a93e938ad06d0309393196f3dc Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 25 Mar 2021 11:01:51 +1100 Subject: [PATCH] Replace device_vector with device_uvector in null_mask --- cpp/include/cudf/column/column_factories.hpp | 1 - cpp/include/cudf/detail/null_mask.hpp | 4 +-- cpp/include/cudf/null_mask.hpp | 35 +++++++++----------- cpp/src/bitmask/null_mask.cu | 26 +++++++-------- 4 files changed, 30 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 31196824845..43c2407d629 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -21,7 +21,6 @@ #include #include -#include namespace cudf { /** diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index b0870ef8d9a..77cb321a12c 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -53,7 +53,7 @@ void set_null_mask(bitmask_type *bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream); /** @@ -62,7 +62,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream); /** diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 0d4de1a9beb..ae6c0cfdbd7 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -136,38 +137,32 @@ cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, s * `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2). * * Returns an empty vector if `bitmask == nullptr`. + * * @throws cudf::logic_error if `indices.size() % 2 != 0` - * @throws cudf::logic_error if `indices[2*i] < 0 or - * indices[2*i] > indices[(2*i)+1]` - * - * @param[in] bitmask Bitmask residing in device memory whose bits will be - * counted - * @param[in] indices A vector of indices used to specify ranges to count the - * number of set bits - * @return std::vector A vector storing the number of non-zero bits - * in the specified ranges + * @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]` + * + * @param[in] bitmask Bitmask residing in device memory whose bits will be counted + * @param[in] indices A host_span of indices specifying ranges to count the number of set bits + * @return A vector storing the number of non-zero bits in the specified ranges */ std::vector segmented_count_set_bits(bitmask_type const* bitmask, - std::vector const& indices); + host_span indices); /** * @brief Given a bitmask, counts the number of unset (0) bits in every range * `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2). * * Returns an empty vector if `bitmask == nullptr`. + * * @throws cudf::logic_error if `indices.size() % 2 != 0` - * @throws cudf::logic_error if `indices[2*i] < 0 or - * indices[2*i] > indices[(2*i)+1]` - * - * @param[in] bitmask Bitmask residing in device memory whose bits will be - * counted - * @param[in] indices A vector of indices used to specify ranges to count the - * number of unset bits - * @return std::vector A vector storing the number of zero bits in - * the specified ranges + * @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]` + * + * @param[in] bitmask Bitmask residing in device memory whose bits will be counted + * @param[in] indices A host_span of indices specifying ranges to count the number of unset bits + * @return A vector storing the number of zero bits in the specified ranges */ std::vector segmented_count_unset_bits(bitmask_type const* bitmask, - std::vector const& indices); + host_span indices); /** * @brief Creates a `device_buffer` from a slice of bitmask defined by a range diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 845a5512c27..28d1411c30d 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include @@ -466,7 +466,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, } std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream) { CUDF_EXPECTS(indices.size() % 2 == 0, @@ -489,8 +489,8 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } size_type num_ranges = indices.size() / 2; - thrust::host_vector h_first_indices(num_ranges); - thrust::host_vector h_last_indices(num_ranges); + std::vector h_first_indices(num_ranges); + std::vector h_last_indices(num_ranges); thrust::stable_partition_copy(thrust::seq, std::begin(indices), std::end(indices), @@ -499,9 +499,9 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, h_last_indices.begin(), [](auto i) { return (i % 2) == 0; }); - rmm::device_vector d_first_indices = h_first_indices; - rmm::device_vector d_last_indices = h_last_indices; - rmm::device_vector d_null_counts(num_ranges, 0); + auto d_first_indices = make_device_uvector_async(h_first_indices, stream); + auto d_last_indices = make_device_uvector_async(h_last_indices, stream); + rmm::device_uvector d_null_counts(num_ranges, stream); auto word_num_set_bits = thrust::make_transform_iterator( thrust::make_counting_iterator(0), @@ -510,12 +510,12 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, thrust::make_counting_iterator(0), // We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires // first_word_indices and last_word_indices to have the same type. - to_word_index(true, d_first_indices.data().get())); + to_word_index(true, d_first_indices.data())); auto last_word_indices = thrust::make_transform_iterator( thrust::make_counting_iterator(0), // We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires // first_word_indices and last_word_indices to have the same type. - to_word_index(false, d_last_indices.data().get())); + to_word_index(false, d_last_indices.data())); // first allocate temporary memroy @@ -560,7 +560,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, std::vector ret(num_ranges); CUDA_TRY(cudaMemcpyAsync(ret.data(), - d_null_counts.data().get(), + d_null_counts.data(), num_ranges * sizeof(size_type), cudaMemcpyDeviceToHost, stream.value())); @@ -571,7 +571,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream) { if (indices.empty()) { @@ -669,7 +669,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, size_type start, s // Count non-zero bits in the specified ranges std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices) + host_span indices) { CUDF_FUNC_RANGE(); return detail::segmented_count_set_bits(bitmask, indices, rmm::cuda_stream_default); @@ -677,7 +677,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, // Count zero bits in the specified ranges std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices) + host_span indices) { CUDF_FUNC_RANGE(); return detail::segmented_count_unset_bits(bitmask, indices, rmm::cuda_stream_default);