Skip to content

Commit

Permalink
Replace device_vector with device_uvector in null_mask (#7715)
Browse files Browse the repository at this point in the history
Replaces remaining `device_vector` instances in null_mask.cu with `device_uvector`. Change the interface of `segmented_count_[un]set_bits` to take `host_span` instead of `std::vector`.

Authors:
  - Mark Harris (@harrism)

Approvers:
  - Jake Hemstad (@jrhemstad)
  - Vyas Ramasubramani (@vyasr)
  - Paul Taylor (@trxcllnt)

URL: #7715
  • Loading branch information
harrism authored Mar 25, 2021
1 parent 1a1bd66 commit a9b4705
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 35 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void set_null_mask(bitmask_type *bitmask,
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream);

/**
Expand All @@ -62,7 +62,7 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream);

/**
Expand Down
35 changes: 15 additions & 20 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_buffer.hpp>

Expand Down Expand Up @@ -136,38 +137,32 @@ cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, s
* `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2).
*
* Returns an empty vector if `bitmask == nullptr`.
*
* @throws cudf::logic_error if `indices.size() % 2 != 0`
* @throws cudf::logic_error if `indices[2*i] < 0 or
* indices[2*i] > indices[(2*i)+1]`
*
* @param[in] bitmask Bitmask residing in device memory whose bits will be
* counted
* @param[in] indices A vector of indices used to specify ranges to count the
* number of set bits
* @return std::vector<size_type> A vector storing the number of non-zero bits
* in the specified ranges
* @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]`
*
* @param[in] bitmask Bitmask residing in device memory whose bits will be counted
* @param[in] indices A host_span of indices specifying ranges to count the number of set bits
* @return A vector storing the number of non-zero bits in the specified ranges
*/
std::vector<size_type> segmented_count_set_bits(bitmask_type const* bitmask,
std::vector<cudf::size_type> const& indices);
host_span<cudf::size_type const> indices);

/**
* @brief Given a bitmask, counts the number of unset (0) bits in every range
* `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2).
*
* Returns an empty vector if `bitmask == nullptr`.
*
* @throws cudf::logic_error if `indices.size() % 2 != 0`
* @throws cudf::logic_error if `indices[2*i] < 0 or
* indices[2*i] > indices[(2*i)+1]`
*
* @param[in] bitmask Bitmask residing in device memory whose bits will be
* counted
* @param[in] indices A vector of indices used to specify ranges to count the
* number of unset bits
* @return std::vector<size_type> A vector storing the number of zero bits in
* the specified ranges
* @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]`
*
* @param[in] bitmask Bitmask residing in device memory whose bits will be counted
* @param[in] indices A host_span of indices specifying ranges to count the number of unset bits
* @return A vector storing the number of zero bits in the specified ranges
*/
std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask,
std::vector<cudf::size_type> const& indices);
host_span<cudf::size_type const> indices);

/**
* @brief Creates a `device_buffer` from a slice of bitmask defined by a range
Expand Down
26 changes: 13 additions & 13 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/bit.hpp>
Expand All @@ -30,7 +31,6 @@
#include <rmm/device_buffer.hpp>
#include <rmm/device_scalar.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/binary_search.h>
Expand Down Expand Up @@ -466,7 +466,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask,
}

std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(indices.size() % 2 == 0,
Expand All @@ -489,8 +489,8 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
}

size_type num_ranges = indices.size() / 2;
thrust::host_vector<size_type> h_first_indices(num_ranges);
thrust::host_vector<size_type> h_last_indices(num_ranges);
std::vector<size_type> h_first_indices(num_ranges);
std::vector<size_type> h_last_indices(num_ranges);
thrust::stable_partition_copy(thrust::seq,
std::begin(indices),
std::end(indices),
Expand All @@ -499,9 +499,9 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
h_last_indices.begin(),
[](auto i) { return (i % 2) == 0; });

rmm::device_vector<size_type> d_first_indices = h_first_indices;
rmm::device_vector<size_type> d_last_indices = h_last_indices;
rmm::device_vector<size_type> d_null_counts(num_ranges, 0);
auto d_first_indices = make_device_uvector_async(h_first_indices, stream);
auto d_last_indices = make_device_uvector_async(h_last_indices, stream);
rmm::device_uvector<size_type> d_null_counts(num_ranges, stream);

auto word_num_set_bits = thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
Expand All @@ -510,12 +510,12 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
thrust::make_counting_iterator(0),
// We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires
// first_word_indices and last_word_indices to have the same type.
to_word_index(true, d_first_indices.data().get()));
to_word_index(true, d_first_indices.data()));
auto last_word_indices = thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
// We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires
// first_word_indices and last_word_indices to have the same type.
to_word_index(false, d_last_indices.data().get()));
to_word_index(false, d_last_indices.data()));

// first allocate temporary memroy

Expand Down Expand Up @@ -560,7 +560,7 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,

std::vector<size_type> ret(num_ranges);
CUDA_TRY(cudaMemcpyAsync(ret.data(),
d_null_counts.data().get(),
d_null_counts.data(),
num_ranges * sizeof(size_type),
cudaMemcpyDeviceToHost,
stream.value()));
Expand All @@ -571,7 +571,7 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
}

std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream)
{
if (indices.empty()) {
Expand Down Expand Up @@ -669,15 +669,15 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, size_type start, s

// Count non-zero bits in the specified ranges
std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices)
host_span<size_type const> indices)
{
CUDF_FUNC_RANGE();
return detail::segmented_count_set_bits(bitmask, indices, rmm::cuda_stream_default);
}

// Count zero bits in the specified ranges
std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices)
host_span<size_type const> indices)
{
CUDF_FUNC_RANGE();
return detail::segmented_count_unset_bits(bitmask, indices, rmm::cuda_stream_default);
Expand Down

0 comments on commit a9b4705

Please sign in to comment.