Skip to content

Commit

Permalink
Updates: return a pair of mask and null count
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel committed Nov 10, 2021
1 parent 362eb85 commit bf1bca0
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 134 deletions.
20 changes: 10 additions & 10 deletions cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,17 @@ __global__ void offset_bitmask_binop(Binop op,
* @param stream CUDA stream used for device memory operations and kernel launches
*/
template <typename Binop>
bitmask bitmask_binop(Binop op,
host_span<bitmask_type const*> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::pair<rmm::device_buffer, size_type> bitmask_binop(
Binop op,
host_span<bitmask_type const*> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr};

auto valid_count =
auto null_count =
mask_size_bits -
inplace_bitmask_binop(op,
device_span<bitmask_type>(static_cast<bitmask_type*>(dest_mask.data()),
num_bitmask_words(mask_size_bits)),
Expand All @@ -116,9 +117,8 @@ bitmask bitmask_binop(Binop op,
mask_size_bits,
stream,
mr);
auto null_count = mask_size_bits - valid_count;

return bitmask{std::move(dest_mask), valid_count, null_count};
return std::make_pair(std::move(dest_mask), null_count);
}

/**
Expand Down
28 changes: 14 additions & 14 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@
#include <vector>

namespace cudf {

struct bitmask;

namespace detail {

/**
Expand Down Expand Up @@ -117,29 +114,32 @@ rmm::device_buffer copy_bitmask(
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
bitmask bitmask_and(host_span<bitmask_type const*> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::pair<rmm::device_buffer, size_type> bitmask_and(
host_span<bitmask_type const*> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_and
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
bitmask bitmask_and(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::pair<rmm::device_buffer, size_type> bitmask_and(
table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_or
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
bitmask bitmask_or(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a bitwise AND of the specified bitmasks,
Expand Down
29 changes: 10 additions & 19 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,6 @@

namespace cudf {

/**
* @brief Bitmask output type.
*/
struct bitmask {
rmm::device_buffer mask; ///< Resulting bitmask
size_type num_set_bits; ///< Number of set bits
size_type num_unset_bits; ///< Number of unset bits
};

/**
* @addtogroup column_nullmask
* @{
Expand Down Expand Up @@ -211,32 +202,32 @@ rmm::device_buffer copy_bitmask(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a struct of bitwise AND of the bitmasks of columns of a table,
* count of valid bits and count of null bits
* @brief Returns a pair of bitwise AND of the bitmasks of columns of a table and count of null bits
*
* If any of the columns isn't nullable, it is considered all valid.
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A struct of resulting bitmask, count of valid bits and count of null bits
* @return A pair of resulting bitmask and count of null bits
*/
bitmask bitmask_and(table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::pair<rmm::device_buffer, size_type> bitmask_and(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a struct of bitwise OR of the bitmasks of columns of a table,
* count of valid bits and count of null bits
* @brief Returns a pair of bitwise OR of the bitmasks of columns of a table and count of null bits
*
* If any of the columns isn't nullable, it is considered all valid.
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
* @return A pair of resulting bitmask and count of null bits
*/
bitmask bitmask_or(table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
20 changes: 6 additions & 14 deletions cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,13 +392,9 @@ std::unique_ptr<column> make_fixed_width_column_for_output(column_view const& lh
if (binops::is_null_dependent(op)) {
return make_fixed_width_column(output_type, rhs.size(), mask_state::ALL_VALID, stream, mr);
} else {
auto bitmask_output = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr);
return make_fixed_width_column(output_type,
lhs.size(),
std::move(bitmask_output.mask),
bitmask_output.num_unset_bits,
stream,
mr);
auto [new_mask, null_count] = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr);
return make_fixed_width_column(
output_type, lhs.size(), std::move(new_mask), null_count, stream, mr);
}
};

Expand Down Expand Up @@ -803,13 +799,9 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,

CUDF_EXPECTS((lhs.size() == rhs.size()), "Column sizes don't match");

auto bitmask_output = bitmask_and(table_view({lhs, rhs}), stream, mr);
auto out = make_fixed_width_column(output_type,
lhs.size(),
std::move(bitmask_output.mask),
bitmask_output.num_unset_bits,
stream,
mr);
auto [new_mask, null_count] = bitmask_and(table_view({lhs, rhs}), stream, mr);
auto out =
make_fixed_width_column(output_type, lhs.size(), std::move(new_mask), null_count, stream, mr);

// Check for 0 sized data
if (lhs.is_empty() or rhs.is_empty()) return out;
Expand Down
36 changes: 19 additions & 17 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -356,11 +356,11 @@ void inplace_bitmask_and(device_span<bitmask_type> dest_mask,
}

// Bitwise AND of the masks
bitmask bitmask_and(host_span<bitmask_type const*> masks,
host_span<size_type const> begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::pair<rmm::device_buffer, size_type> bitmask_and(host_span<bitmask_type const*> masks,
host_span<size_type const> begin_bits,
size_type mask_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return bitmask_binop(
[] __device__(bitmask_type left, bitmask_type right) { return left & right; },
Expand All @@ -372,14 +372,14 @@ bitmask bitmask_and(host_span<bitmask_type const*> masks,
}

// Returns the bitwise AND of the null masks of all columns in the table view
bitmask bitmask_and(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
rmm::device_buffer null_mask{0, stream, mr};
if (view.num_rows() == 0 or view.num_columns() == 0) {
return bitmask{std::move(null_mask), 0, 0};
return std::make_pair(std::move(null_mask), 0);
}

std::vector<bitmask_type const*> masks;
Expand All @@ -401,18 +401,18 @@ bitmask bitmask_and(table_view const& view,
mr);
}

return bitmask{std::move(null_mask), 0, 0};
return std::make_pair(std::move(null_mask), 0);
}

// Returns the bitwise OR of the null masks of all columns in the table view
bitmask bitmask_or(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
rmm::device_buffer null_mask{0, stream, mr};
if (view.num_rows() == 0 or view.num_columns() == 0) {
return bitmask{std::move(null_mask), 0, 0};
return std::make_pair(std::move(null_mask), 0);
}

std::vector<bitmask_type const*> masks;
Expand All @@ -434,7 +434,7 @@ bitmask bitmask_or(table_view const& view,
mr);
}

return bitmask{std::move(null_mask), 0, 0};
return std::make_pair(std::move(null_mask), 0);
}

/**
Expand Down Expand Up @@ -506,12 +506,14 @@ rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_
return detail::copy_bitmask(view, rmm::cuda_stream_default, mr);
}

bitmask bitmask_and(table_view const& view, rmm::mr::device_memory_resource* mr)
std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
rmm::mr::device_memory_resource* mr)
{
return detail::bitmask_and(view, rmm::cuda_stream_default, mr);
}

bitmask bitmask_or(table_view const& view, rmm::mr::device_memory_resource* mr)
std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
rmm::mr::device_memory_resource* mr)
{
return detail::bitmask_or(view, rmm::cuda_stream_default, mr);
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -364,9 +364,9 @@ std::unique_ptr<column> add_calendrical_months(column_view const& timestamp_colu
stream,
mr);

auto bitmask_output =
auto [output_null_mask, null_count] =
cudf::detail::bitmask_and(table_view{{timestamp_column, months_column}}, stream, mr);
output->set_null_mask(std::move(bitmask_output.mask), bitmask_output.num_unset_bits);
output->set_null_mask(std::move(output_null_mask), null_count);
return output;
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ void sparse_to_dense_results(table_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto row_bitmask = bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).mask;
auto row_bitmask = bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first;
bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE;
bitmask_type const* row_bitmask_ptr =
skip_key_rows_with_nulls ? static_cast<bitmask_type*>(row_bitmask.data()) : nullptr;
Expand Down Expand Up @@ -502,7 +502,7 @@ void compute_single_pass_aggs(table_view const& keys,
bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE;

auto row_bitmask =
skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).mask : rmm::device_buffer{};
skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).first : rmm::device_buffer{};
thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
Expand Down
5 changes: 1 addition & 4 deletions cpp/src/groupby/sort/aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,10 +538,7 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
*/
auto column_view_with_common_nulls(column_view const& column_0, column_view const& column_1)
{
auto bitmask_output = cudf::bitmask_and(table_view{{column_0, column_1}});
auto new_nullmask = std::move(bitmask_output.mask);
auto null_count = bitmask_output.num_unset_bits;

auto [new_nullmask, null_count] = cudf::bitmask_and(table_view{{column_0, column_1}});
if (null_count == 0) { return std::make_tuple(std::move(new_nullmask), column_0, column_1); }
auto column_view_with_new_nullmask = [](auto const& col, void* nullmask, auto null_count) {
return column_view(col.type(),
Expand Down
9 changes: 3 additions & 6 deletions cpp/src/groupby/sort/sort_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,10 @@ column_view sort_groupby_helper::keys_bitmask_column(rmm::cuda_stream_view strea
{
if (_keys_bitmask_column) return _keys_bitmask_column->view();

auto bitmask_output = cudf::detail::bitmask_and(_keys, stream);
auto [row_bitmask, null_count] = cudf::detail::bitmask_and(_keys, stream);

_keys_bitmask_column = make_numeric_column(data_type(type_id::INT8),
_keys.num_rows(),
std::move(bitmask_output.mask),
bitmask_output.num_unset_bits,
stream);
_keys_bitmask_column = make_numeric_column(
data_type(type_id::INT8), _keys.num_rows(), std::move(row_bitmask), null_count, stream);

auto keys_bitmask_view = _keys_bitmask_column->mutable_view();
using T = id_to_type<type_id::INT8>;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void build_join_hash_table(cudf::table_view const& build,
hash_table.insert(iter, iter + build_table_num_rows, stream.value());
} else {
thrust::counting_iterator<size_type> stencil(0);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).mask;
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())};

// insert valid rows
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/join/semi_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
// contain a NULL in any column as they will never compare to equal.
auto const row_bitmask = (compare_nulls == null_equality::EQUAL)
? rmm::device_buffer{}
: cudf::detail::bitmask_and(right_flattened_keys, stream).mask;
: cudf::detail::bitmask_and(right_flattened_keys, stream).first;
// skip rows that are null here.
thrust::for_each_n(
rmm::exec_policy(stream),
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -319,14 +319,14 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,
// We generate new bitmask by AND of the input columns' bitmasks.
// Note that if the input columns are nullable, the output column will also be nullable (which may
// not have nulls).
auto bitmask_out =
auto [null_mask, null_count] =
cudf::detail::bitmask_and(table_view{{input.parent(), repeat_times}}, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
bitmask_out.num_unset_bits,
std::move(bitmask_out.mask));
null_count,
std::move(null_mask));
}

std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes(
Expand Down
15 changes: 7 additions & 8 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,15 +379,14 @@ std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_paren
// and the _null_mask(). It would be better to AND the bits from the beginning, and apply
// offset() uniformly.
// Alternatively, one could construct a big enough buffer, and use inplace_bitwise_and.
auto bitmask_output = cudf::detail::bitmask_and(parent_child_null_masks,
std::vector<size_type>{0, 0},
child.offset() + child.size(),
stream,
mr);
ret_validity_buffers.push_back(std::move(bitmask_output.mask));
auto [new_mask, null_count] = cudf::detail::bitmask_and(parent_child_null_masks,
std::vector<size_type>{0, 0},
child.offset() + child.size(),
stream,
mr);
ret_validity_buffers.push_back(std::move(new_mask));
return std::make_pair(
reinterpret_cast<bitmask_type const*>(ret_validity_buffers.back().data()),
bitmask_output.num_unset_bits);
reinterpret_cast<bitmask_type const*>(ret_validity_buffers.back().data()), null_count);
}();

return cudf::column_view(
Expand Down
Loading

0 comments on commit bf1bca0

Please sign in to comment.