From ba2b51dd8f1b1dd0611f90742e0ffb835615cd33 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 11 Nov 2021 13:49:36 -0500 Subject: [PATCH] Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits (#9616) Closes https://github.com/rapidsai/cudf/issues/9176 - [x] Update `bitmask_and` and `bitmask_or` to return both resulting mask and count of unset bits - [x] Refactor related implementations to use new `bitmask_and/or` - [x] Update unit tests Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Bradley Dice (https://github.com/bdice) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/9616 --- cpp/include/cudf/detail/null_mask.cuh | 103 +++++++++++++-------- cpp/include/cudf/detail/null_mask.hpp | 6 +- cpp/include/cudf/null_mask.hpp | 14 +-- cpp/src/binaryop/binaryop.cpp | 10 +- cpp/src/bitmask/null_mask.cu | 98 +++++++++++--------- cpp/src/datetime/datetime_ops.cu | 4 +- cpp/src/groupby/hash/groupby.cu | 4 +- cpp/src/groupby/sort/aggregate.cpp | 4 +- cpp/src/groupby/sort/sort_helper.cu | 9 +- cpp/src/join/hash_join.cu | 2 +- cpp/src/join/semi_join.cu | 2 +- cpp/src/strings/repeat_strings.cu | 4 +- cpp/src/structs/utilities.cpp | 20 ++-- cpp/tests/bitmask/bitmask_tests.cpp | 34 ++++--- java/src/main/native/src/ColumnViewJni.cpp | 12 ++- 15 files changed, 187 insertions(+), 139 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index b6d6a2529ed..cf8c3343406 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -23,28 +23,38 @@ #include #include +#include namespace cudf { namespace detail { /** * @brief Computes the merger of an array of bitmasks using a binary operator * + * @tparam block_size Number of threads in each thread block + * @tparam Binop Type of binary operator + * * @param op The binary operator used to combine the bitmasks * @param destination The bitmask to write result into * @param source Array of source mask pointers. All masks must be of same size * @param source_begin_bits Array of offsets into corresponding @p source masks. * Must be same size as source array * @param source_size_bits Number of bits in each mask in @p source + * @param count Pointer to counter of set bits */ -template +template __global__ void offset_bitmask_binop(Binop op, device_span destination, device_span source, device_span source_begin_bits, - size_type source_size_bits) + size_type source_size_bits, + size_type* count_ptr) { - for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; - destination_word_index < destination.size(); + constexpr auto const word_size{detail::size_in_bits()}; + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + + size_type thread_count = 0; + + for (size_type destination_word_index = tid; destination_word_index < destination.size(); destination_word_index += blockDim.x * gridDim.x) { bitmask_type destination_word = detail::get_mask_offset_word(source[0], @@ -52,17 +62,32 @@ __global__ void offset_bitmask_binop(Binop op, source_begin_bits[0], source_begin_bits[0] + source_size_bits); for (size_type i = 1; i < source.size(); i++) { - destination_word = - - op(destination_word, - detail::get_mask_offset_word(source[i], - destination_word_index, - source_begin_bits[i], - source_begin_bits[i] + source_size_bits)); + destination_word = op(destination_word, + detail::get_mask_offset_word(source[i], + destination_word_index, + source_begin_bits[i], + source_begin_bits[i] + source_size_bits)); } destination[destination_word_index] = destination_word; + thread_count += __popc(destination_word); } + + // Subtract any slack bits from the last word + if (tid == 0) { + size_type const last_bit_index = source_size_bits - 1; + size_type const num_slack_bits = word_size - (last_bit_index % word_size) - 1; + if (num_slack_bits > 0) { + size_type const word_index = cudf::word_index(last_bit_index); + thread_count -= __popc(destination[word_index] & set_most_significant_bits(num_slack_bits)); + } + } + + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + size_type block_count = BlockReduce(temp_storage).Sum(thread_count); + + if (threadIdx.x == 0) { atomicAdd(count_ptr, block_count); } } /** @@ -72,7 +97,7 @@ __global__ void offset_bitmask_binop(Binop op, * @param stream CUDA stream used for device memory operations and kernel launches */ template -rmm::device_buffer bitmask_binop( +std::pair bitmask_binop( Binop op, host_span masks, host_span masks_begin_bits, @@ -81,34 +106,35 @@ rmm::device_buffer bitmask_binop( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr}; - - inplace_bitmask_binop(op, - device_span(static_cast(dest_mask.data()), - num_bitmask_words(mask_size_bits)), - masks, - masks_begin_bits, - mask_size_bits, - stream, - mr); - - return dest_mask; + auto null_count = + mask_size_bits - + inplace_bitmask_binop(op, + device_span(static_cast(dest_mask.data()), + num_bitmask_words(mask_size_bits)), + masks, + masks_begin_bits, + mask_size_bits, + stream, + mr); + + return std::make_pair(std::move(dest_mask), null_count); } /** * @brief Performs a merge of the specified bitmasks using the binary operator - * provided, and writes in place to destination + * provided, writes in place to destination and returns count of set bits * - * @param op The binary operator used to combine the bitmasks - * @param dest_mask Destination to which the merged result is written - * @param masks The list of data pointers of the bitmasks to be merged - * @param masks_begin_bits The bit offsets from which each mask is to be merged - * @param mask_size_bits The number of bits to be ANDed in each mask - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer Output bitmask + * @param[in] op The binary operator used to combine the bitmasks + * @param[out] dest_mask Destination to which the merged result is written + * @param[in] masks The list of data pointers of the bitmasks to be merged + * @param[in] masks_begin_bits The bit offsets from which each mask is to be merged + * @param[in] mask_size_bits The number of bits to be ANDed in each mask + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @param[in] mr Device memory resource used to allocate the returned device_buffer + * @return size_type Count of set bits */ template -void inplace_bitmask_binop( +size_type inplace_bitmask_binop( Binop op, device_span dest_mask, host_span masks, @@ -124,6 +150,7 @@ void inplace_bitmask_binop( CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), "Mask pointer cannot be null"); + rmm::device_scalar d_counter{0, stream, mr}; rmm::device_uvector d_masks(masks.size(), stream, mr); rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); @@ -135,11 +162,13 @@ void inplace_bitmask_binop( cudaMemcpyHostToDevice, stream.value())); - cudf::detail::grid_1d config(dest_mask.size(), 256); - offset_bitmask_binop<<>>( - op, dest_mask, d_masks, d_begin_bits, mask_size_bits); + auto constexpr block_size = 256; + cudf::detail::grid_1d config(dest_mask.size(), block_size); + offset_bitmask_binop + <<>>( + op, dest_mask, d_masks, d_begin_bits, mask_size_bits, d_counter.data()); CHECK_CUDA(stream.value()); - stream.synchronize(); + return d_counter.value(stream); } /** diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index f757929d839..d2819e665df 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -114,7 +114,7 @@ rmm::device_buffer copy_bitmask( * * @param stream CUDA stream used for device memory operations and kernel launches */ -rmm::device_buffer bitmask_and( +std::pair bitmask_and( host_span masks, host_span masks_begin_bits, size_type mask_size_bits, @@ -126,7 +126,7 @@ rmm::device_buffer bitmask_and( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -rmm::device_buffer bitmask_and( +std::pair bitmask_and( table_view const& view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -136,7 +136,7 @@ rmm::device_buffer bitmask_and( * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -rmm::device_buffer bitmask_or( +std::pair bitmask_or( table_view const& view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 7146360fd6f..c74e077dc32 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -202,30 +202,32 @@ rmm::device_buffer copy_bitmask( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Returns a bitwise AND of the bitmasks of columns of a table + * @brief Performs bitwise AND of the bitmasks of columns of a table. Returns + * a pair of resulting mask and count of unset bits. * * If any of the columns isn't nullable, it is considered all valid. * If no column in the table is nullable, an empty bitmask is returned. * * @param view The table of columns * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer Output bitmask + * @return A pair of resulting bitmask and count of unset bits */ -rmm::device_buffer bitmask_and( +std::pair bitmask_and( table_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Returns a bitwise OR of the bitmasks of columns of a table + * @brief Performs bitwise OR of the bitmasks of columns of a table. Returns + * a pair of resulting mask and count of unset bits. * * If any of the columns isn't nullable, it is considered all valid. * If no column in the table is nullable, an empty bitmask is returned. * * @param view The table of columns * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer Output bitmask + * @return A pair of resulting bitmask and count of unset bits */ -rmm::device_buffer bitmask_or( +std::pair bitmask_or( table_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 73a3f55163d..b9ed95daf1b 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -392,9 +392,9 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh if (binops::is_null_dependent(op)) { return make_fixed_width_column(output_type, rhs.size(), mask_state::ALL_VALID, stream, mr); } else { - auto new_mask = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr); + auto [new_mask, null_count] = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr); return make_fixed_width_column( - output_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); + output_type, lhs.size(), std::move(new_mask), null_count, stream, mr); } }; @@ -799,9 +799,9 @@ std::unique_ptr binary_operation(column_view const& lhs, CUDF_EXPECTS((lhs.size() == rhs.size()), "Column sizes don't match"); - auto new_mask = bitmask_and(table_view({lhs, rhs}), stream, mr); - auto out = make_fixed_width_column( - output_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); + auto [new_mask, null_count] = bitmask_and(table_view({lhs, rhs}), stream, mr); + auto out = + make_fixed_width_column(output_type, lhs.size(), std::move(new_mask), null_count, stream, mr); // Check for 0 sized data if (lhs.is_empty() or rhs.is_empty()) return out; diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index fe13277ac8e..1cd3def61ac 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -299,40 +299,6 @@ rmm::device_buffer copy_bitmask(column_view const& view, return null_mask; } -// Inplace Bitwise AND of the masks -void inplace_bitmask_and(device_span dest_mask, - host_span masks, - host_span begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - inplace_bitmask_binop( - [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, - dest_mask, - masks, - begin_bits, - mask_size, - stream, - mr); -} - -// Bitwise AND of the masks -rmm::device_buffer bitmask_and(host_span masks, - host_span begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return bitmask_binop( - [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, - masks, - begin_bits, - mask_size, - stream, - mr); -} - cudf::size_type count_set_bits(bitmask_type const* bitmask, size_type start, size_type stop, @@ -371,14 +337,50 @@ cudf::size_type count_unset_bits(bitmask_type const* bitmask, return (num_bits - detail::count_set_bits(bitmask, start, stop, stream)); } +// Inplace Bitwise AND of the masks +void inplace_bitmask_and(device_span dest_mask, + host_span masks, + host_span begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + inplace_bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + dest_mask, + masks, + begin_bits, + mask_size, + stream, + mr); +} + +// Bitwise AND of the masks +std::pair bitmask_and(host_span masks, + host_span begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + begin_bits, + mask_size, + stream, + mr); +} + // Returns the bitwise AND of the null masks of all columns in the table view -rmm::device_buffer bitmask_and(table_view const& view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::pair bitmask_and(table_view const& view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; - if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } + if (view.num_rows() == 0 or view.num_columns() == 0) { + return std::make_pair(std::move(null_mask), 0); + } std::vector masks; std::vector offsets; @@ -399,17 +401,19 @@ rmm::device_buffer bitmask_and(table_view const& view, mr); } - return null_mask; + return std::make_pair(std::move(null_mask), 0); } // Returns the bitwise OR of the null masks of all columns in the table view -rmm::device_buffer bitmask_or(table_view const& view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::pair bitmask_or(table_view const& view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; - if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } + if (view.num_rows() == 0 or view.num_columns() == 0) { + return std::make_pair(std::move(null_mask), 0); + } std::vector masks; std::vector offsets; @@ -430,7 +434,7 @@ rmm::device_buffer bitmask_or(table_view const& view, mr); } - return null_mask; + return std::make_pair(std::move(null_mask), 0); } /** @@ -502,12 +506,14 @@ rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_ return detail::copy_bitmask(view, rmm::cuda_stream_default, mr); } -rmm::device_buffer bitmask_and(table_view const& view, rmm::mr::device_memory_resource* mr) +std::pair bitmask_and(table_view const& view, + rmm::mr::device_memory_resource* mr) { return detail::bitmask_and(view, rmm::cuda_stream_default, mr); } -rmm::device_buffer bitmask_or(table_view const& view, rmm::mr::device_memory_resource* mr) +std::pair bitmask_or(table_view const& view, + rmm::mr::device_memory_resource* mr) { return detail::bitmask_or(view, rmm::cuda_stream_default, mr); } diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 34106bef4ae..ccfad56b4ea 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -351,9 +351,9 @@ std::unique_ptr add_calendrical_months(column_view const& timestamp_colu stream, mr); - auto output_null_mask = + auto [output_null_mask, null_count] = cudf::detail::bitmask_and(table_view{{timestamp_column, months_column}}, stream, mr); - output->set_null_mask(std::move(output_null_mask)); + output->set_null_mask(std::move(output_null_mask), null_count); return output; } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index ef640256927..6a9eaf0af90 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -390,7 +390,7 @@ void sparse_to_dense_results(table_view const& keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto row_bitmask{bitmask_and(keys, stream, rmm::mr::get_current_device_resource())}; + auto row_bitmask = bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first; bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; bitmask_type const* row_bitmask_ptr = skip_key_rows_with_nulls ? static_cast(row_bitmask.data()) : nullptr; @@ -502,7 +502,7 @@ void compute_single_pass_aggs(table_view const& keys, bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; auto row_bitmask = - skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream) : rmm::device_buffer{}; + skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).first : rmm::device_buffer{}; thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 83c6c1bca57..234bb447761 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -538,9 +538,7 @@ void aggregate_result_functor::operator()(aggregation con */ auto column_view_with_common_nulls(column_view const& column_0, column_view const& column_1) { - rmm::device_buffer new_nullmask = cudf::bitmask_and(table_view{{column_0, column_1}}); - auto null_count = cudf::count_unset_bits( - static_cast(new_nullmask.data()), 0, column_0.size()); + auto [new_nullmask, null_count] = cudf::bitmask_and(table_view{{column_0, column_1}}); if (null_count == 0) { return std::make_tuple(std::move(new_nullmask), column_0, column_1); } auto column_view_with_new_nullmask = [](auto const& col, void* nullmask, auto null_count) { return column_view(col.type(), diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 1caf2ff0371..7adb4ccec76 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -276,13 +276,10 @@ column_view sort_groupby_helper::keys_bitmask_column(rmm::cuda_stream_view strea { if (_keys_bitmask_column) return _keys_bitmask_column->view(); - auto row_bitmask = cudf::detail::bitmask_and(_keys, stream); + auto [row_bitmask, null_count] = cudf::detail::bitmask_and(_keys, stream); - _keys_bitmask_column = make_numeric_column(data_type(type_id::INT8), - _keys.num_rows(), - std::move(row_bitmask), - cudf::UNKNOWN_NULL_COUNT, - stream); + _keys_bitmask_column = make_numeric_column( + data_type(type_id::INT8), _keys.num_rows(), std::move(row_bitmask), null_count, stream); auto keys_bitmask_view = _keys_bitmask_column->mutable_view(); using T = id_to_type; diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index b6fe18434eb..e4bd1938ecc 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -92,7 +92,7 @@ void build_join_hash_table(cudf::table_view const& build, hash_table.insert(iter, iter + build_table_num_rows, stream.value()); } else { thrust::counting_iterator stencil(0); - auto const row_bitmask = cudf::detail::bitmask_and(build, stream); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; row_is_valid pred{static_cast(row_bitmask.data())}; // insert valid rows diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 4a2f46d6f43..5b5dd418a97 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -97,7 +97,7 @@ std::unique_ptr> left_semi_anti_join( // contain a NULL in any column as they will never compare to equal. auto const row_bitmask = (compare_nulls == null_equality::EQUAL) ? rmm::device_buffer{} - : cudf::detail::bitmask_and(right_flattened_keys, stream); + : cudf::detail::bitmask_and(right_flattened_keys, stream).first; // skip rows that are null here. thrust::for_each_n( rmm::exec_policy(stream), diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index 2e5be9e55f6..458f3ed885c 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -319,13 +319,13 @@ std::unique_ptr repeat_strings(strings_column_view const& input, // We generate new bitmask by AND of the input columns' bitmasks. // Note that if the input columns are nullable, the output column will also be nullable (which may // not have nulls). - auto null_mask = + auto [null_mask, null_count] = cudf::detail::bitmask_and(table_view{{input.parent(), repeat_times}}, stream, mr); return make_strings_column(strings_count, std::move(offsets_column), std::move(chars_column), - UNKNOWN_NULL_COUNT, + null_count, std::move(null_mask)); } diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 47f8f29385c..d4e2f48feba 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -366,10 +366,10 @@ std::tuple> superimpose_paren auto parent_child_null_masks = std::vector{structs_column.null_mask(), child.null_mask()}; - auto new_child_mask = [&] { + auto [new_child_mask, null_count] = [&] { if (not child.nullable()) { // Adopt parent STRUCT's null mask. - return structs_column.null_mask(); + return std::make_pair(structs_column.null_mask(), 0); } // Both STRUCT and child are nullable. AND() for the child's new null mask. @@ -379,12 +379,14 @@ std::tuple> superimpose_paren // and the _null_mask(). It would be better to AND the bits from the beginning, and apply // offset() uniformly. // Alternatively, one could construct a big enough buffer, and use inplace_bitwise_and. - ret_validity_buffers.push_back(cudf::detail::bitmask_and(parent_child_null_masks, - std::vector{0, 0}, - child.offset() + child.size(), - stream, - mr)); - return reinterpret_cast(ret_validity_buffers.back().data()); + auto [new_mask, null_count] = cudf::detail::bitmask_and(parent_child_null_masks, + std::vector{0, 0}, + child.offset() + child.size(), + stream, + mr); + ret_validity_buffers.push_back(std::move(new_mask)); + return std::make_pair( + reinterpret_cast(ret_validity_buffers.back().data()), null_count); }(); return cudf::column_view( @@ -392,7 +394,7 @@ std::tuple> superimpose_paren child.size(), child.head(), new_child_mask, - cudf::UNKNOWN_NULL_COUNT, + null_count, child.offset(), std::vector{child.child_begin(), child.child_end()}); }; diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index d82ff7f2ac4..c7ae6e12366 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -545,19 +545,25 @@ TEST_F(MergeBitmaskTest, TestBitmaskAnd) auto const input2 = cudf::table_view({bools_col1, bools_col2}); auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); - rmm::device_buffer result1 = cudf::bitmask_and(input1); - rmm::device_buffer result2 = cudf::bitmask_and(input2); - rmm::device_buffer result3 = cudf::bitmask_and(input3); + auto [result1_mask, result1_null_count] = cudf::bitmask_and(input1); + auto [result2_mask, result2_null_count] = cudf::bitmask_and(input2); + auto [result3_mask, result3_null_count] = cudf::bitmask_and(input3); + + constexpr cudf::size_type gold_null_count = 3; + + EXPECT_EQ(result1_null_count, 0); + EXPECT_EQ(result2_null_count, gold_null_count); + EXPECT_EQ(result3_null_count, gold_null_count); auto odd_indices = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); auto odd = cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows()); - EXPECT_EQ(nullptr, result1.data()); + EXPECT_EQ(nullptr, result1_mask.data()); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - result2.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + result2_mask.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - result3.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + result3_mask.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); } TEST_F(MergeBitmaskTest, TestBitmaskOr) @@ -570,19 +576,23 @@ TEST_F(MergeBitmaskTest, TestBitmaskOr) auto const input2 = cudf::table_view({bools_col1, bools_col2}); auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); - rmm::device_buffer result1 = cudf::bitmask_or(input1); - rmm::device_buffer result2 = cudf::bitmask_or(input2); - rmm::device_buffer result3 = cudf::bitmask_or(input3); + auto [result1_mask, result1_null_count] = cudf::bitmask_or(input1); + auto [result2_mask, result2_null_count] = cudf::bitmask_or(input2); + auto [result3_mask, result3_null_count] = cudf::bitmask_or(input3); + + EXPECT_EQ(result1_null_count, 0); + EXPECT_EQ(result2_null_count, 1); + EXPECT_EQ(result3_null_count, 0); auto all_but_index3 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); auto null3 = cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); - EXPECT_EQ(nullptr, result1.data()); + EXPECT_EQ(nullptr, result1_mask.data()); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - result2.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); - EXPECT_EQ(nullptr, result3.data()); + result2_mask.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); + EXPECT_EQ(nullptr, result3_mask.data()); } CUDF_TEST_PROGRAM_MAIN() diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index f95b05d5aeb..5ae9fd03063 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1492,12 +1492,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit cudf::binary_operator op = static_cast(bin_op); switch (op) { - case cudf::binary_operator::BITWISE_AND: - copy->set_null_mask(cudf::bitmask_and(*input_table)); + case cudf::binary_operator::BITWISE_AND: { + auto [new_bitmask, null_count] = cudf::bitmask_and(*input_table); + copy->set_null_mask(std::move(new_bitmask), null_count); break; - case cudf::binary_operator::BITWISE_OR: - copy->set_null_mask(cudf::bitmask_or(*input_table)); + } + case cudf::binary_operator::BITWISE_OR: { + auto [new_bitmask, null_count] = cudf::bitmask_or(*input_table); + copy->set_null_mask(std::move(new_bitmask), null_count); break; + } default: JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "Unsupported merge operation", 0); }