From fac35b48490ed659d805eb5a8e62016622ac3fea Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 26 Oct 2022 13:33:21 -0400 Subject: [PATCH] Fix some libcudf calls to cudf::detail::gather (#11963) Fixes a couple of source files that were calling gather by type-dispatching directly to the internal `column_gatherer` functor instead of using the `cudf::detail::gather` function(s). This simplifies the code and improves maintainability. For example, extra code to resolve the null-mask is eliminated since the appropriate `cudf::detail::gather` call does this automatically. No functionality has changed; this is just code cleanup. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/11963 --- cpp/src/lists/copying/gather.cu | 41 ++++++++-------------------- cpp/src/partitioning/partitioning.cu | 17 ++++++------ 2 files changed, 19 insertions(+), 39 deletions(-) diff --git a/cpp/src/lists/copying/gather.cu b/cpp/src/lists/copying/gather.cu index ae9fab4dda2..eda46e05f18 100644 --- a/cpp/src/lists/copying/gather.cu +++ b/cpp/src/lists/copying/gather.cu @@ -100,36 +100,17 @@ std::unique_ptr gather_list_leaf(column_view const& column, size_type gather_map_size = gd.gather_map_size; // call the normal gather - auto leaf_column = cudf::type_dispatcher( - column.type(), - cudf::detail::column_gatherer{}, - column, - gather_map_begin, - gather_map_begin + gather_map_size, - // note : we don't need to bother checking for out-of-bounds here since - // our inputs at this stage aren't coming from the user. - false, - stream, - mr); - - // the column_gatherer doesn't create the null mask because it expects - // that will be done in the gather_bitmask() step. however, gather_bitmask() - // only happens at the root level, and by definition this column is a - // leaf. 
so we have to generate the bitmask ourselves. - // TODO : it might make sense to expose a gather() function that takes a column_view and - // returns a column that does this work correctly. - size_type null_count = column.null_count(); - if (null_count > 0) { - auto list_cdv = column_device_view::create(column, stream); - auto validity = cudf::detail::valid_if( - gather_map_begin, - gather_map_begin + gd.gather_map_size, - [cdv = *list_cdv] __device__(int index) { return cdv.is_valid(index) ? true : false; }, - stream, - mr); - - leaf_column->set_null_mask(std::move(validity.first), validity.second); - } + // note : we don't need to bother checking for out-of-bounds here since + // our inputs at this stage aren't coming from the user. + auto gather_table = cudf::detail::gather(cudf::table_view({column}), + gather_map_begin, + gather_map_begin + gather_map_size, + out_of_bounds_policy::DONT_CHECK, + stream, + mr); + auto leaf_column = std::move(gather_table->release().front()); + + if (column.null_count() == 0) { leaf_column->set_null_mask(rmm::device_buffer{}, 0); } return leaf_column; } diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index e4d366e7d01..cbe65354696 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -436,15 +437,13 @@ struct copy_block_partitions_dispatcher { grid_size, stream); - // Use gather instead for non-fixed width types - return type_dispatcher(input.type(), - detail::column_gatherer{}, - input, - gather_map.begin(), - gather_map.end(), - false, - stream, - mr); + auto gather_table = cudf::detail::gather(cudf::table_view({input}), + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + return std::move(gather_table->release().front()); } };