From 50acf076d4a35bc57dc00a416f0d9507b1992c0f Mon Sep 17 00:00:00 2001 From: MithunR Date: Thu, 2 Dec 2021 14:07:31 -0800 Subject: [PATCH] Fix stream usage in `segmented_gather()` (#9679) `detail::segmented_gather()` inadvertently uses `cuda_default_stream` in some parts of its implementation, while using the user-specified stream in others. This applies to the calls to `copy_range_in_place()`, `allocate_like()`, and `make_lists_column()`. ~This might produce race conditions, which might explain NVIDIA/spark-rapids/issues/4060. It's a rare failure that's quite hard to reproduce.~ This might lead to over-synchronization, though bad output is unlikely. The commit here should sort this out, by switching to the `detail` APIs corresponding to the calls above. Authors: - MithunR (https://github.com/mythrocks) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/9679 --- cpp/src/lists/copying/segmented_gather.cu | 21 ++++++++++++--------- cpp/src/lists/extract.cu | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 8cbcddc1c58..41187b96cdb 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include -#include #include #include #include @@ -88,14 +88,15 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, auto child = std::move(child_table->release().front()); // Create list offsets from gather_map. - auto output_offset = cudf::allocate_like( - gather_map.offsets(), gather_map.size() + 1, mask_allocation_policy::RETAIN, mr); + auto output_offset = cudf::detail::allocate_like( + gather_map.offsets(), gather_map.size() + 1, mask_allocation_policy::RETAIN, stream, mr); auto output_offset_view = output_offset->mutable_view(); - cudf::copy_range_in_place(gather_map.offsets(), - output_offset_view, - gather_map.offset(), - gather_map.offset() + output_offset_view.size(), - 0); + cudf::detail::copy_range_in_place(gather_map.offsets(), + output_offset_view, + gather_map.offset(), + gather_map.offset() + output_offset_view.size(), + 0, + stream); // Assemble list column & return auto null_mask = cudf::detail::copy_bitmask(value_column.parent(), stream, mr); size_type null_count = value_column.null_count(); @@ -103,7 +104,9 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, std::move(output_offset), std::move(child), null_count, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } } // namespace detail diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index 381864e1a68..7c6c612eb25 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -53,7 +53,7 @@ std::unique_ptr make_index_child(column_view const& indices, // `segmented_gather()` on a null index should produce a null row. if (not indices.nullable()) { return std::make_unique(indices, stream); } - auto const d_indices = column_device_view::create(indices); + auto const d_indices = column_device_view::create(indices, stream); // Replace null indices with MAX_SIZE_TYPE, so that gather() returns null for them. auto const null_replaced_iter_begin = cudf::detail::make_null_replacement_iterator(*d_indices, std::numeric_limits::max());