From b0dc5d3b11917761e4c19877c11915d2da3fd4e1 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 4 Oct 2023 13:46:20 -0700 Subject: [PATCH 01/15] passing tests --- cpp/src/merge/merge.cu | 167 +++++++++++++++++++++++++++++------------ 1 file changed, 120 insertions(+), 47 deletions(-) diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index c0765b48205..6e0a94d6ab9 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -37,12 +38,15 @@ #include #include #include +#include #include #include #include #include +#include + namespace cudf { namespace detail { namespace { @@ -146,11 +150,11 @@ void materialize_bitmask(column_view const& left_col, CUDF_CHECK_CUDA(stream.value()); } -struct side_index_generator { - side _side; +// struct side_index_generator { +// side _side; - __device__ index_type operator()(size_type i) const noexcept { return index_type{_side, i}; } -}; +// __device__ index_type operator()(size_type i) const noexcept { return index_type{_side, i}; } +// }; /** * @brief Generates the row indices and source side (left or right) in accordance with the index @@ -169,56 +173,125 @@ struct side_index_generator { * * @return A device_uvector of merged indices */ -index_vector generate_merged_indices(table_view const& left_table, - table_view const& right_table, - std::vector const& column_order, - std::vector const& null_precedence, - bool nullable, - rmm::cuda_stream_view stream) +// index_vector generate_merged_indices(table_view const& left_table, +// table_view const& right_table, +// std::vector const& column_order, +// std::vector const& null_precedence, +// bool nullable, +// rmm::cuda_stream_view stream) +// { +// size_type const left_size = left_table.num_rows(); +// size_type const right_size = right_table.num_rows(); +// size_type const total_size = left_size + right_size; + +// auto left_gen = side_index_generator{side::LEFT}; +// auto right_gen = side_index_generator{side::RIGHT}; +// auto left_begin = cudf::detail::make_counting_transform_iterator(0, left_gen); +// auto right_begin = cudf::detail::make_counting_transform_iterator(0, right_gen); + +// index_vector merged_indices(total_size, stream); + +// auto lhs_device_view = table_device_view::create(left_table, stream); +// auto rhs_device_view = table_device_view::create(right_table, stream); + +// auto d_column_order = cudf::detail::make_device_uvector_async( +// column_order, stream, rmm::mr::get_current_device_resource()); + +// if (nullable) { +// auto d_null_precedence = cudf::detail::make_device_uvector_async( +// null_precedence, stream, rmm::mr::get_current_device_resource()); + +// auto ineq_op = detail::row_lexicographic_tagged_comparator( +// *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); +// thrust::merge(rmm::exec_policy(stream), +// left_begin, +// left_begin + left_size, +// right_begin, +// right_begin + right_size, +// merged_indices.begin(), +// ineq_op); +// } else { +// auto ineq_op = detail::row_lexicographic_tagged_comparator( +// *lhs_device_view, *rhs_device_view, d_column_order.data()); +// thrust::merge(rmm::exec_policy(stream), +// left_begin, +// left_begin + left_size, +// right_begin, +// right_begin + right_size, +// merged_indices.begin(), +// ineq_op); +// } + +// CUDF_CHECK_CUDA(stream.value()); + +// return merged_indices; +// } + +index_vector generate_new_merged_indices(table_view const& left_table, + table_view const& right_table, + std::vector const& column_order, + std::vector const& null_precedence, + bool nullable, + rmm::cuda_stream_view stream) { size_type const left_size = left_table.num_rows(); size_type const right_size = right_table.num_rows(); size_type const total_size = left_size + right_size; - auto left_gen = side_index_generator{side::LEFT}; - auto right_gen = side_index_generator{side::RIGHT}; - auto left_begin = cudf::detail::make_counting_transform_iterator(0, left_gen); - auto right_begin = cudf::detail::make_counting_transform_iterator(0, right_gen); - index_vector merged_indices(total_size, stream); + thrust::fill(rmm::exec_policy_nosync(stream), + merged_indices.begin(), + merged_indices.end(), + thrust::make_pair(side::RIGHT, 1)); + + auto left_indices_col = + cudf::lower_bound(right_table, left_table, column_order, null_precedence, stream); + auto left_indices = left_indices_col->view(); + auto left_indices_mutable = left_indices_col->mutable_view(); + auto left_indices_begin = left_indices.begin(); + auto left_indices_end = left_indices.end(); + auto left_indices_mutable_begin = left_indices_mutable.begin(); + + auto left_counter = thrust::make_counting_iterator(0); + thrust::for_each( + rmm::exec_policy_nosync(stream), + left_counter, + left_counter + left_size, + [merged = merged_indices.data(), left = left_indices_begin] __device__(auto const idx) { + auto const final_left_idx = left[idx] + idx; + merged[final_left_idx] = thrust::make_pair(side::LEFT, idx); + }); - auto lhs_device_view = table_device_view::create(left_table, stream); - auto rhs_device_view = table_device_view::create(right_table, stream); - - auto d_column_order = cudf::detail::make_device_uvector_async( - column_order, stream, rmm::mr::get_current_device_resource()); - - if (nullable) { - auto d_null_precedence = cudf::detail::make_device_uvector_async( - null_precedence, stream, rmm::mr::get_current_device_resource()); - - auto ineq_op = detail::row_lexicographic_tagged_comparator( - *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); - thrust::merge(rmm::exec_policy(stream), - left_begin, - left_begin + left_size, - right_begin, - right_begin + right_size, - merged_indices.begin(), - ineq_op); - } else { - auto ineq_op = detail::row_lexicographic_tagged_comparator( - *lhs_device_view, *rhs_device_view, d_column_order.data()); - thrust::merge(rmm::exec_policy(stream), - left_begin, - left_begin + left_size, - right_begin, - right_begin + right_size, - merged_indices.begin(), - ineq_op); - } + rmm::device_uvector right_mask(total_size, stream); + auto total_counter = thrust::make_counting_iterator(0); + thrust::for_each( + rmm::exec_policy_nosync(stream), + total_counter, + total_counter + total_size, + [merged = merged_indices.data(), right = right_mask.data()] __device__(auto idx) { + auto [side, val] = merged[idx]; + if (side == side::LEFT) { + right[idx] = 0; + } else { + right[idx] = 1; + } + }); - CUDF_CHECK_CUDA(stream.value()); + rmm::device_uvector right_indices(total_size, stream); + thrust::exclusive_scan( + rmm::exec_policy_nosync(stream), right_mask.begin(), right_mask.end(), right_indices.begin()); + + auto right_counter = thrust::make_counting_iterator(0); + thrust::for_each(rmm::exec_policy_nosync(stream), + right_counter, + right_counter + total_size, + [merged = merged_indices.data(), + right_m = right_mask.data(), + right_i = right_indices.data()] __device__(auto const idx) { + auto const right_idx = right_i[idx]; + auto const right_mask = right_m[idx]; + if (right_mask) { merged[idx] = thrust::make_pair(side::RIGHT, right_idx); } + }); return merged_indices; } @@ -418,7 +491,7 @@ table_ptr_type merge(cudf::table_view const& left_table, // extract merged row order according to indices: // - auto const merged_indices = generate_merged_indices( + auto const merged_indices = generate_new_merged_indices( index_left_view, index_right_view, column_order, null_precedence, nullable, stream); // create merged table: From 3343ddba3c575c5e6ea3d27f7c9654495d542f1e Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 4 Oct 2023 13:57:58 -0700 Subject: [PATCH 02/15] use new algorithm only for nested types --- cpp/src/merge/merge.cu | 139 ++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 6e0a94d6ab9..0f19ce2f250 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -150,11 +150,11 @@ void materialize_bitmask(column_view const& left_col, CUDF_CHECK_CUDA(stream.value()); } -// struct side_index_generator { -// side _side; +struct side_index_generator { + side _side; -// __device__ index_type operator()(size_type i) const noexcept { return index_type{_side, i}; } -// }; + __device__ index_type operator()(size_type i) const noexcept { return index_type{_side, i}; } +}; /** * @brief Generates the row indices and source side (left or right) in accordance with the index @@ -173,66 +173,66 @@ void materialize_bitmask(column_view const& left_col, * * @return A device_uvector of merged indices */ -// index_vector generate_merged_indices(table_view const& left_table, -// table_view const& right_table, -// std::vector const& column_order, -// std::vector const& null_precedence, -// bool nullable, -// rmm::cuda_stream_view stream) -// { -// size_type const left_size = left_table.num_rows(); -// size_type const right_size = right_table.num_rows(); -// size_type const total_size = left_size + right_size; - -// auto left_gen = side_index_generator{side::LEFT}; -// auto right_gen = side_index_generator{side::RIGHT}; -// auto left_begin = cudf::detail::make_counting_transform_iterator(0, left_gen); -// auto right_begin = cudf::detail::make_counting_transform_iterator(0, right_gen); - -// index_vector merged_indices(total_size, stream); - -// auto lhs_device_view = table_device_view::create(left_table, stream); -// auto rhs_device_view = table_device_view::create(right_table, stream); - -// auto d_column_order = cudf::detail::make_device_uvector_async( -// column_order, stream, rmm::mr::get_current_device_resource()); - -// if (nullable) { -// auto d_null_precedence = cudf::detail::make_device_uvector_async( -// null_precedence, stream, rmm::mr::get_current_device_resource()); - -// auto ineq_op = detail::row_lexicographic_tagged_comparator( -// *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); -// thrust::merge(rmm::exec_policy(stream), -// left_begin, -// left_begin + left_size, -// right_begin, -// right_begin + right_size, -// merged_indices.begin(), -// ineq_op); -// } else { -// auto ineq_op = detail::row_lexicographic_tagged_comparator( -// *lhs_device_view, *rhs_device_view, d_column_order.data()); -// thrust::merge(rmm::exec_policy(stream), -// left_begin, -// left_begin + left_size, -// right_begin, -// right_begin + right_size, -// merged_indices.begin(), -// ineq_op); -// } - -// CUDF_CHECK_CUDA(stream.value()); - -// return merged_indices; -// } - -index_vector generate_new_merged_indices(table_view const& left_table, - table_view const& right_table, - std::vector const& column_order, - std::vector const& null_precedence, - bool nullable, - rmm::cuda_stream_view stream) +index_vector generate_merged_indices(table_view const& left_table, + table_view const& right_table, + std::vector const& column_order, + std::vector const& null_precedence, + bool nullable, + rmm::cuda_stream_view stream) +{ + size_type const left_size = left_table.num_rows(); + size_type const right_size = right_table.num_rows(); + size_type const total_size = left_size + right_size; + + auto left_gen = side_index_generator{side::LEFT}; + auto right_gen = side_index_generator{side::RIGHT}; + auto left_begin = cudf::detail::make_counting_transform_iterator(0, left_gen); + auto right_begin = cudf::detail::make_counting_transform_iterator(0, right_gen); + + index_vector merged_indices(total_size, stream); + + auto lhs_device_view = table_device_view::create(left_table, stream); + auto rhs_device_view = table_device_view::create(right_table, stream); + + auto d_column_order = cudf::detail::make_device_uvector_async( + column_order, stream, rmm::mr::get_current_device_resource()); + + if (nullable) { + auto d_null_precedence = cudf::detail::make_device_uvector_async( + null_precedence, stream, rmm::mr::get_current_device_resource()); + + auto ineq_op = detail::row_lexicographic_tagged_comparator( + *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); + thrust::merge(rmm::exec_policy(stream), + left_begin, + left_begin + left_size, + right_begin, + right_begin + right_size, + merged_indices.begin(), + ineq_op); + } else { + auto ineq_op = detail::row_lexicographic_tagged_comparator( + *lhs_device_view, *rhs_device_view, d_column_order.data()); + thrust::merge(rmm::exec_policy(stream), + left_begin, + left_begin + left_size, + right_begin, + right_begin + right_size, + merged_indices.begin(), + ineq_op); + } + + CUDF_CHECK_CUDA(stream.value()); + + return merged_indices; +} + +index_vector generate_merged_indices_nested(table_view const& left_table, + table_view const& right_table, + std::vector const& column_order, + std::vector const& null_precedence, + bool nullable, + rmm::cuda_stream_view stream) { size_type const left_size = left_table.num_rows(); size_type const right_size = right_table.num_rows(); @@ -491,8 +491,15 @@ table_ptr_type merge(cudf::table_view const& left_table, // extract merged row order according to indices: // - auto const merged_indices = generate_new_merged_indices( - index_left_view, index_right_view, column_order, null_precedence, nullable, stream); + index_vector merged_indices(0, stream); + if (cudf::detail::has_nested_columns(left_table) or + cudf::detail::has_nested_columns(right_table)) { + merged_indices = generate_merged_indices_nested( + index_left_view, index_right_view, column_order, null_precedence, nullable, stream); + } else { + merged_indices = generate_merged_indices( + index_left_view, index_right_view, column_order, null_precedence, nullable, stream); + } // create merged table: // From 2426c0b6a5bcbd4563e2ee0413577eb5aac35ede Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 4 Oct 2023 14:24:10 -0700 Subject: [PATCH 03/15] auto const everything --- cpp/src/merge/merge.cu | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 0f19ce2f250..d71b7645e8b 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -45,8 +45,6 @@ #include #include -#include - namespace cudf { namespace detail { namespace { @@ -244,15 +242,15 @@ index_vector generate_merged_indices_nested(table_view const& left_table, merged_indices.end(), thrust::make_pair(side::RIGHT, 1)); - auto left_indices_col = + auto const left_indices_col = cudf::lower_bound(right_table, left_table, column_order, null_precedence, stream); - auto left_indices = left_indices_col->view(); + auto const left_indices = left_indices_col->view(); auto left_indices_mutable = left_indices_col->mutable_view(); - auto left_indices_begin = left_indices.begin(); - auto left_indices_end = left_indices.end(); + auto const left_indices_begin = left_indices.begin(); + auto const left_indices_end = left_indices.end(); auto left_indices_mutable_begin = left_indices_mutable.begin(); - auto left_counter = thrust::make_counting_iterator(0); + auto const left_counter = thrust::make_counting_iterator(0); thrust::for_each( rmm::exec_policy_nosync(stream), left_counter, @@ -263,13 +261,13 @@ index_vector generate_merged_indices_nested(table_view const& left_table, }); rmm::device_uvector right_mask(total_size, stream); - auto total_counter = thrust::make_counting_iterator(0); + auto const total_counter = thrust::make_counting_iterator(0); thrust::for_each( rmm::exec_policy_nosync(stream), total_counter, total_counter + total_size, - [merged = merged_indices.data(), right = right_mask.data()] __device__(auto idx) { - auto [side, val] = merged[idx]; + [merged = merged_indices.data(), right = right_mask.data()] __device__(auto const idx) { + auto const [side, val] = merged[idx]; if (side == side::LEFT) { right[idx] = 0; } else { @@ -281,7 +279,7 @@ index_vector generate_merged_indices_nested(table_view const& left_table, thrust::exclusive_scan( rmm::exec_policy_nosync(stream), right_mask.begin(), right_mask.end(), right_indices.begin()); - auto right_counter = thrust::make_counting_iterator(0); + auto const right_counter = thrust::make_counting_iterator(0); thrust::for_each(rmm::exec_policy_nosync(stream), right_counter, right_counter + total_size, From 264b112042755654617d0dee4801a7d0fed620ab Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 11 Oct 2023 13:36:34 -0700 Subject: [PATCH 04/15] experimental comparators for non-nested case --- cpp/include/cudf/detail/merge.cuh | 109 ++++++----------------------- cpp/src/merge/merge.cu | 111 +++++++++++------------------- 2 files changed, 64 insertions(+), 156 deletions(-) diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index e8e9b080a92..4ab8d3589d4 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include @@ -41,108 +41,43 @@ using index_type = thrust::pair; */ using index_vector = rmm::device_uvector; -/** - * @brief tagged_element_relational_comparator uses element_relational_comparator to provide - * "tagged-index" comparison logic. - * - * Special treatment is necessary in several thrust algorithms (e.g., merge()) where - * the index affinity to the side is not guaranteed; i.e., the algorithms rely on - * binary functors (predicates) where the operands may transparently switch sides. - * - * For example, - * thrust::merge(left_container, - * right_container, - * predicate(lhs, rhs){...}); - * can create 4 different use-cases, inside predicate(...): - * - * 1. lhs refers to the left container; rhs to the right container; - * 2. vice-versa; - * 3. both lhs and rhs actually refer to the left container; - * 4. both lhs and rhs actually refer to the right container; - * - * Because of that, one cannot rely on the predicate having *fixed* references to the containers. - * Each invocation may land in a different situation (among the 4 above) than any other invocation. - * Also, one cannot just manipulate lhs, rhs (indices) alone; because, if predicate always applies - * one index to one container and the other index to the other container, - * switching the indices alone won't suffice in the cases (3) or (4), - * where the also the containers must be changed (to just one instead of two) - * independently of indices; - * - * As a result, a special comparison logic is necessary whereby the index is "tagged" with side - * information and consequently comparator functors (predicates) must operate on these tagged - * indices rather than on raw indices. - */ -template -struct tagged_element_relational_comparator { - __host__ __device__ tagged_element_relational_comparator(column_device_view lhs, - column_device_view rhs, - null_order null_precedence) - : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence} - { - } - - [[nodiscard]] __device__ weak_ordering compare(index_type lhs_tagged_index, - index_type rhs_tagged_index) const noexcept - { - auto const [l_side, l_indx] = lhs_tagged_index; - auto const [r_side, r_indx] = rhs_tagged_index; - - column_device_view const* ptr_left_dview{l_side == side::LEFT ? &lhs : &rhs}; - column_device_view const* ptr_right_dview{r_side == side::LEFT ? &lhs : &rhs}; - - auto erl_comparator = element_relational_comparator( - nullate::DYNAMIC{has_nulls}, *ptr_left_dview, *ptr_right_dview, null_precedence); - - return cudf::type_dispatcher(lhs.type(), erl_comparator, l_indx, r_indx); - } - - private: - column_device_view lhs; - column_device_view rhs; - null_order null_precedence; -}; - /** * @brief The equivalent of `row_lexicographic_comparator` for tagged indices. */ -template +template struct row_lexicographic_tagged_comparator { - row_lexicographic_tagged_comparator(table_device_view lhs, - table_device_view rhs, - order const* column_order = nullptr, - null_order const* null_precedence = nullptr) - : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} + row_lexicographic_tagged_comparator(LeftComparator left_comp, + LeftRightComparator left_right_comp, + RightComparator right_comp) + : _left_comp{left_comp}, _left_right_comp{left_right_comp}, _right_comp{right_comp} { - // Add check for types to be the same. - CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); } __device__ bool operator()(index_type lhs_tagged_index, index_type rhs_tagged_index) const noexcept { - for (size_type i = 0; i < _lhs.num_columns(); ++i) { - bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - - null_order null_precedence = - _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - - auto comparator = tagged_element_relational_comparator{ - _lhs.column(i), _rhs.column(i), null_precedence}; + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; - weak_ordering state = comparator.compare(lhs_tagged_index, rhs_tagged_index); - - if (state == weak_ordering::EQUIVALENT) { continue; } + auto const [l_side, l_indx] = lhs_tagged_index; + auto const [r_side, r_indx] = rhs_tagged_index; - return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER); + if (l_side == side::LEFT && r_side == side::RIGHT) { + return _left_right_comp(lhs_index_type{l_indx}, rhs_index_type{r_indx}); + } else if (l_side == side::RIGHT && r_side == side::LEFT) { + return _left_right_comp(rhs_index_type{l_indx}, lhs_index_type{r_indx}); + } else if (l_side == side::LEFT && r_side == side::LEFT) { + return _left_comp(l_indx, r_indx); + } else if (l_side == side::RIGHT && r_side == side::RIGHT) { + return _right_comp(l_indx, r_indx); } return false; } private: - table_device_view _lhs; - table_device_view _rhs; - null_order const* _null_precedence{}; - order const* _column_order{}; + LeftComparator _left_comp; + LeftRightComparator _left_right_comp; + RightComparator _right_comp; }; /** diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index d71b7645e8b..668ecff9100 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -189,36 +190,30 @@ index_vector generate_merged_indices(table_view const& left_table, index_vector merged_indices(total_size, stream); - auto lhs_device_view = table_device_view::create(left_table, stream); - auto rhs_device_view = table_device_view::create(right_table, stream); - - auto d_column_order = cudf::detail::make_device_uvector_async( - column_order, stream, rmm::mr::get_current_device_resource()); - - if (nullable) { - auto d_null_precedence = cudf::detail::make_device_uvector_async( - null_precedence, stream, rmm::mr::get_current_device_resource()); - - auto ineq_op = detail::row_lexicographic_tagged_comparator( - *lhs_device_view, *rhs_device_view, d_column_order.data(), d_null_precedence.data()); - thrust::merge(rmm::exec_policy(stream), - left_begin, - left_begin + left_size, - right_begin, - right_begin + right_size, - merged_indices.begin(), - ineq_op); - } else { - auto ineq_op = detail::row_lexicographic_tagged_comparator( - *lhs_device_view, *rhs_device_view, d_column_order.data()); - thrust::merge(rmm::exec_policy(stream), - left_begin, - left_begin + left_size, - right_begin, - right_begin + right_size, - merged_indices.begin(), - ineq_op); - } + auto left_comp = cudf::experimental::row::lexicographic::self_comparator{ + left_table, column_order, null_precedence, stream}; + auto left_right_comp = cudf::experimental::row::lexicographic::two_table_comparator{ + left_table, right_table, column_order, null_precedence, stream}; + auto right_comp = cudf::experimental::row::lexicographic::self_comparator{ + right_table, column_order, null_precedence, stream}; + + auto const left_has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(left_table)}; + auto const right_has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(right_table)}; + auto const left_right_has_nulls = nullate::DYNAMIC{left_has_nulls or right_has_nulls}; + + auto d_left_comp = left_comp.less(left_has_nulls); + auto d_left_right_comp = left_right_comp.less(left_right_has_nulls); + auto d_right_comp = right_comp.less(right_has_nulls); + + auto ineq_op = + detail::row_lexicographic_tagged_comparator(d_left_comp, d_left_right_comp, d_right_comp); + thrust::merge(rmm::exec_policy(stream), + left_begin, + left_begin + left_size, + right_begin, + right_begin + right_size, + merged_indices.begin(), + ineq_op); CUDF_CHECK_CUDA(stream.value()); @@ -237,60 +232,38 @@ index_vector generate_merged_indices_nested(table_view const& left_table, size_type const total_size = left_size + right_size; index_vector merged_indices(total_size, stream); - thrust::fill(rmm::exec_policy_nosync(stream), - merged_indices.begin(), - merged_indices.end(), - thrust::make_pair(side::RIGHT, 1)); - auto const left_indices_col = - cudf::lower_bound(right_table, left_table, column_order, null_precedence, stream); + auto const left_indices_col = cudf::detail::lower_bound(right_table, + left_table, + column_order, + null_precedence, + stream, + rmm::mr::get_current_device_resource()); auto const left_indices = left_indices_col->view(); auto left_indices_mutable = left_indices_col->mutable_view(); auto const left_indices_begin = left_indices.begin(); auto const left_indices_end = left_indices.end(); auto left_indices_mutable_begin = left_indices_mutable.begin(); - auto const left_counter = thrust::make_counting_iterator(0); - thrust::for_each( - rmm::exec_policy_nosync(stream), - left_counter, - left_counter + left_size, - [merged = merged_indices.data(), left = left_indices_begin] __device__(auto const idx) { - auto const final_left_idx = left[idx] + idx; - merged[final_left_idx] = thrust::make_pair(side::LEFT, idx); - }); - - rmm::device_uvector right_mask(total_size, stream); auto const total_counter = thrust::make_counting_iterator(0); thrust::for_each( rmm::exec_policy_nosync(stream), total_counter, total_counter + total_size, - [merged = merged_indices.data(), right = right_mask.data()] __device__(auto const idx) { - auto const [side, val] = merged[idx]; - if (side == side::LEFT) { - right[idx] = 0; + [merged = merged_indices.data(), left = left_indices_begin, left_size, right_size] __device__( + auto const idx) { + if (idx < right_size) { + // this tells us between which segments of left elements a right element + // would fall + auto const r_bound = thrust::upper_bound(thrust::seq, left, left + left_size, idx); + auto const r_segment = thrust::distance(left, r_bound); + merged[r_segment + idx] = thrust::make_pair(side::RIGHT, idx); } else { - right[idx] = 1; + auto const left_idx = idx - right_size; + merged[left[left_idx] + left_idx] = thrust::make_pair(side::LEFT, left_idx); } }); - rmm::device_uvector right_indices(total_size, stream); - thrust::exclusive_scan( - rmm::exec_policy_nosync(stream), right_mask.begin(), right_mask.end(), right_indices.begin()); - - auto const right_counter = thrust::make_counting_iterator(0); - thrust::for_each(rmm::exec_policy_nosync(stream), - right_counter, - right_counter + total_size, - [merged = merged_indices.data(), - right_m = right_mask.data(), - right_i = right_indices.data()] __device__(auto const idx) { - auto const right_idx = right_i[idx]; - auto const right_mask = right_m[idx]; - if (right_mask) { merged[idx] = thrust::make_pair(side::RIGHT, right_idx); } - }); - return merged_indices; } From 61dec014fda65d94922b4f0d9062bbf7cafbbce0 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 16 Oct 2023 15:23:51 -0700 Subject: [PATCH 05/15] add device constructor for experimental comparator --- cpp/include/cudf/detail/merge.cuh | 57 +++++++++++-------- .../cudf/table/experimental/row_operators.cuh | 37 +++++++++++- cpp/src/merge/merge.cu | 54 +++++++++++------- 3 files changed, 101 insertions(+), 47 deletions(-) diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index 4ab8d3589d4..852dd980531 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -41,43 +41,52 @@ using index_type = thrust::pair; */ using index_vector = rmm::device_uvector; -/** - * @brief The equivalent of `row_lexicographic_comparator` for tagged indices. - */ -template +template struct row_lexicographic_tagged_comparator { - row_lexicographic_tagged_comparator(LeftComparator left_comp, - LeftRightComparator left_right_comp, - RightComparator right_comp) - : _left_comp{left_comp}, _left_right_comp{left_right_comp}, _right_comp{right_comp} + row_lexicographic_tagged_comparator(table_device_view lhs, + table_device_view rhs, + device_span column_order, + device_span null_precedence) + : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} { + // Add check for types to be the same. + CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); } __device__ bool operator()(index_type lhs_tagged_index, index_type rhs_tagged_index) const noexcept { - using cudf::experimental::row::lhs_index_type; - using cudf::experimental::row::rhs_index_type; - auto const [l_side, l_indx] = lhs_tagged_index; auto const [r_side, r_indx] = rhs_tagged_index; - if (l_side == side::LEFT && r_side == side::RIGHT) { - return _left_right_comp(lhs_index_type{l_indx}, rhs_index_type{r_indx}); - } else if (l_side == side::RIGHT && r_side == side::LEFT) { - return _left_right_comp(rhs_index_type{l_indx}, lhs_index_type{r_indx}); - } else if (l_side == side::LEFT && r_side == side::LEFT) { - return _left_comp(l_indx, r_indx); - } else if (l_side == side::RIGHT && r_side == side::RIGHT) { - return _right_comp(l_indx, r_indx); - } - return false; + // Not sure why `const_cast` is needed here + table_device_view* ptr_left_dview{l_side == side::LEFT + ? const_cast(&_lhs) + : const_cast(&_rhs)}; + table_device_view* ptr_right_dview{r_side == side::LEFT + ? const_cast(&_lhs) + : const_cast(&_rhs)}; + + cudf::experimental::row::lexicographic::device_row_comparator comparator{ + has_nulls, + *ptr_left_dview, + *ptr_right_dview, + {}, + {}, + std::nullopt, + _column_order, + _null_precedence}; + + auto weak_order = comparator(l_indx, r_indx); + + return weak_order == weak_ordering::LESS; } private: - LeftComparator _left_comp; - LeftRightComparator _left_right_comp; - RightComparator _right_comp; + table_device_view _lhs; + table_device_view _rhs; + device_span _null_precedence{}; + device_span _column_order{}; }; /** diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 6b024d902a9..6d86d3224f5 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -264,6 +264,7 @@ template class device_row_comparator { + public: friend class self_comparator; ///< Allow self_comparator to access private members friend class two_table_comparator; ///< Allow two_table_comparator to access private members @@ -276,6 +277,8 @@ class device_row_comparator { * @param rhs The second table (may be the same table as `lhs`) * @param depth Optional, device array the same length as a row that contains starting depths of * columns if they're nested, and 0 otherwise. + * @param l_dremel_device_views <> + * @param r_dremel_device_views <> * @param column_order Optional, device array the same length as a row that indicates the desired * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are * sorted in ascending order. @@ -284,6 +287,7 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. */ + __host__ __device__ device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, @@ -292,7 +296,7 @@ class device_row_comparator { std::optional> depth = std::nullopt, std::optional> column_order = std::nullopt, std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + PhysicalElementComparator comparator = {}) : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -323,6 +327,8 @@ class device_row_comparator { * @param depth The depth of the column if part of a nested column @see * preprocessed_table::depths * @param comparator Physical element relational comparison functor. + * @param l_dremel_device_view <> + * @param r_dremel_device_view <> */ __device__ element_comparator(Nullate check_nulls, column_device_view lhs, @@ -370,6 +376,15 @@ class device_row_comparator { std::numeric_limits::max()); } + /** + * @brief + * + * @tparam Element + * @tparam Element, + * CUDF_ENABLE_IF(not cudf::is_relationally_comparable() and + * (not has_nested_columns or not cudf::is_nested())) + * @return __device__ + */ template () and (not has_nested_columns or not cudf::is_nested()))> @@ -379,6 +394,16 @@ class device_row_comparator { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } + /** + * @brief + * + * @tparam Element + * @tparam Element, + * CUDF_ENABLE_IF(has_nested_columns and std::is_same_v) + * @param lhs_element_index + * @param rhs_element_index + * @return __device__ + */ template )> __device__ cuda::std::pair operator()( @@ -413,6 +438,16 @@ class device_row_comparator { rhs_element_index); } + /** + * @brief + * + * @tparam Element + * @tparam Element, + * CUDF_ENABLE_IF(has_nested_columns and std::is_same_v) + * @param lhs_element_index + * @param rhs_element_index + * @return __device__ + */ template )> __device__ cuda::std::pair operator()(size_type lhs_element_index, diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 668ecff9100..ed30833e0f3 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -190,30 +190,40 @@ index_vector generate_merged_indices(table_view const& left_table, index_vector merged_indices(total_size, stream); - auto left_comp = cudf::experimental::row::lexicographic::self_comparator{ - left_table, column_order, null_precedence, stream}; - auto left_right_comp = cudf::experimental::row::lexicographic::two_table_comparator{ - left_table, right_table, column_order, null_precedence, stream}; - auto right_comp = cudf::experimental::row::lexicographic::self_comparator{ - right_table, column_order, null_precedence, stream}; - - auto const left_has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(left_table)}; - auto const right_has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(right_table)}; + auto const left_has_nulls = nullate::DYNAMIC{cudf::has_nulls(left_table)}; + auto const right_has_nulls = nullate::DYNAMIC{cudf::has_nulls(right_table)}; auto const left_right_has_nulls = nullate::DYNAMIC{left_has_nulls or right_has_nulls}; - auto d_left_comp = left_comp.less(left_has_nulls); - auto d_left_right_comp = left_right_comp.less(left_right_has_nulls); - auto d_right_comp = right_comp.less(right_has_nulls); - - auto ineq_op = - detail::row_lexicographic_tagged_comparator(d_left_comp, d_left_right_comp, d_right_comp); - thrust::merge(rmm::exec_policy(stream), - left_begin, - left_begin + left_size, - right_begin, - right_begin + right_size, - merged_indices.begin(), - ineq_op); + auto lhs_device_view = table_device_view::create(left_table, stream); + auto rhs_device_view = table_device_view::create(right_table, stream); + + auto d_column_order = cudf::detail::make_device_uvector_async( + column_order, stream, rmm::mr::get_current_device_resource()); + + if (left_right_has_nulls) { + auto d_null_precedence = cudf::detail::make_device_uvector_async( + null_precedence, stream, rmm::mr::get_current_device_resource()); + + auto ineq_op = detail::row_lexicographic_tagged_comparator( + *lhs_device_view, *rhs_device_view, d_column_order, d_null_precedence); + thrust::merge(rmm::exec_policy(stream), + left_begin, + left_begin + left_size, + right_begin, + right_begin + right_size, + merged_indices.begin(), + ineq_op); + } else { + auto ineq_op = detail::row_lexicographic_tagged_comparator( + *lhs_device_view, *rhs_device_view, d_column_order, {}); + thrust::merge(rmm::exec_policy(stream), + left_begin, + left_begin + left_size, + right_begin, + right_begin + right_size, + merged_indices.begin(), + ineq_op); + } CUDF_CHECK_CUDA(stream.value()); From 5329634537ae1987cc340794d063828b8341df47 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 16 Oct 2023 16:39:44 -0700 Subject: [PATCH 06/15] fix regression, address initial review --- cpp/include/cudf/detail/merge.cuh | 20 ++--- .../cudf/table/experimental/row_operators.cuh | 78 +++++++++++++------ cpp/src/merge/merge.cu | 42 ++++++---- 3 files changed, 92 insertions(+), 48 deletions(-) diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index 852dd980531..f78f39fb51b 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -41,7 +41,7 @@ using index_type = thrust::pair; */ using index_vector = rmm::device_uvector; -template +template struct row_lexicographic_tagged_comparator { row_lexicographic_tagged_comparator(table_device_view lhs, table_device_view rhs, @@ -67,15 +67,15 @@ struct row_lexicographic_tagged_comparator { ? const_cast(&_lhs) : const_cast(&_rhs)}; - cudf::experimental::row::lexicographic::device_row_comparator comparator{ - has_nulls, - *ptr_left_dview, - *ptr_right_dview, - {}, - {}, - std::nullopt, - _column_order, - _null_precedence}; + auto comparator = [&]() { + if (has_nulls) { + return cudf::experimental::row::lexicographic::device_row_comparator{ + has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order, _null_precedence}; + } else { + return cudf::experimental::row::lexicographic::device_row_comparator{ + has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order}; + } + }(); auto weak_order = comparator(l_indx, r_indx); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 6d86d3224f5..f36ba5ad6af 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -52,6 +52,7 @@ #include #include #include +#include #include namespace cudf { @@ -275,10 +276,10 @@ class device_row_comparator { * @param check_nulls Indicates if any input column contains nulls. * @param lhs The first table * @param rhs The second table (may be the same table as `lhs`) + * @param l_dremel_device_views lhs table dremel device view for list type + * @param r_dremel_device_views rhs table dremel device view for list type * @param depth Optional, device array the same length as a row that contains starting depths of * columns if they're nested, and 0 otherwise. - * @param l_dremel_device_views <> - * @param r_dremel_device_views <> * @param column_order Optional, device array the same length as a row that indicates the desired * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are * sorted in ascending order. @@ -287,7 +288,6 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. */ - __host__ __device__ device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, @@ -309,6 +309,44 @@ class device_row_comparator { { } + /** + * @brief Construct a function object for performing a lexicographic + * comparison between the rows of two tables. + * This is a special overload to allow device-side construction of the + * comparator for cases where no preprocessing is needed, i.e. tables with + * non-nested type columns. + * + * @param check_nulls Indicates if any input column contains nulls. + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param column_order Optional, device array the same length as a row that indicates the desired + * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are + * sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row and indicates how null + * values compare to all other for every column. If `nullopt`, then null precedence would be + * `null_order::BEFORE` for all columns. + * @param comparator Physical element relational comparison functor. + */ + template + __device__ device_row_comparator( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) + : _lhs{lhs}, + _rhs{rhs}, + _l_dremel{}, + _r_dremel{}, + _check_nulls{check_nulls}, + _depth{}, + _column_order{column_order}, + _null_precedence{null_precedence}, + _comparator{comparator} + { + } + /** * @brief Performs a relational comparison between two elements in two columns. */ @@ -377,13 +415,11 @@ class device_row_comparator { } /** - * @brief + * @brief Throws run-time error when columns types cannot be compared + * or if this class is instantiated with `has_nested_columns = false` but + * passed tables with nested columns * - * @tparam Element - * @tparam Element, - * CUDF_ENABLE_IF(not cudf::is_relationally_comparable() and - * (not has_nested_columns or not cudf::is_nested())) - * @return __device__ + * @return Ordering */ template () and @@ -395,14 +431,12 @@ class device_row_comparator { } /** - * @brief + * @brief Compares two struct-type columns * - * @tparam Element - * @tparam Element, - * CUDF_ENABLE_IF(has_nested_columns and std::is_same_v) - * @param lhs_element_index - * @param rhs_element_index - * @return __device__ + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return Indicates the relationship between the elements in the `lhs` and `rhs` columns, along + * with the depth at which a null value was encountered. */ template )> @@ -439,14 +473,12 @@ class device_row_comparator { } /** - * @brief + * @brief Compares two list-type columns * - * @tparam Element - * @tparam Element, - * CUDF_ENABLE_IF(has_nested_columns and std::is_same_v) - * @param lhs_element_index - * @param rhs_element_index - * @return __device__ + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return Indicates the relationship between the elements in the `lhs` and `rhs` columns, along + * with the depth at which a null value was encountered. */ template )> diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index ed30833e0f3..c419f144eef 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -190,9 +190,8 @@ index_vector generate_merged_indices(table_view const& left_table, index_vector merged_indices(total_size, stream); - auto const left_has_nulls = nullate::DYNAMIC{cudf::has_nulls(left_table)}; - auto const right_has_nulls = nullate::DYNAMIC{cudf::has_nulls(right_table)}; - auto const left_right_has_nulls = nullate::DYNAMIC{left_has_nulls or right_has_nulls}; + auto const has_nulls = + nullate::DYNAMIC{cudf::has_nulls(left_table) or cudf::has_nulls(right_table)}; auto lhs_device_view = table_device_view::create(left_table, stream); auto rhs_device_view = table_device_view::create(right_table, stream); @@ -200,9 +199,19 @@ index_vector generate_merged_indices(table_view const& left_table, auto d_column_order = cudf::detail::make_device_uvector_async( column_order, stream, rmm::mr::get_current_device_resource()); - if (left_right_has_nulls) { + if (has_nulls) { + auto new_null_precedence = [&]() { + if (null_precedence.size() > 0) { + CUDF_EXPECTS(static_cast(null_precedence.size()) == left_table.num_columns(), + "Null precedence vector size mismatched"); + return null_precedence; + } else { + return std::vector(left_table.num_columns(), null_order::BEFORE); + } + }(); + auto d_null_precedence = cudf::detail::make_device_uvector_async( - null_precedence, stream, rmm::mr::get_current_device_resource()); + new_null_precedence, stream, rmm::mr::get_current_device_resource()); auto ineq_op = detail::row_lexicographic_tagged_comparator( *lhs_device_view, *rhs_device_view, d_column_order, d_null_precedence); @@ -262,6 +271,9 @@ index_vector generate_merged_indices_nested(table_view const& left_table, total_counter + total_size, [merged = merged_indices.data(), left = left_indices_begin, left_size, right_size] __device__( auto const idx) { + // We split threads into two groups, so only one kernel is needed. + // Threads in [0, right_size) will insert right indices in sorted order. + // Threads in [right_size, total_size) will insert left indices in sorted order. if (idx < right_size) { // this tells us between which segments of left elements a right element // would fall @@ -472,16 +484,16 @@ table_ptr_type merge(cudf::table_view const& left_table, // extract merged row order according to indices: // - index_vector merged_indices(0, stream); - if (cudf::detail::has_nested_columns(left_table) or - cudf::detail::has_nested_columns(right_table)) { - merged_indices = generate_merged_indices_nested( - index_left_view, index_right_view, column_order, null_precedence, nullable, stream); - } else { - merged_indices = generate_merged_indices( - index_left_view, index_right_view, column_order, null_precedence, nullable, stream); - } - + auto const merged_indices = [&]() { + if (cudf::detail::has_nested_columns(left_table) or + cudf::detail::has_nested_columns(right_table)) { + return generate_merged_indices_nested( + index_left_view, index_right_view, column_order, null_precedence, nullable, stream); + } else { + return generate_merged_indices( + index_left_view, index_right_view, column_order, null_precedence, nullable, stream); + } + }(); // create merged table: // auto const n_cols = left_table.num_columns(); From 377c9cd832e38cd9832ff7da06efa5e1e68edf48 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 16 Oct 2023 16:41:04 -0700 Subject: [PATCH 07/15] default construction unnecessary --- cpp/include/cudf/detail/merge.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index f78f39fb51b..5b10071d6cd 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -85,8 +85,8 @@ struct row_lexicographic_tagged_comparator { private: table_device_view _lhs; table_device_view _rhs; - device_span _null_precedence{}; - device_span _column_order{}; + device_span _null_precedence; + device_span _column_order; }; /** From 29d51c02279ed6f096355c4929d82057be145848 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 16 Oct 2023 16:41:59 -0700 Subject: [PATCH 08/15] add noexcept back --- cpp/include/cudf/table/experimental/row_operators.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f36ba5ad6af..6946ccdb213 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -296,7 +296,7 @@ class device_row_comparator { std::optional> depth = std::nullopt, std::optional> column_order = std::nullopt, std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -334,7 +334,7 @@ class device_row_comparator { table_device_view rhs, std::optional> column_order = std::nullopt, std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel{}, From b62fe895df8b5ff2cc678afb30d2287c0ec3e127 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 20 Oct 2023 10:29:47 -0700 Subject: [PATCH 09/15] passing tests, benchmarks, and detail API rework --- cpp/benchmarks/CMakeLists.txt | 1 + .../generate_nested_types.hpp} | 0 cpp/benchmarks/merge/merge_lists.cpp | 54 +++++++++ cpp/benchmarks/merge/merge_structs.cpp | 54 +++++++++ cpp/benchmarks/sort/rank_lists.cpp | 2 +- cpp/benchmarks/sort/rank_structs.cpp | 2 +- cpp/benchmarks/sort/sort_lists.cpp | 2 +- cpp/benchmarks/sort/sort_structs.cpp | 2 +- cpp/examples/nested_types/hashed.json | 5 + cpp/examples/nested_types/output.json | 5 + cpp/examples/nested_types/unique.json | 4 + cpp/include/cudf/detail/merge.cuh | 110 ----------------- cpp/include/cudf/detail/merge.hpp | 60 ++++++++++ cpp/include/cudf/dictionary/detail/merge.hpp | 4 +- cpp/include/cudf/merge.hpp | 7 +- cpp/include/cudf/strings/detail/merge.cuh | 2 +- cpp/src/merge/merge.cu | 84 ++++++++++++- .../quantiles/tdigest/tdigest_aggregation.cu | 2 +- cpp/tests/merge/merge_test.cpp | 113 ++++++++++++++++++ 19 files changed, 391 insertions(+), 122 deletions(-) rename cpp/benchmarks/{sort/nested_types_common.hpp => common/generate_nested_types.hpp} (100%) create mode 100644 cpp/benchmarks/merge/merge_lists.cpp create mode 100644 cpp/benchmarks/merge/merge_structs.cpp create mode 100644 cpp/examples/nested_types/hashed.json create mode 100644 cpp/examples/nested_types/output.json create mode 100644 cpp/examples/nested_types/unique.json delete mode 100644 cpp/include/cudf/detail/merge.cuh create mode 100644 cpp/include/cudf/detail/merge.hpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index cd6b3cfdc03..a3e2b4ed6db 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -230,6 +230,7 @@ ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) # ################################################################################################## # * merge benchmark ------------------------------------------------------------------------------- ConfigureBench(MERGE_BENCH merge/merge.cpp) +ConfigureNVBench(MERGE_NVBENCH merge/merge_structs.cpp merge/merge_lists.cpp) # ################################################################################################## # * null_mask benchmark --------------------------------------------------------------------------- diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/common/generate_nested_types.hpp similarity index 100% rename from cpp/benchmarks/sort/nested_types_common.hpp rename to cpp/benchmarks/common/generate_nested_types.hpp diff --git a/cpp/benchmarks/merge/merge_lists.cpp b/cpp/benchmarks/merge/merge_lists.cpp new file mode 100644 index 00000000000..bcb9f10ac83 --- /dev/null +++ b/cpp/benchmarks/merge/merge_lists.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +void nvbench_merge_list(nvbench::state& state) +{ + rmm::cuda_stream_view stream; + + auto const input1 = create_lists_data(state); + auto const sorted_input1 = + cudf::detail::sort(*input1, {}, {}, stream, rmm::mr::get_current_device_resource()); + + auto const input2 = create_lists_data(state); + auto const sorted_input2 = + cudf::detail::sort(*input2, {}, {}, stream, rmm::mr::get_current_device_resource()); + + stream.synchronize(); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + + cudf::detail::merge({*sorted_input1, *sorted_input2}, + {0}, + {cudf::order::ASCENDING}, + {}, + stream_view, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH(nvbench_merge_list) + .set_name("merge_lists") + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/benchmarks/merge/merge_structs.cpp b/cpp/benchmarks/merge/merge_structs.cpp new file mode 100644 index 00000000000..9c56b44b623 --- /dev/null +++ b/cpp/benchmarks/merge/merge_structs.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +void nvbench_merge_struct(nvbench::state& state) +{ + rmm::cuda_stream_view stream; + + auto const input1 = create_structs_data(state); + auto const sorted_input1 = + cudf::detail::sort(*input1, {}, {}, stream, rmm::mr::get_current_device_resource()); + + auto const input2 = create_structs_data(state); + auto const sorted_input2 = + cudf::detail::sort(*input2, {}, {}, stream, rmm::mr::get_current_device_resource()); + + stream.synchronize(); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + + cudf::detail::merge({*sorted_input1, *sorted_input2}, + {0}, + {cudf::order::ASCENDING}, + {}, + stream_view, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH(nvbench_merge_struct) + .set_name("merge_struct") + .add_int64_power_of_two_axis("NumRows", {10, 18, 26}) + .add_int64_axis("Depth", {0, 1, 8}) + .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp index 49dc409ebfc..a5da4e4ff5e 100644 --- a/cpp/benchmarks/sort/rank_lists.cpp +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "nested_types_common.hpp" #include "rank_types_common.hpp" +#include #include diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp index 85427e2128f..271b883e62a 100644 --- a/cpp/benchmarks/sort/rank_structs.cpp +++ b/cpp/benchmarks/sort/rank_structs.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "nested_types_common.hpp" #include "rank_types_common.hpp" +#include #include diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index 4b04323a99f..2052de3688c 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "nested_types_common.hpp" +#include #include diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 1d54fa42f6f..3a3d1080ba0 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "nested_types_common.hpp" +#include #include diff --git a/cpp/examples/nested_types/hashed.json b/cpp/examples/nested_types/hashed.json new file mode 100644 index 00000000000..8042d78985d --- /dev/null +++ b/cpp/examples/nested_types/hashed.json @@ -0,0 +1,5 @@ +{"0":1027901597} +{"0":1027901597} +{"0":1740876620} +{"0":4277765851} +{"0":208960915} diff --git a/cpp/examples/nested_types/output.json b/cpp/examples/nested_types/output.json new file mode 100644 index 00000000000..7440a81ce76 --- /dev/null +++ b/cpp/examples/nested_types/output.json @@ -0,0 +1,5 @@ +{"features":{"key":"a1","value":[{"info":"message_1","type":"device_a","dt":1688750001}]},"source":"network_a","quality":0.7,"count":2} +{"features":{"key":"a1","value":[{"info":"message_1","type":"device_a","dt":1688750001}]},"source":"network_b","quality":0.9,"count":2} +{"features":{"key":"a2","value":[{"info":"message_2","type":"device_a","dt":1688750002}]},"source":"network_a","quality":0.7,"count":1} +{"features":{"key":"a3","value":[{"info":"message_3","type":"device_a","dt":1688750003}]},"source":"network_b","quality":0.8,"count":1} +{"features":{"key":"a4","value":[{"info":"message_4","type":"device_a","dt":1688750004}]},"source":"network_b","quality":0.9,"count":1} diff --git a/cpp/examples/nested_types/unique.json b/cpp/examples/nested_types/unique.json new file mode 100644 index 00000000000..b9d983bcb3a --- /dev/null +++ b/cpp/examples/nested_types/unique.json @@ -0,0 +1,4 @@ +{"0":{"0":"a1","1":[{"0":"message_1","1":"device_a","2":1688750001}]},"1":"network_a","2":0.7,"3":2} +{"0":{"0":"a2","1":[{"0":"message_2","1":"device_a","2":1688750002}]},"1":"network_a","2":0.7,"3":1} +{"0":{"0":"a3","1":[{"0":"message_3","1":"device_a","2":1688750003}]},"1":"network_b","2":0.8,"3":1} +{"0":{"0":"a4","1":[{"0":"message_4","1":"device_a","2":1688750004}]},"1":"network_b","2":0.9,"3":1} diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh deleted file mode 100644 index 5b10071d6cd..00000000000 --- a/cpp/include/cudf/detail/merge.cuh +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include -#include -#include - -namespace cudf { -namespace detail { -/** - * @brief Source table identifier to copy data from. - */ -enum class side : bool { LEFT, RIGHT }; - -/** - * @brief Tagged index type: `thrust::get<0>` indicates left/right side, - * `thrust::get<1>` indicates the row index - */ -using index_type = thrust::pair; - -/** - * @brief Vector of `index_type` values. - */ -using index_vector = rmm::device_uvector; - -template -struct row_lexicographic_tagged_comparator { - row_lexicographic_tagged_comparator(table_device_view lhs, - table_device_view rhs, - device_span column_order, - device_span null_precedence) - : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} - { - // Add check for types to be the same. - CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); - } - - __device__ bool operator()(index_type lhs_tagged_index, - index_type rhs_tagged_index) const noexcept - { - auto const [l_side, l_indx] = lhs_tagged_index; - auto const [r_side, r_indx] = rhs_tagged_index; - - // Not sure why `const_cast` is needed here - table_device_view* ptr_left_dview{l_side == side::LEFT - ? const_cast(&_lhs) - : const_cast(&_rhs)}; - table_device_view* ptr_right_dview{r_side == side::LEFT - ? const_cast(&_lhs) - : const_cast(&_rhs)}; - - auto comparator = [&]() { - if (has_nulls) { - return cudf::experimental::row::lexicographic::device_row_comparator{ - has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order, _null_precedence}; - } else { - return cudf::experimental::row::lexicographic::device_row_comparator{ - has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order}; - } - }(); - - auto weak_order = comparator(l_indx, r_indx); - - return weak_order == weak_ordering::LESS; - } - - private: - table_device_view _lhs; - table_device_view _rhs; - device_span _null_precedence; - device_span _column_order; -}; - -/** - * @copydoc std::unique_ptr merge( - * std::vector const& tables_to_merge, - * std::vector const& key_cols, - * std::vector const& column_order, - * std::vector const& null_precedence, - * rmm::mr::device_memory_resource* mr) - * - * @param stream CUDA stream used for device memory operations and kernel launches - */ -std::unique_ptr merge(std::vector const& tables_to_merge, - std::vector const& key_cols, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - -} // namespace detail -} // namespace cudf diff --git a/cpp/include/cudf/detail/merge.hpp b/cpp/include/cudf/detail/merge.hpp new file mode 100644 index 00000000000..2167a484214 --- /dev/null +++ b/cpp/include/cudf/detail/merge.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Source table identifier to copy data from. + */ +enum class side : bool { LEFT, RIGHT }; + +/** + * @brief Tagged index type: `thrust::get<0>` indicates left/right side, + * `thrust::get<1>` indicates the row index + */ +using index_type = thrust::pair; + +/** + * @brief Vector of `index_type` values. + */ +using index_vector = rmm::device_uvector; + +/** + * @copydoc std::unique_ptr merge( + * std::vector const& tables_to_merge, + * std::vector const& key_cols, + * std::vector const& column_order, + * std::vector const& null_precedence, + * rmm::mr::device_memory_resource* mr) + * + * @param stream CUDA stream used for device memory operations and kernel launches + */ +std::unique_ptr merge(std::vector const& tables_to_merge, + std::vector const& key_cols, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/dictionary/detail/merge.hpp b/cpp/include/cudf/dictionary/detail/merge.hpp index e7ea53c740a..cad495d0097 100644 --- a/cpp/include/cudf/dictionary/detail/merge.hpp +++ b/cpp/include/cudf/dictionary/detail/merge.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/include/cudf/merge.hpp b/cpp/include/cudf/merge.hpp index 3d09550209d..33ad0c1f16b 100644 --- a/cpp/include/cudf/merge.hpp +++ b/cpp/include/cudf/merge.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,10 @@ namespace cudf { * @brief Merge a set of sorted tables. * * Merges sorted tables into one sorted table - * containing data from all tables. + * containing data from all tables. Each column + * of each table must be sorted according to the + * parameters (cudf::column_order and cudf::null_order) + * specified for that column. * * ``` * Example 1: diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh index 965e89cc862..5f50faa158e 100644 --- a/cpp/include/cudf/strings/detail/merge.cuh +++ b/cpp/include/cudf/strings/detail/merge.cuh @@ -18,8 +18,8 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index c419f144eef..8098e02cf40 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -13,25 +13,30 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include #include +#include #include -#include +#include #include #include #include #include #include #include +#include +#include #include #include +#include #include #include #include #include +#include #include -#include #include #include @@ -48,8 +53,57 @@ namespace cudf { namespace detail { + namespace { +template +struct row_lexicographic_tagged_comparator { + row_lexicographic_tagged_comparator(table_device_view lhs, + table_device_view rhs, + device_span column_order, + device_span null_precedence) + : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} + { + // Add check for types to be the same. + CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); + } + + __device__ bool operator()(index_type lhs_tagged_index, + index_type rhs_tagged_index) const noexcept + { + auto const [l_side, l_indx] = lhs_tagged_index; + auto const [r_side, r_indx] = rhs_tagged_index; + + // Not sure why `const_cast` is needed here + table_device_view* ptr_left_dview{l_side == side::LEFT + ? const_cast(&_lhs) + : const_cast(&_rhs)}; + table_device_view* ptr_right_dview{r_side == side::LEFT + ? const_cast(&_lhs) + : const_cast(&_rhs)}; + + auto comparator = [&]() { + if (has_nulls) { + return cudf::experimental::row::lexicographic::device_row_comparator{ + has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order, _null_precedence}; + } else { + return cudf::experimental::row::lexicographic::device_row_comparator{ + has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order}; + } + }(); + + auto weak_order = comparator(l_indx, r_indx); + + return weak_order == weak_ordering::LESS; + } + + private: + table_device_view _lhs; + table_device_view _rhs; + device_span _null_precedence; + device_span _column_order; +}; + using detail::side; using index_type = detail::index_type; @@ -419,6 +473,32 @@ std::unique_ptr column_merger::operator()( return result; } +// specialization for lists +template <> +std::unique_ptr column_merger::operator()( + column_view const& lcol, + column_view const& rcol, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + std::vector columns{lcol, rcol}; + auto concatenated_list = cudf::lists::detail::concatenate(columns, stream, mr); + + auto const iter_gather = cudf::detail::make_counting_transform_iterator( + 0, [row_order = row_order_.data(), lsize = lcol.size()] __device__(auto const idx) { + auto const [side, index] = row_order[idx]; + return side == side::LEFT ? index : lsize + index; + }); + + auto result = cudf::detail::gather(table_view{{concatenated_list->view()}}, + iter_gather, + iter_gather + concatenated_list->size(), + out_of_bounds_policy::DONT_CHECK, + stream, + mr); + return std::move(result->release()[0]); +} + // specialization for structs template <> std::unique_ptr column_merger::operator()( diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index 9e8b75ae3b6..44a13c450ab 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 3a61c0768a6..3558e5676dd 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -27,7 +27,9 @@ #include #include #include +#include #include +#include #include #include @@ -874,6 +876,117 @@ TEST_F(MergeTest, StructsNestedWithNulls) // clang-format on } +using lcw = cudf::test::lists_column_wrapper; +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; + +TEST_F(MergeTest, Lists) +{ + auto col1 = lcw{lcw{1}, lcw{3}, lcw{5}, lcw{7}}; + auto col2 = lcw{lcw{2}, lcw{4}, lcw{6}, lcw{8}}; + + auto tbl1 = cudf::table_view{{col1}}; + auto tbl2 = cudf::table_view{{col2}}; + + auto result = cudf::merge({tbl1, tbl2}, {0}, {cudf::order::ASCENDING}); + + auto expected_col = lcw{lcw{1}, lcw{2}, lcw{3}, lcw{4}, lcw{5}, lcw{6}, lcw{7}, lcw{8}}; + auto expected_tbl = cudf::table_view{{expected_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl, *result); +} + +TEST_F(MergeTest, NestedListsWithNulls) +{ + auto col1 = lcw{{lcw{lcw{1}}, lcw{lcw{3}}, lcw{lcw{5}}, lcw{lcw{7}}}, null_at(3)}; + auto col2 = lcw{{lcw{lcw{2}}, lcw{lcw{4}}, lcw{lcw{6}}, lcw{lcw{8}}}, null_at(3)}; + + auto tbl1 = cudf::table_view{{col1}}; + auto tbl2 = cudf::table_view{{col2}}; + + auto result = cudf::merge({tbl1, tbl2}, {0}, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + + auto expected_col = lcw{{lcw{lcw{1}}, + lcw{lcw{2}}, + lcw{lcw{3}}, + lcw{lcw{4}}, + lcw{lcw{5}}, + lcw{lcw{6}}, + lcw{lcw{7}}, + lcw{lcw{8}}}, + nulls_at({6, 7})}; + auto expected_tbl = cudf::table_view{{expected_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl, *result); +} + +TEST_F(MergeTest, NestedListsofStructs) +{ + // [ {1}, {2}, {3} ] + // [ {5} ] + // [ {7}, {8} ] + // [ {10} ] + auto const col1 = [] { + auto const get_structs = [] { + auto child0 = cudf::test::fixed_width_column_wrapper{1, 2, 3, 5, 7, 8, 10}; + return cudf::test::structs_column_wrapper{{child0}}; + }; + return cudf::make_lists_column( + 4, + cudf::test::fixed_width_column_wrapper{0, 3, 4, 6, 7}.release(), + get_structs().release(), + 0, + {}); + }(); + + // [ {4} ] + // [ {6} ] + // [ {9} ] + // [ {11} ] + auto const col2 = [] { + auto const get_structs = [] { + auto child0 = cudf::test::fixed_width_column_wrapper{4, 6, 9, 11}; + return cudf::test::structs_column_wrapper{{child0}}; + }; + return cudf::make_lists_column( + 4, + cudf::test::fixed_width_column_wrapper{0, 1, 2, 3, 4}.release(), + get_structs().release(), + 0, + {}); + }(); + + auto tbl1 = cudf::table_view{{*col1}}; + auto tbl2 = cudf::table_view{{*col2}}; + + auto result = cudf::merge({tbl1, tbl2}, {0}, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + + // [ {1}, {2}, {3} ] + // [ {4} ] + // [ {5} ] + // [ {6} ] + // [ {7}, {8} ] + // [ {9} ] + // [ {10} ] + // [ {11} ] + auto const expected_col = [] { + auto const get_structs = [] { + auto child0 = + cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + return cudf::test::structs_column_wrapper{{child0}}; + }; + return cudf::make_lists_column( + 8, + cudf::test::fixed_width_column_wrapper{0, 3, 4, 5, 6, 8, 9, 10, 11}.release(), + get_structs().release(), + 0, + {}); + }(); + auto expected_tbl = cudf::table_view{{*expected_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl, *result); +} + template struct FixedPointTestAllReps : public cudf::test::BaseFixture {}; From d3a2425b999b136c942f19b0aeaa46fb82ee0b1f Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 20 Oct 2023 10:30:55 -0700 Subject: [PATCH 10/15] delete accidentally included files --- cpp/examples/nested_types/hashed.json | 5 ----- cpp/examples/nested_types/output.json | 5 ----- cpp/examples/nested_types/unique.json | 4 ---- 3 files changed, 14 deletions(-) delete mode 100644 cpp/examples/nested_types/hashed.json delete mode 100644 cpp/examples/nested_types/output.json delete mode 100644 cpp/examples/nested_types/unique.json diff --git a/cpp/examples/nested_types/hashed.json b/cpp/examples/nested_types/hashed.json deleted file mode 100644 index 8042d78985d..00000000000 --- a/cpp/examples/nested_types/hashed.json +++ /dev/null @@ -1,5 +0,0 @@ -{"0":1027901597} -{"0":1027901597} -{"0":1740876620} -{"0":4277765851} -{"0":208960915} diff --git a/cpp/examples/nested_types/output.json b/cpp/examples/nested_types/output.json deleted file mode 100644 index 7440a81ce76..00000000000 --- a/cpp/examples/nested_types/output.json +++ /dev/null @@ -1,5 +0,0 @@ -{"features":{"key":"a1","value":[{"info":"message_1","type":"device_a","dt":1688750001}]},"source":"network_a","quality":0.7,"count":2} -{"features":{"key":"a1","value":[{"info":"message_1","type":"device_a","dt":1688750001}]},"source":"network_b","quality":0.9,"count":2} -{"features":{"key":"a2","value":[{"info":"message_2","type":"device_a","dt":1688750002}]},"source":"network_a","quality":0.7,"count":1} -{"features":{"key":"a3","value":[{"info":"message_3","type":"device_a","dt":1688750003}]},"source":"network_b","quality":0.8,"count":1} -{"features":{"key":"a4","value":[{"info":"message_4","type":"device_a","dt":1688750004}]},"source":"network_b","quality":0.9,"count":1} diff --git a/cpp/examples/nested_types/unique.json b/cpp/examples/nested_types/unique.json deleted file mode 100644 index b9d983bcb3a..00000000000 --- a/cpp/examples/nested_types/unique.json +++ /dev/null @@ -1,4 +0,0 @@ -{"0":{"0":"a1","1":[{"0":"message_1","1":"device_a","2":1688750001}]},"1":"network_a","2":0.7,"3":2} -{"0":{"0":"a2","1":[{"0":"message_2","1":"device_a","2":1688750002}]},"1":"network_a","2":0.7,"3":1} -{"0":{"0":"a3","1":[{"0":"message_3","1":"device_a","2":1688750003}]},"1":"network_b","2":0.8,"3":1} -{"0":{"0":"a4","1":[{"0":"message_4","1":"device_a","2":1688750004}]},"1":"network_b","2":0.9,"3":1} From 6bcff40c1bd836b06b5a3c920ecdcf0cd9c899bc Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 25 Oct 2023 19:35:05 -0400 Subject: [PATCH 11/15] Update cpp/include/cudf/merge.hpp Co-authored-by: Bradley Dice --- cpp/include/cudf/merge.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/merge.hpp b/cpp/include/cudf/merge.hpp index 33ad0c1f16b..8886ec24bfe 100644 --- a/cpp/include/cudf/merge.hpp +++ b/cpp/include/cudf/merge.hpp @@ -34,7 +34,7 @@ namespace cudf { * @brief Merge a set of sorted tables. * * Merges sorted tables into one sorted table - * containing data from all tables. Each column + * containing data from all tables. The key columns * of each table must be sorted according to the * parameters (cudf::column_order and cudf::null_order) * specified for that column. From 7774caf911bcab2936607b08826c4024d451522b Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 25 Oct 2023 16:37:04 -0700 Subject: [PATCH 12/15] header includes according to dev guide --- cpp/benchmarks/common/generate_nested_types.hpp | 2 +- cpp/benchmarks/sort/rank_lists.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/benchmarks/common/generate_nested_types.hpp b/cpp/benchmarks/common/generate_nested_types.hpp index 93853ba5768..ee9e3ca9de3 100644 --- a/cpp/benchmarks/common/generate_nested_types.hpp +++ b/cpp/benchmarks/common/generate_nested_types.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include "generate_input.hpp" #include diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp index a5da4e4ff5e..c23f3c891f0 100644 --- a/cpp/benchmarks/sort/rank_lists.cpp +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -15,6 +15,7 @@ */ #include "rank_types_common.hpp" + #include #include From 29a84282a6bc053bccf11ecb24bc324e80cf9a84 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 25 Oct 2023 17:16:23 -0700 Subject: [PATCH 13/15] address review --- cpp/src/merge/merge.cu | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 8098e02cf40..e5e2bccd118 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -58,10 +58,10 @@ namespace { template struct row_lexicographic_tagged_comparator { - row_lexicographic_tagged_comparator(table_device_view lhs, - table_device_view rhs, - device_span column_order, - device_span null_precedence) + row_lexicographic_tagged_comparator(table_device_view const lhs, + table_device_view const rhs, + device_span const column_order, + device_span const null_precedence) : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} { // Add check for types to be the same. @@ -74,14 +74,8 @@ struct row_lexicographic_tagged_comparator { auto const [l_side, l_indx] = lhs_tagged_index; auto const [r_side, r_indx] = rhs_tagged_index; - // Not sure why `const_cast` is needed here - table_device_view* ptr_left_dview{l_side == side::LEFT - ? const_cast(&_lhs) - : const_cast(&_rhs)}; - table_device_view* ptr_right_dview{r_side == side::LEFT - ? const_cast(&_lhs) - : const_cast(&_rhs)}; - + table_device_view const* ptr_left_dview{l_side == side::LEFT ? &_lhs : &_rhs}; + table_device_view const* ptr_right_dview{r_side == side::LEFT ? &_lhs : &_rhs}; auto comparator = [&]() { if (has_nulls) { return cudf::experimental::row::lexicographic::device_row_comparator{ @@ -98,10 +92,10 @@ struct row_lexicographic_tagged_comparator { } private: - table_device_view _lhs; - table_device_view _rhs; - device_span _null_precedence; - device_span _column_order; + table_device_view const _lhs; + table_device_view const _rhs; + device_span const _null_precedence; + device_span const _column_order; }; using detail::side; @@ -254,7 +248,7 @@ index_vector generate_merged_indices(table_view const& left_table, column_order, stream, rmm::mr::get_current_device_resource()); if (has_nulls) { - auto new_null_precedence = [&]() { + auto const new_null_precedence = [&]() { if (null_precedence.size() > 0) { CUDF_EXPECTS(static_cast(null_precedence.size()) == left_table.num_columns(), "Null precedence vector size mismatched"); From e5a6d5d78c7f910b0cbed1080e8b733fcb8f64f9 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 26 Oct 2023 15:27:29 -0700 Subject: [PATCH 14/15] no column checks in comparator functor needed --- cpp/src/merge/merge.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index fbd8ffe4250..a8ab8bbc3a2 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -64,8 +64,6 @@ struct row_lexicographic_tagged_comparator { device_span const null_precedence) : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} { - // Add check for types to be the same. - CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); } __device__ bool operator()(index_type lhs_tagged_index, From d84391e1e1768b346dc4d1aee240ead228ac1ca6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 27 Oct 2023 15:06:05 -0700 Subject: [PATCH 15/15] address review feedback --- cpp/src/merge/merge.cu | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index a8ab8bbc3a2..e47abd6ede4 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -36,6 +36,8 @@ #include #include +#include +#include #include #include @@ -74,8 +76,8 @@ struct row_lexicographic_tagged_comparator { table_device_view const* ptr_left_dview{l_side == side::LEFT ? &_lhs : &_rhs}; table_device_view const* ptr_right_dview{r_side == side::LEFT ? &_lhs : &_rhs}; - auto comparator = [&]() { - if (has_nulls) { + auto const comparator = [&]() { + if constexpr (has_nulls) { return cudf::experimental::row::lexicographic::device_row_comparator{ has_nulls, *ptr_left_dview, *ptr_right_dview, _column_order, _null_precedence}; } else { @@ -84,9 +86,7 @@ struct row_lexicographic_tagged_comparator { } }(); - auto weak_order = comparator(l_indx, r_indx); - - return weak_order == weak_ordering::LESS; + return comparator(l_indx, r_indx) == weak_ordering::LESS; } private: @@ -638,6 +638,13 @@ table_ptr_type merge(std::vector const& tables_to_merge, CUDF_EXPECTS(key_cols.size() == column_order.size(), "Mismatched size between key_cols and column_order"); + CUDF_EXPECTS(std::accumulate(tables_to_merge.cbegin(), + tables_to_merge.cend(), + cudf::size_type{0}, + [](auto const& running_sum, auto const& tbl) { + return running_sum + tbl.num_rows(); + }) <= std::numeric_limits::max(), + "Total number of merged rows exceeds row limit"); // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views.