From bf1c6ee050961f872782ee719165e9e2d58bb7ff Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 4 May 2022 10:15:44 -0700 Subject: [PATCH 1/6] split off weak ordering row operator changes --- cpp/CMakeLists.txt | 2 +- cpp/include/cudf/detail/structs/utilities.hpp | 16 +++- .../cudf/table/experimental/row_operators.cuh | 92 +++++++++++++++---- cpp/src/structs/utilities.cpp | 6 ++ 4 files changed, 98 insertions(+), 18 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cbe2811afe4..7870366b714 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -189,7 +189,6 @@ add_library( src/ast/expression_parser.cpp src/ast/expressions.cpp src/binaryop/binaryop.cpp - src/binaryop/compiled/binary_ops.cu src/binaryop/compiled/Add.cu src/binaryop/compiled/ATan2.cu src/binaryop/compiled/BitwiseAnd.cu @@ -220,6 +219,7 @@ add_library( src/binaryop/compiled/ShiftRightUnsigned.cu src/binaryop/compiled/Sub.cu src/binaryop/compiled/TrueDiv.cu + src/binaryop/compiled/binary_ops.cu src/binaryop/compiled/util.cpp src/labeling/label_bins.cu src/bitmask/null_mask.cu diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 751b7c00e8a..45d4c3b5ae4 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -245,6 +245,20 @@ std::tuple> superimpose_parent table_view const& table, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Checks if a column or any of its children is a struct column with structs that are null. + * + * This function searches for structs that are null -- differentiating between structs that are null + * and structs containing null values. Null structs add a column to the result of the flatten column + * utility and necessitates column_nullability::FORCE when flattening the column for comparison + * operations. + * + * @param col Column to check for null structs + * @return A boolean indicating if the column is or contains a struct column that contains a null + * struct. + */ +bool contains_null_structs(column_view const& col); } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 2ed45c71633..2a2b1f90d43 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -68,16 +68,17 @@ struct dispatch_void_if_nested { }; namespace row { - namespace lexicographic { /** - * @brief Computes whether one row is lexicographically *less* than another row. + * @brief Computes the lexicographic comparison between 2 rows. * * Lexicographic ordering is determined by: * - Two rows are compared element by element. * - The first mismatching element defines which row is lexicographically less * or greater than the other. + * - If the rows are compared without mismatched elements, the rows are equivalent + * * * Lexicographic ordering is exactly equivalent to doing an alphabetical sort of * two words, for example, `aac` would be *less* than (or precede) `abb`. The @@ -88,8 +89,7 @@ namespace lexicographic { */ template class device_row_comparator { - friend class self_comparator; - + public: /** * @brief Construct a function object for performing a lexicographic * comparison between the rows of two tables. @@ -145,7 +145,11 @@ class device_row_comparator { column_device_view rhs, null_order null_precedence = null_order::BEFORE, int depth = 0) - : _lhs{lhs}, _rhs{rhs}, _nulls{check_nulls}, _null_precedence{null_precedence}, _depth{depth} + : _lhs{lhs}, + _rhs{rhs}, + _check_nulls{check_nulls}, + _null_precedence{null_precedence}, + _depth{depth} { } @@ -162,7 +166,7 @@ class device_row_comparator { __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { - if (_nulls) { + if (_check_nulls) { bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; @@ -211,7 +215,7 @@ class device_row_comparator { ++depth; } - auto const comparator = element_comparator{_nulls, lcol, rcol, _null_precedence, depth}; + auto const comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth}; return cudf::type_dispatcher( lcol.type(), comparator, lhs_element_index, rhs_element_index); } @@ -219,7 +223,7 @@ class device_row_comparator { private: column_device_view const _lhs; column_device_view const _rhs; - Nullate const _nulls; + Nullate const _check_nulls; null_order const _null_precedence; int const _depth; }; @@ -227,13 +231,14 @@ class device_row_comparator { public: /** * @brief Checks whether the row at `lhs_index` in the `lhs` table compares - * lexicographically less than the row at `rhs_index` in the `rhs` table. + * lexicographically less, greater, or equal to the row at `rhs_index` in the `rhs` table. * * @param lhs_index The index of row in the `lhs` table to examine * @param rhs_index The index of the row in the `rhs` table to examine - * @return `true` if row from the `lhs` table compares less than row in the `rhs` table + * @return weak ordering comparison of the row in the `lhs` table relative to the row in the `rhs` + * table */ - __device__ bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept + __device__ weak_ordering operator()(size_type lhs_index, size_type rhs_index) const noexcept { int last_null_depth = std::numeric_limits::max(); for (size_type i = 0; i < _lhs.num_columns(); ++i) { @@ -248,16 +253,17 @@ class device_row_comparator { auto const comparator = element_comparator{_check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth}; - weak_ordering state; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); if (state == weak_ordering::EQUIVALENT) { continue; } - return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER); + return ascending + ? state + : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER); } - return false; + return weak_ordering::EQUIVALENT; } private: @@ -269,6 +275,60 @@ class device_row_comparator { std::optional> const _null_precedence; }; // class device_row_comparator +/** + * @brief Wraps and interprets the result of device_row_comparator, true if the result is + * weak_ordering::LESS meaning one row is lexicographically *less* than another row. + * + * @tparam Nullate A cudf::nullate type describing whether to check for nulls. + */ +template +class device_less_comparator { + friend class self_comparator; + /** + * @brief Construct a function object for performing a lexicographic + * comparison between the rows of two tables. + * + * @param has_nulls Indicates if either input table contains columns with nulls. + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param depth Optional, device array the same length as a row that contains starting depths of + * columns if they're nested, and 0 otherwise. + * @param column_order Optional, device array the same length as a row that indicates the desired + * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are + * sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row and indicates how null + * values compare to all other for every column. If `nullopt`, then null precedence would be + * `null_order::BEFORE` for all columns. + */ + device_less_comparator( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt) noexcept + : comparator{check_nulls, lhs, rhs, depth, column_order, null_precedence} + { + } + + public: + /** + * @brief Checks whether the row at `lhs_index` in the `lhs` table compares + * lexicographically less than the row at `rhs_index` in the `rhs` table. + * + * @param lhs_index The index of row in the `lhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine + * @return `true` if row from the `lhs` table compares less than row in the `rhs` table + */ + __device__ bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept + { + return comparator(lhs_index, rhs_index) == weak_ordering::LESS; + } + + private: + device_row_comparator comparator; +}; // class device_less_comparator + struct preprocessed_table { using table_device_view_owner = std::invoke_result_t; @@ -417,9 +477,9 @@ class self_comparator { * @tparam Nullate A cudf::nullate type describing whether to check for nulls. */ template - device_row_comparator device_comparator(Nullate nullate = {}) const + device_less_comparator device_comparator(Nullate nullate = {}) const { - return device_row_comparator( + return device_less_comparator( nullate, *d_t, *d_t, d_t->depths(), d_t->column_order(), d_t->null_precedence()); } diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index a2c173cae5f..5baab0f09a2 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -441,6 +441,12 @@ std::tuple> superimpose_parent return {table_view{superimposed_columns}, std::move(superimposed_nullmasks)}; } +bool contains_null_structs(column_view const& col) +{ + return (is_struct(col) && col.has_nulls()) || + std::any_of(col.child_begin(), col.child_end(), contains_null_structs); +} + } // namespace detail } // namespace structs } // namespace cudf From 5d87db2ee34f1f41dbd59ea50d82c06bcc5ae1b4 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 4 May 2022 12:27:06 -0700 Subject: [PATCH 2/6] device_row_comparator private with friend class --- cpp/include/cudf/table/experimental/row_operators.cuh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 2a2b1f90d43..4aefa7cf948 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -70,6 +70,9 @@ struct dispatch_void_if_nested { namespace row { namespace lexicographic { +template +class device_less_comparator; + /** * @brief Computes the lexicographic comparison between 2 rows. * @@ -89,7 +92,7 @@ namespace lexicographic { */ template class device_row_comparator { - public: + friend class device_less_comparator; /** * @brief Construct a function object for performing a lexicographic * comparison between the rows of two tables. From 2dd2045d54b1e296fa0fe011f2d28efd0d6c23d7 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Fri, 6 May 2022 14:36:28 -0700 Subject: [PATCH 3/6] device_less conversion to templated struct --- .../cudf/table/experimental/row_operators.cuh | 92 ++++++++----------- 1 file changed, 39 insertions(+), 53 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 4aefa7cf948..0319c7e682c 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -45,6 +45,7 @@ #include namespace cudf { + namespace experimental { /** @@ -70,9 +71,6 @@ struct dispatch_void_if_nested { namespace row { namespace lexicographic { -template -class device_less_comparator; - /** * @brief Computes the lexicographic comparison between 2 rows. * @@ -92,7 +90,8 @@ class device_less_comparator; */ template class device_row_comparator { - friend class device_less_comparator; + // friend class device_less_comparator; + friend class self_comparator; /** * @brief Construct a function object for performing a lexicographic * comparison between the rows of two tables. @@ -234,7 +233,7 @@ class device_row_comparator { public: /** * @brief Checks whether the row at `lhs_index` in the `lhs` table compares - * lexicographically less, greater, or equal to the row at `rhs_index` in the `rhs` table. + * lexicographically less, greater, or equivalent to the row at `rhs_index` in the `rhs` table. * * @param lhs_index The index of row in the `lhs` table to examine * @param rhs_index The index of the row in the `rhs` table to examine @@ -278,6 +277,32 @@ class device_row_comparator { std::optional> const _null_precedence; }; // class device_row_comparator +template +__device__ bool any_equal(weak_ordering result, weak_ordering v, WeakOrderings... vs) +{ + if constexpr (sizeof...(WeakOrderings) == 0) { + return result == v; + } else { + return result == v or any_equal(result, vs...); + } +} + +/** + * @brief Wraps and interprets the result of templated Comparator that returns a weak_ordering. + * Returns true if the weak_ordering matches any of the templated values. + * + * @tparam Comparator generic comparator that returns a weak_ordering. + * @tparam values weak_ordering parameter pack of orderings to interpret as true + */ +template +struct weak_ordering_comparator_impl { + __device__ bool operator()(size_type const& lhs, size_type const& rhs) + { + return any_equal(comparator(lhs, rhs), values...); + } + Comparator comparator; +}; + /** * @brief Wraps and interprets the result of device_row_comparator, true if the result is * weak_ordering::LESS meaning one row is lexicographically *less* than another row. @@ -285,52 +310,13 @@ class device_row_comparator { * @tparam Nullate A cudf::nullate type describing whether to check for nulls. */ template -class device_less_comparator { - friend class self_comparator; - /** - * @brief Construct a function object for performing a lexicographic - * comparison between the rows of two tables. - * - * @param has_nulls Indicates if either input table contains columns with nulls. - * @param lhs The first table - * @param rhs The second table (may be the same table as `lhs`) - * @param depth Optional, device array the same length as a row that contains starting depths of - * columns if they're nested, and 0 otherwise. - * @param column_order Optional, device array the same length as a row that indicates the desired - * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are - * sorted in ascending order. - * @param null_precedence Optional, device array the same length as a row and indicates how null - * values compare to all other for every column. If `nullopt`, then null precedence would be - * `null_order::BEFORE` for all columns. - */ - device_less_comparator( - Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt) noexcept - : comparator{check_nulls, lhs, rhs, depth, column_order, null_precedence} - { - } +using less_comparator = + weak_ordering_comparator_impl, weak_ordering::LESS>; - public: - /** - * @brief Checks whether the row at `lhs_index` in the `lhs` table compares - * lexicographically less than the row at `rhs_index` in the `rhs` table. - * - * @param lhs_index The index of row in the `lhs` table to examine - * @param rhs_index The index of the row in the `rhs` table to examine - * @return `true` if row from the `lhs` table compares less than row in the `rhs` table - */ - __device__ bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept - { - return comparator(lhs_index, rhs_index) == weak_ordering::LESS; - } - - private: - device_row_comparator comparator; -}; // class device_less_comparator +template +using less_equivalent_comparator = weak_ordering_comparator_impl, + weak_ordering::LESS, + weak_ordering::EQUIVALENT>; struct preprocessed_table { using table_device_view_owner = @@ -480,10 +466,10 @@ class self_comparator { * @tparam Nullate A cudf::nullate type describing whether to check for nulls. */ template - device_less_comparator device_comparator(Nullate nullate = {}) const + less_comparator device_comparator(Nullate nullate = {}) const { - return device_less_comparator( - nullate, *d_t, *d_t, d_t->depths(), d_t->column_order(), d_t->null_precedence()); + return less_comparator{device_row_comparator( + nullate, *d_t, *d_t, d_t->depths(), d_t->column_order(), d_t->null_precedence())}; } private: From 7ba960e0a5fe792041e641aac903542a098d0e53 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 9 May 2022 14:16:35 -0700 Subject: [PATCH 4/6] fold parameter pack --- .../cudf/table/experimental/row_operators.cuh | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 0319c7e682c..76daa6dbca4 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -277,16 +277,6 @@ class device_row_comparator { std::optional> const _null_precedence; }; // class device_row_comparator -template -__device__ bool any_equal(weak_ordering result, weak_ordering v, WeakOrderings... vs) -{ - if constexpr (sizeof...(WeakOrderings) == 0) { - return result == v; - } else { - return result == v or any_equal(result, vs...); - } -} - /** * @brief Wraps and interprets the result of templated Comparator that returns a weak_ordering. * Returns true if the weak_ordering matches any of the templated values. @@ -295,12 +285,12 @@ __device__ bool any_equal(weak_ordering result, weak_ordering v, WeakOrderings.. * @tparam values weak_ordering parameter pack of orderings to interpret as true */ template -struct weak_ordering_comparator_impl { - __device__ bool operator()(size_type const& lhs, size_type const& rhs) - { - return any_equal(comparator(lhs, rhs), values...); - } - Comparator comparator; +struct weak_ordering_comparator_impl{ + __device__ bool operator()(size_type const& lhs, size_type const& rhs){ + weak_ordering const result = comparator(lhs, rhs); + return ( (result == values) || ...); + } + Comparator comparator; }; /** From 84833e7f3f2f6fe4ccec3de890cb9966b29c9afd Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 9 May 2022 18:53:30 -0700 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Bradley Dice --- cpp/include/cudf/table/experimental/row_operators.cuh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 76daa6dbca4..98ccd6c2b8c 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -90,7 +90,6 @@ namespace lexicographic { */ template class device_row_comparator { - // friend class device_less_comparator; friend class self_comparator; /** * @brief Construct a function object for performing a lexicographic @@ -281,6 +280,9 @@ class device_row_comparator { * @brief Wraps and interprets the result of templated Comparator that returns a weak_ordering. * Returns true if the weak_ordering matches any of the templated values. * + * Note that this should never be used with only `weak_ordering::EQUIVALENT`. + * An equality comparator should be used instead for optimal performance. + * * @tparam Comparator generic comparator that returns a weak_ordering. * @tparam values weak_ordering parameter pack of orderings to interpret as true */ From 4d197eae7e2a68b8da86e8561b54629e18a3b8e3 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 9 May 2022 20:18:36 -0700 Subject: [PATCH 6/6] fix code style --- .../cudf/table/experimental/row_operators.cuh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 76daa6dbca4..98d08c28d31 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -285,12 +285,13 @@ class device_row_comparator { * @tparam values weak_ordering parameter pack of orderings to interpret as true */ template -struct weak_ordering_comparator_impl{ - __device__ bool operator()(size_type const& lhs, size_type const& rhs){ - weak_ordering const result = comparator(lhs, rhs); - return ( (result == values) || ...); - } - Comparator comparator; +struct weak_ordering_comparator_impl { + __device__ bool operator()(size_type const& lhs, size_type const& rhs) + { + weak_ordering const result = comparator(lhs, rhs); + return ((result == values) || ...); + } + Comparator comparator; }; /**