From cade8755e38b8a7fd5a25e110dd10df160c4f121 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 13 Jul 2021 09:17:07 -0700 Subject: [PATCH] Add a new nullable function for table_views and address various minor outstanding PR comments from PR #8214. --- cpp/include/cudf/ast/detail/linearizer.hpp | 4 ++-- cpp/include/cudf/join.hpp | 20 ++++++++++++++++++++ cpp/include/cudf/table/table_view.hpp | 5 +++++ cpp/src/ast/transform.cu | 7 ++----- cpp/src/join/nested_loop_join.cuh | 11 ++--------- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index 67474e08877..3e10714aea9 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -108,7 +108,7 @@ class linearizer { * @param right The right table used for evaluating the abstract syntax tree. */ linearizer(detail::node const& expr, cudf::table_view left, cudf::table_view right) - : _left(left), _right(right), _node_count(0), _intermediate_counter() + : _left{left}, _right{right}, _node_count{0}, _intermediate_counter{} { expr.accept(*this); } @@ -120,7 +120,7 @@ class linearizer { * @param table The table used for evaluating the abstract syntax tree. */ linearizer(detail::node const& expr, cudf::table_view table) - : _left(table), _right(table), _node_count(0), _intermediate_counter() + : _left{table}, _right{table}, _node_count{0}, _intermediate_counter{} { expr.accept(*this); } diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 725c0fc3699..1cc2069335f 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -658,6 +658,9 @@ class hash_join { * The corresponding values in the second returned vector are * the matched row indices from the right table. * + * If the provided predicate returns NULL for a pair of rows + * (left, right), that pair is not included in the output. 
+ * * @code{.pseudo} * Left: {{0, 1, 2}} * Right: {{1, 2, 3}} @@ -672,6 +675,7 @@ class hash_join { * * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` * mismatch. + * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -702,6 +706,9 @@ conditional_inner_join( * from the right table, if there is a match or (2) an unspecified * out-of-bounds value. * + * If the provided predicate returns NULL for a pair of rows + * (left, right), that pair is not included in the output. + * * @code{.pseudo} * Left: {{0, 1, 2}} * Right: {{1, 2, 3}} @@ -716,6 +723,7 @@ conditional_inner_join( * * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` * mismatch. + * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -744,6 +752,9 @@ conditional_left_join(table_view left, * right tables, (2) a row index and an unspecified out-of-bounds value, * representing a row from one table without a match in the other. * + * If the provided predicate returns NULL for a pair of rows + * (left, right), that pair is not included in the output. + * * @code{.pseudo} * Left: {{0, 1, 2}} * Right: {{1, 2, 3}} @@ -758,6 +769,7 @@ conditional_left_join(table_view left, * * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` * mismatch. + * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -781,6 +793,9 @@ conditional_full_join(table_view left, * for which there exists some row in the right table where the predicate * evaluates to true. * + * If the provided predicate returns NULL for a pair of rows + * (left, right), that pair is not included in the output. 
+ * * @code{.pseudo} * Left: {{0, 1, 2}} * Right: {{1, 2, 3}} @@ -795,6 +810,7 @@ conditional_full_join(table_view left, * * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` * mismatch. + * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -818,6 +834,9 @@ std::unique_ptr> conditional_left_semi_join( * for which there does not exist any row in the right table where the * predicate evaluates to true. * + * If the provided predicate returns NULL for a pair of rows + * (left, right), that pair is not included in the output. + * * @code{.pseudo} * Left: {{0, 1, 2}} * Right: {{1, 2, 3}} @@ -832,6 +851,7 @@ std::unique_ptr> conditional_left_semi_join( * * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` * mismatch. + * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 1ff701c3b01..81d6050b1c6 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -257,6 +257,11 @@ class mutable_table_view : public detail::table_view_base { mutable_table_view(std::vector const& views); }; +inline bool nullable(table_view const& view) +{ + return std::any_of(view.begin(), view.end(), [](auto const& col) { return col.nullable(); }); +} + inline bool has_nulls(table_view const& view) { return std::any_of(view.begin(), view.end(), [](auto const& col) { return col.has_nulls(); }); diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index 7aa89635c54..f3c02949585 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -93,11 +93,8 @@ std::unique_ptr compute_column(table_view const table, // If none of the input columns actually contain nulls, we can still use the // non-nullable 
version of the expression evaluation code path for // performance, so we capture that information as well. - auto const nullable = - std::any_of(table.begin(), table.end(), [](column_view c) { return c.nullable(); }); - auto const has_nulls = nullable && std::any_of(table.begin(), table.end(), [](column_view c) { - return c.nullable() && c.has_nulls(); - }); + auto const nullable = cudf::nullable(table); + auto const has_nulls = nullable && cudf::has_nulls(table); auto const plan = ast_plan{expr, table, has_nulls, stream, mr}; diff --git a/cpp/src/join/nested_loop_join.cuh b/cpp/src/join/nested_loop_join.cuh index 9848477a894..f1e35d5422a 100644 --- a/cpp/src/join/nested_loop_join.cuh +++ b/cpp/src/join/nested_loop_join.cuh @@ -85,15 +85,8 @@ get_conditional_join_indices(table_view const& left, // If none of the input columns actually contain nulls, we can still use the // non-nullable version of the expression evaluation code path for // performance, so we capture that information as well. - auto const nullable = - std::any_of(left.begin(), left.end(), [](column_view c) { return c.nullable(); }) || - std::any_of(right.begin(), right.end(), [](column_view c) { return c.nullable(); }); - auto const has_nulls = - nullable && - (std::any_of( - left.begin(), left.end(), [](column_view c) { return c.nullable() && c.has_nulls(); }) || - std::any_of( - right.begin(), right.end(), [](column_view c) { return c.nullable() && c.has_nulls(); })); + auto const nullable = cudf::nullable(left) || cudf::nullable(right); + auto const has_nulls = nullable && (cudf::has_nulls(left) || cudf::has_nulls(right)); auto const plan = ast::detail::ast_plan{binary_pred, left, right, has_nulls, stream, mr}; CUDF_EXPECTS(plan.output_type().id() == type_id::BOOL8,