Skip to content

Commit

Permalink
Add a new nullable function for table_views and address various minor…
Browse files Browse the repository at this point in the history
… outstanding PR comments from PR rapidsai#8214.
  • Loading branch information
vyasr committed Jul 21, 2021
1 parent 61ebf96 commit cade875
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 16 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/ast/detail/linearizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class linearizer {
* @param right The right table used for evaluating the abstract syntax tree.
*/
linearizer(detail::node const& expr, cudf::table_view left, cudf::table_view right)
: _left(left), _right(right), _node_count(0), _intermediate_counter()
: _left{left}, _right{right}, _node_count{0}, _intermediate_counter{}
{
expr.accept(*this);
}
Expand All @@ -120,7 +120,7 @@ class linearizer {
* @param table The table used for evaluating the abstract syntax tree.
*/
linearizer(detail::node const& expr, cudf::table_view table)
: _left(table), _right(table), _node_count(0), _intermediate_counter()
: _left{table}, _right{table}, _node_count{0}, _intermediate_counter{}
{
expr.accept(*this);
}
Expand Down
20 changes: 20 additions & 0 deletions cpp/include/cudf/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,9 @@ class hash_join {
* The corresponding values in the second returned vector are
* the matched row indices from the right table.
*
* If the provided predicate returns NULL for a pair of rows
* (left, right), that pair is not included in the output.
*
* @code{.pseudo}
* Left: {{0, 1, 2}}
* Right: {{1, 2, 3}}
Expand All @@ -672,6 +675,7 @@ class hash_join {
*
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
* @throw cudf::logic_error if the binary predicate outputs a non-boolean result.
*
* @param left The left table
* @param right The right table
Expand Down Expand Up @@ -702,6 +706,9 @@ conditional_inner_join(
* from the right table, if there is a match or (2) an unspecified
* out-of-bounds value.
*
* If the provided predicate returns NULL for a pair of rows
* (left, right), that pair is not included in the output.
*
* @code{.pseudo}
* Left: {{0, 1, 2}}
* Right: {{1, 2, 3}}
Expand All @@ -716,6 +723,7 @@ conditional_inner_join(
*
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
* @throw cudf::logic_error if the binary predicate outputs a non-boolean result.
*
* @param left The left table
* @param right The right table
Expand Down Expand Up @@ -744,6 +752,9 @@ conditional_left_join(table_view left,
* right tables, (2) a row index and an unspecified out-of-bounds value,
* representing a row from one table without a match in the other.
*
* If the provided predicate returns NULL for a pair of rows
* (left, right), that pair is not included in the output.
*
* @code{.pseudo}
* Left: {{0, 1, 2}}
* Right: {{1, 2, 3}}
Expand All @@ -758,6 +769,7 @@ conditional_left_join(table_view left,
*
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
* @throw cudf::logic_error if the binary predicate outputs a non-boolean result.
*
* @param left The left table
* @param right The right table
Expand All @@ -781,6 +793,9 @@ conditional_full_join(table_view left,
* for which there exists some row in the right table where the predicate
* evaluates to true.
*
* If the provided predicate returns NULL for a pair of rows
* (left, right), that pair is not included in the output.
*
* @code{.pseudo}
* Left: {{0, 1, 2}}
* Right: {{1, 2, 3}}
Expand All @@ -795,6 +810,7 @@ conditional_full_join(table_view left,
*
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
* @throw cudf::logic_error if the binary predicate outputs a non-boolean result.
*
* @param left The left table
* @param right The right table
Expand All @@ -818,6 +834,9 @@ std::unique_ptr<rmm::device_uvector<size_type>> conditional_left_semi_join(
* for which there does not exist any row in the right table where the
* predicate evaluates to true.
*
* If the provided predicate returns NULL for a pair of rows
* (left, right), that pair is not included in the output.
*
* @code{.pseudo}
* Left: {{0, 1, 2}}
* Right: {{1, 2, 3}}
Expand All @@ -832,6 +851,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> conditional_left_semi_join(
*
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
* @throw cudf::logic_error if the binary predicate outputs a non-boolean result.
*
* @param left The left table
* @param right The right table
Expand Down
5 changes: 5 additions & 0 deletions cpp/include/cudf/table/table_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,11 @@ class mutable_table_view : public detail::table_view_base<mutable_column_view> {
mutable_table_view(std::vector<mutable_table_view> const& views);
};

/**
 * @brief Checks whether any column of a table view is nullable.
 *
 * @param view The table view whose columns are inspected
 * @return true if `nullable()` returns true for at least one column of `view`,
 * false otherwise (including when `view` has no columns)
 */
inline bool nullable(table_view const& view)
{
// Named predicate: reports whether a single column is nullable.
auto const column_is_nullable = [](auto const& column) { return column.nullable(); };
return std::any_of(view.begin(), view.end(), column_is_nullable);
}

inline bool has_nulls(table_view const& view)
{
return std::any_of(view.begin(), view.end(), [](auto const& col) { return col.has_nulls(); });
Expand Down
7 changes: 2 additions & 5 deletions cpp/src/ast/transform.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,8 @@ std::unique_ptr<column> compute_column(table_view const table,
// If none of the input columns actually contain nulls, we can still use the
// non-nullable version of the expression evaluation code path for
// performance, so we capture that information as well.
auto const nullable =
std::any_of(table.begin(), table.end(), [](column_view c) { return c.nullable(); });
auto const has_nulls = nullable && std::any_of(table.begin(), table.end(), [](column_view c) {
return c.nullable() && c.has_nulls();
});
auto const nullable = cudf::nullable(table);
auto const has_nulls = nullable && cudf::has_nulls(table);

auto const plan = ast_plan{expr, table, has_nulls, stream, mr};

Expand Down
11 changes: 2 additions & 9 deletions cpp/src/join/nested_loop_join.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,8 @@ get_conditional_join_indices(table_view const& left,
// If none of the input columns actually contain nulls, we can still use the
// non-nullable version of the expression evaluation code path for
// performance, so we capture that information as well.
auto const nullable =
std::any_of(left.begin(), left.end(), [](column_view c) { return c.nullable(); }) ||
std::any_of(right.begin(), right.end(), [](column_view c) { return c.nullable(); });
auto const has_nulls =
nullable &&
(std::any_of(
left.begin(), left.end(), [](column_view c) { return c.nullable() && c.has_nulls(); }) ||
std::any_of(
right.begin(), right.end(), [](column_view c) { return c.nullable() && c.has_nulls(); }));
auto const nullable = cudf::nullable(left) || cudf::nullable(right);
auto const has_nulls = nullable && (cudf::has_nulls(left) || cudf::has_nulls(right));

auto const plan = ast::detail::ast_plan{binary_pred, left, right, has_nulls, stream, mr};
CUDF_EXPECTS(plan.output_type().id() == type_id::BOOL8,
Expand Down

0 comments on commit cade875

Please sign in to comment.