Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix conditional joins with empty left table #9146

Merged
merged 6 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions cpp/src/join/conditional_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,8 @@ conditional_join(table_view const& left,
// null index for the right table; in others, we return an empty output.
if (right.num_rows() == 0) {
switch (join_type) {
// Left, left anti, and full (which are effectively left because we are
// guaranteed that left has more rows than right) all return a all the
// row indices from left with a corresponding NULL from the right.
// Left, left anti, and full all return all the row indices from left
// with a corresponding NULL from the right.
case join_kind::LEFT_JOIN:
case join_kind::LEFT_ANTI_JOIN:
case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left, stream);
Expand All @@ -61,6 +60,23 @@ conditional_join(table_view const& left,
case join_kind::LEFT_SEMI_JOIN:
return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
default: CUDF_FAIL("Invalid join kind."); break;
}
} else if (left.num_rows() == 0) {
switch (join_type) {
// Left, left anti, left semi, and inner joins all return empty sets.
case join_kind::LEFT_JOIN:
case join_kind::LEFT_ANTI_JOIN:
case join_kind::INNER_JOIN:
case join_kind::LEFT_SEMI_JOIN:
return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
// Full joins need to return the trivial complement.
case join_kind::FULL_JOIN: {
auto ret_flipped = get_trivial_left_join_indices(right, stream);
return std::make_pair(std::move(ret_flipped.second), std::move(ret_flipped.first));
}
vyasr marked this conversation as resolved.
Show resolved Hide resolved
default: CUDF_FAIL("Invalid join kind."); break;
}
}

Expand Down Expand Up @@ -118,8 +134,14 @@ conditional_join(table_view const& left,

// If the output size will be zero, we can return immediately.
if (join_size == 0) {
return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
auto join_indices{
std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr))};
if (join_type == join_kind::FULL_JOIN) {
auto complement_indices = detail::get_left_join_indices_complement(
join_indices.second, left.num_rows(), right.num_rows(), stream, mr);
return detail::concatenate_vector_pairs(join_indices, complement_indices, stream);
}
vyasr marked this conversation as resolved.
Show resolved Hide resolved
}

rmm::device_scalar<size_type> write_index(0, stream);
Expand Down Expand Up @@ -177,20 +199,31 @@ std::size_t compute_conditional_join_output_size(table_view const& left,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// We can immediately filter out cases where the right table is empty. In
// some cases, we return all the rows of the left table with a corresponding
// null index for the right table; in others, we return an empty output.
// We can immediately filter out cases where one table is empty. In
// some cases, we return all the rows of the other table with a corresponding
// null index for the empty table; in others, we return an empty output.
if (right.num_rows() == 0) {
switch (join_type) {
// Left, left anti, and full (which are effectively left because we are
// guaranteed that left has more rows than right) all return a all the
// row indices from left with a corresponding NULL from the right.
// Left, left anti, and full all return all the row indices from left
// with a corresponding NULL from the right.
case join_kind::LEFT_JOIN:
case join_kind::LEFT_ANTI_JOIN:
case join_kind::FULL_JOIN: return left.num_rows();
// Inner and left semi joins return empty output because no matches can exist.
case join_kind::INNER_JOIN:
case join_kind::LEFT_SEMI_JOIN: return 0;
default: CUDF_FAIL("Invalid join kind."); break;
}
} else if (left.num_rows() == 0) {
switch (join_type) {
// Left, left anti, left semi, and inner joins all return empty sets.
case join_kind::LEFT_JOIN:
case join_kind::LEFT_ANTI_JOIN:
case join_kind::INNER_JOIN:
case join_kind::LEFT_SEMI_JOIN: return 0;
// Full joins need to return the trivial complement.
case join_kind::FULL_JOIN: return right.num_rows();
default: CUDF_FAIL("Invalid join kind."); break;
}
}

Expand Down
18 changes: 18 additions & 0 deletions cpp/tests/join/conditional_join_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,11 @@ TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnOneRowAllEqual)
this->test({{0}}, {{0}}, left_zero_eq_right_zero, {{0, 0}});
};

// Conditional inner join exercised with `{{}}` as the first input and a
// one-column input {3, 4, 5} as the second, using the
// `left_zero_eq_right_zero` predicate. The final argument is `{}`, i.e. no
// result pairs are listed as expected.
// NOTE(review): the fixture's `test` helper is not visible here; the argument
// order (left, right, predicate, expected index pairs) and the reading of
// `{{}}` as a single empty column are inferred from the sibling tests --
// confirm against the fixture.
TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnLeftEmpty)
{
this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {});
};

TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnTwoRowAllEqual)
{
this->test({{0, 1}}, {{0, 0}}, left_zero_eq_right_zero, {{0, 0}, {0, 1}});
Expand Down Expand Up @@ -489,6 +494,11 @@ TYPED_TEST(ConditionalLeftJoinTest, TestTwoColumnThreeRowSomeEqual)
{{0, 0}, {1, 1}, {2, JoinNoneValue}});
};

// Conditional left join exercised with `{{}}` as the first input and a
// one-column input {3, 4, 5} as the second, using the
// `left_zero_eq_right_zero` predicate. The final argument is `{}`, i.e. no
// result pairs are listed as expected.
// NOTE(review): the fixture's `test` helper is not visible here; the argument
// order (left, right, predicate, expected index pairs) and the reading of
// `{{}}` as a single empty column are inferred from the sibling tests --
// confirm against the fixture.
TYPED_TEST(ConditionalLeftJoinTest, TestOneColumnLeftEmpty)
{
this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {});
};

TYPED_TEST(ConditionalLeftJoinTest, TestCompareRandomToHash)
{
// Generate columns of 10 repeats of the integer range [0, 10), then merge
Expand Down Expand Up @@ -560,6 +570,14 @@ TYPED_TEST(ConditionalFullJoinTest, TestOneColumnNoneEqual)
{JoinNoneValue, 2}});
};

// Conditional full join exercised with `{{}}` as the first input and a
// one-column input {3, 4, 5} as the second, using the
// `left_zero_eq_right_zero` predicate. Unlike the inner/left variants of this
// test, the expected list is non-empty: one pair per row of the second input
// (indices 0, 1, 2), each with `JoinNoneValue` in the first position.
// NOTE(review): the fixture's `test` helper and `JoinNoneValue` are defined
// outside this view; the argument order (left, right, predicate, expected
// index pairs) and `JoinNoneValue` presumably marking "no matching row" are
// inferred from the sibling tests -- confirm against the fixture.
TYPED_TEST(ConditionalFullJoinTest, TestOneColumnLeftEmpty)
{
this->test({{}},
{{3, 4, 5}},
left_zero_eq_right_zero,
{{JoinNoneValue, 0}, {JoinNoneValue, 1}, {JoinNoneValue, 2}});
};

TYPED_TEST(ConditionalFullJoinTest, TestTwoColumnThreeRowSomeEqual)
{
this->test({{0, 1, 2}, {10, 20, 30}},
Expand Down