Skip to content

Commit

Permalink
Fix a bug: inner_join_size return zero if build table is empty (#9128)
Browse files Browse the repository at this point in the history
Closes #9092

This PR fixed a bug where `inner_join_size` would throw an exception if the build table is empty. Corresponding unit tests are added as well.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #9128
  • Loading branch information
PointKernel authored Aug 26, 2021
1 parent 86a4459 commit 4d8e401
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 10 deletions.
10 changes: 6 additions & 4 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,9 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p
rmm::cuda_stream_view stream) const
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(_hash_table, "Hash table of hash join is null.");

// Return directly if build table is empty
if (_hash_table == nullptr) { return 0; }

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE);
Expand All @@ -367,7 +369,7 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr
CUDF_FUNC_RANGE();

// Trivial left join case - exit early
if (!_hash_table) { return probe.num_rows(); }
if (_hash_table == nullptr) { return probe.num_rows(); }

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE);
Expand All @@ -388,7 +390,7 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr
CUDF_FUNC_RANGE();

// Trivial left join case - exit early
if (!_hash_table) { return probe.num_rows(); }
if (_hash_table == nullptr) { return probe.num_rows(); }

auto flattened_probe = structs::detail::flatten_nested_columns(
probe, {}, {}, structs::detail::column_nullability::FORCE);
Expand Down Expand Up @@ -447,7 +449,7 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe,
rmm::mr::device_memory_resource* mr) const
{
// Trivial left join case - exit early
if (!_hash_table && JoinKind != cudf::detail::join_kind::INNER_JOIN) {
if (_hash_table == nullptr and JoinKind != cudf::detail::join_kind::INNER_JOIN) {
return get_trivial_left_join_indices(probe, stream, mr);
}

Expand Down
66 changes: 60 additions & 6 deletions cpp/tests/join/join_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -987,8 +987,26 @@ TEST_F(JoinTest, EmptyRightTableInnerJoin)
Table t0(std::move(cols0));
Table empty1(std::move(cols1));

auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result);
{
auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result);
}

{
cudf::hash_join hash_join(empty1, cudf::null_equality::EQUAL);

auto output_size = hash_join.inner_join_size(t0);
std::optional<std::size_t> optional_size = output_size;

std::size_t const size_gold = 0;
EXPECT_EQ(output_size, size_gold);

auto result = hash_join.inner_join(t0, cudf::null_equality::EQUAL, optional_size);
column_wrapper<int32_t> col_gold_0{};
column_wrapper<int32_t> col_gold_1{};
auto const [sorted_gold, sorted_result] = gather_maps_as_tables(col_gold_0, col_gold_1, result);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
}
}

TEST_F(JoinTest, EmptyRightTableLeftJoin)
Expand All @@ -1008,8 +1026,26 @@ TEST_F(JoinTest, EmptyRightTableLeftJoin)
Table t0(std::move(cols0));
Table empty1(std::move(cols1));

auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result);
{
auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result);
}

{
cudf::hash_join hash_join(empty1, cudf::null_equality::EQUAL);

auto output_size = hash_join.left_join_size(t0);
std::optional<std::size_t> optional_size = output_size;

std::size_t const size_gold = 5;
EXPECT_EQ(output_size, size_gold);

auto result = hash_join.left_join(t0, cudf::null_equality::EQUAL, optional_size);
column_wrapper<int32_t> col_gold_0{{0, 1, 2, 3, 4}};
column_wrapper<int32_t> col_gold_1{{NoneValue, NoneValue, NoneValue, NoneValue, NoneValue}};
auto const [sorted_gold, sorted_result] = gather_maps_as_tables(col_gold_0, col_gold_1, result);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
}
}

TEST_F(JoinTest, EmptyRightTableFullJoin)
Expand All @@ -1029,8 +1065,26 @@ TEST_F(JoinTest, EmptyRightTableFullJoin)
Table t0(std::move(cols0));
Table empty1(std::move(cols1));

auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result);
{
auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1});
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result);
}

{
cudf::hash_join hash_join(empty1, cudf::null_equality::EQUAL);

auto output_size = hash_join.full_join_size(t0);
std::optional<std::size_t> optional_size = output_size;

std::size_t const size_gold = 5;
EXPECT_EQ(output_size, size_gold);

auto result = hash_join.full_join(t0, cudf::null_equality::EQUAL, optional_size);
column_wrapper<int32_t> col_gold_0{{0, 1, 2, 3, 4}};
column_wrapper<int32_t> col_gold_1{{NoneValue, NoneValue, NoneValue, NoneValue, NoneValue}};
auto const [sorted_gold, sorted_result] = gather_maps_as_tables(col_gold_0, col_gold_1, result);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
}
}

// Both tables empty
Expand Down

0 comments on commit 4d8e401

Please sign in to comment.