diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index ee1eaeaed47..636729a735e 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -347,7 +347,9 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p rmm::cuda_stream_view stream) const { CUDF_FUNC_RANGE(); - CUDF_EXPECTS(_hash_table, "Hash table of hash join is null."); + + // Return directly if build table is empty + if (_hash_table == nullptr) { return 0; } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -367,7 +369,7 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (!_hash_table) { return probe.num_rows(); } + if (_hash_table == nullptr) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -388,7 +390,7 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (!_hash_table) { return probe.num_rows(); } + if (_hash_table == nullptr) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -447,7 +449,7 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe, rmm::mr::device_memory_resource* mr) const { // Trivial left join case - exit early - if (!_hash_table && JoinKind != cudf::detail::join_kind::INNER_JOIN) { + if (_hash_table == nullptr and JoinKind != cudf::detail::join_kind::INNER_JOIN) { return get_trivial_left_join_indices(probe, stream, mr); } diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index af998e366e9..8945f82baef 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -987,8 +987,26 @@ 
TEST_F(JoinTest, EmptyRightTableInnerJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); + { + auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); + } + + { + cudf::hash_join hash_join(empty1, cudf::null_equality::EQUAL); + + auto output_size = hash_join.inner_join_size(t0); + std::optional<std::size_t> optional_size = output_size; + + std::size_t const size_gold = 0; + EXPECT_EQ(output_size, size_gold); + + auto result = hash_join.inner_join(t0, cudf::null_equality::EQUAL, optional_size); + column_wrapper<int32_t> col_gold_0{}; + column_wrapper<int32_t> col_gold_1{}; + auto const [sorted_gold, sorted_result] = gather_maps_as_tables(col_gold_0, col_gold_1, result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); + } } TEST_F(JoinTest, EmptyRightTableLeftJoin) @@ -1008,8 +1026,26 @@ TEST_F(JoinTest, EmptyRightTableLeftJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); + { + auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); + } + + { + cudf::hash_join hash_join(empty1, cudf::null_equality::EQUAL); + + auto output_size = hash_join.left_join_size(t0); + std::optional<std::size_t> optional_size = output_size; + + std::size_t const size_gold = 5; + EXPECT_EQ(output_size, size_gold); + + auto result = hash_join.left_join(t0, cudf::null_equality::EQUAL, optional_size); + column_wrapper<int32_t> col_gold_0{{0, 1, 2, 3, 4}}; + column_wrapper<int32_t> col_gold_1{{NoneValue, NoneValue, NoneValue, NoneValue, NoneValue}}; + auto const [sorted_gold, sorted_result] = gather_maps_as_tables(col_gold_0, col_gold_1, result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); + } } TEST_F(JoinTest, EmptyRightTableFullJoin) @@ -1029,8 
+1065,26 @@ TEST_F(JoinTest, EmptyRightTableFullJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); + { + auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); + } + + { + cudf::hash_join hash_join(empty1, cudf::null_equality::EQUAL); + + auto output_size = hash_join.full_join_size(t0); + std::optional<std::size_t> optional_size = output_size; + + std::size_t const size_gold = 5; + EXPECT_EQ(output_size, size_gold); + + auto result = hash_join.full_join(t0, cudf::null_equality::EQUAL, optional_size); + column_wrapper<int32_t> col_gold_0{{0, 1, 2, 3, 4}}; + column_wrapper<int32_t> col_gold_1{{NoneValue, NoneValue, NoneValue, NoneValue, NoneValue}}; + auto const [sorted_gold, sorted_result] = gather_maps_as_tables(col_gold_0, col_gold_1, result); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); + } } // Both tables empty