From 9f573016959754e3272b3b9b0f09583d0a5529a3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 19 Jul 2022 12:13:49 -0700 Subject: [PATCH] Remove legacy join APIs (#11274) Resolves #7762 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Mark Harris (https://github.com/harrism) - Mike Wilson (https://github.com/hyperbolic2346) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/11274 --- cpp/benchmarks/join/join.cu | 18 +- cpp/benchmarks/join/join_common.hpp | 11 +- cpp/benchmarks/join/left_join.cu | 44 +- cpp/include/cudf/join.hpp | 250 -------- cpp/src/join/join.cu | 174 ------ cpp/src/join/semi_join.cu | 122 ---- cpp/tests/join/join_tests.cpp | 162 ++++-- cpp/tests/join/semi_anti_join_tests.cpp | 78 ++- java/src/main/java/ai/rapids/cudf/Table.java | 170 ------ java/src/main/native/src/TableJni.cpp | 245 -------- .../test/java/ai/rapids/cudf/TableTest.java | 550 ------------------ 11 files changed, 214 insertions(+), 1610 deletions(-) diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu index f21356aff02..b42cda7f24c 100644 --- a/cpp/benchmarks/join/join.cu +++ b/cpp/benchmarks/join/join.cu @@ -44,12 +44,10 @@ void nvbench_inner_join(nvbench::state& state, auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) { - cudf::hash_join hj_obj(left_input.select(left_on), compare_nulls, stream); - return hj_obj.inner_join(right_input.select(right_on), std::nullopt, stream); + cudf::hash_join hj_obj(left_input, compare_nulls, stream); + return hj_obj.inner_join(right_input, std::nullopt, stream); }; BM_join(state, join); @@ -66,12 +64,10 @@ void nvbench_left_join(nvbench::state& state, auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) { - cudf::hash_join hj_obj(left_input.select(left_on), compare_nulls, stream); - return hj_obj.left_join(right_input.select(right_on), std::nullopt, stream); + cudf::hash_join hj_obj(left_input, compare_nulls, stream); + return hj_obj.left_join(right_input, std::nullopt, stream); }; BM_join(state, join); @@ -88,12 +84,10 @@ void nvbench_full_join(nvbench::state& state, auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) { - cudf::hash_join hj_obj(left_input.select(left_on), compare_nulls, stream); - return hj_obj.full_join(right_input.select(right_on), std::nullopt, stream); + cudf::hash_join hj_obj(left_input, compare_nulls, stream); + return hj_obj.full_join(right_input, std::nullopt, stream); }; BM_join(state, join); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 7d80b42529e..6762b9c1f34 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -143,17 +143,16 @@ static void BM_join(state_type& state, Join JoinFunc) for (auto _ : state) { cuda_event_timer raii(state, true, cudf::default_stream_value); - auto result = JoinFunc( - probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); + auto result = JoinFunc(probe_table.select(columns_to_join), + build_table.select(columns_to_join), + cudf::null_equality::UNEQUAL); } } if constexpr (std::is_same_v and (not is_conditional)) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = JoinFunc(probe_table, - build_table, - columns_to_join, - columns_to_join, + auto result = JoinFunc(probe_table.select(columns_to_join), + build_table.select(columns_to_join), cudf::null_equality::UNEQUAL, stream_view); }); diff --git a/cpp/benchmarks/join/left_join.cu b/cpp/benchmarks/join/left_join.cu index 58a1c2d7f29..5c1e5483ad4 100644 --- a/cpp/benchmarks/join/left_join.cu +++ b/cpp/benchmarks/join/left_join.cu @@ -20,18 +20,16 @@ template class Join : public cudf::benchmark { }; -#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - std::vector const& left_on, \ - std::vector const& right_on, \ - cudf::null_equality compare_nulls) { \ - return cudf::left_anti_join(left, right, left_on, right_on, compare_nulls); \ - }; \ - BM_join(st, join); \ +#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_anti_join(left, right, compare_nulls); \ + }; \ + BM_join(st, join); \ } LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit, int32_t, int32_t, false); @@ -39,18 +37,16 @@ LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit, int64_t, int64_t, false); LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit_nulls, int32_t, int32_t, true); LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit_nulls, int64_t, int64_t, true); -#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ - (::benchmark::State & st) \ - { \ - auto join = [](cudf::table_view const& left, \ - cudf::table_view const& right, \ - std::vector const& left_on, \ - std::vector const& right_on, \ - cudf::null_equality compare_nulls) { \ - return cudf::left_semi_join(left, right, left_on, right_on, compare_nulls); \ - }; \ - BM_join(st, join); \ +#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_semi_join(left, right, compare_nulls); \ + }; \ + BM_join(st, join); \ } LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit, int32_t, int32_t, false); diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index d4d6e44509f..bc3bfef3a7d 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -87,51 +87,6 @@ inner_join(cudf::table_view const& left_keys, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs an inner join on the specified columns of two - * tables (`left`, `right`) - * - * Inner Join returns rows from both tables as long as the values - * in the columns being joined on match. - * - * @code{.pseudo} - * Left: {{0, 1, 2}} - * Right: {{4, 9, 3}, {1, 2, 5}} - * left_on: {0} - * right_on: {1} - * Result: {{1, 2}, {4, 9}, {1, 2}} - * @endcode - * - * @throw cudf::logic_error if number of elements in `left_on` or `right_on` - * mismatch. - * @throw cudf::logic_error if number of columns in either `left` or `right` - * table is 0 or exceeds MAX_JOIN_SIZE - * @throw std::out_of_range if element of `left_on` or `right_on` exceed the - * number of columns in the left or right table. - * - * @param[in] left The left table - * @param[in] right The right table - * @param[in] left_on The column indices from `left` to join on. - * The column from `left` indicated by `left_on[i]` will be compared against the column - * from `right` indicated by `right_on[i]`. - * @param[in] right_on The column indices from `right` to join on. - * The column from `right` indicated by `right_on[i]` will be compared against the column - * from `left` indicated by `left_on[i]`. - * @param[in] compare_nulls controls whether null join-key values - * should match or not. - * @param mr Device memory resource used to allocate the returned table and columns' device memory - * - * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. - */ -std::unique_ptr inner_join( - cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Returns a pair of row index vectors corresponding to a * left join between the specified tables. @@ -172,59 +127,6 @@ left_join(cudf::table_view const& left_keys, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs a left join (also known as left outer join) on the - * specified columns of two tables (`left`, `right`) - * - * Left join returns all the rows from the left table and those rows from the - * right table that match on the joined columns. - * For rows from the right table that do not have a match, the corresponding - * values in the left columns will be null. - * - * @code{.pseudo} - * Left: {{0, 1, 2}} - * Right: {{1, 2, 3}, {1, 2 ,5}} - * left_on: {0} - * right_on: {1} - * Result: { {0, 1, 2}, {NULL, 1, 2}, {NULL, 1, 2} } - * - * Left: {{0, 1, 2}} - * Right {{1, 2, 3}, {1, 2, 5}} - * left_on: {0} - * right_on: {0} - * Result: { {0, 1, 2}, {NULL, 1, 2}, {NULL, 1, 2} } - * @endcode - * - * @throw cudf::logic_error if number of elements in `left_on` or `right_on` - * mismatch. - * @throw cudf::logic_error if number of columns in either `left` or `right` - * table is 0 or exceeds MAX_JOIN_SIZE - * @throw std::out_of_range if element of `left_on` or `right_on` exceed the - * number of columns in the left or right table. - * - * @param[in] left The left table - * @param[in] right The right table - * @param[in] left_on The column indices from `left` to join on. - * The column from `left` indicated by `left_on[i]` will be compared against the column - * from `right` indicated by `right_on[i]`. - * @param[in] right_on The column indices from `right` to join on. - * The column from `right` indicated by `right_on[i]` will be compared against the column - * from `left` indicated by `left_on[i]`. - * @param[in] compare_nulls controls whether null join-key values - * should match or not. - * @param mr Device memory resource used to allocate the returned table and columns' device memory - * - * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. - */ -std::unique_ptr left_join( - cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Returns a pair of row index vectors corresponding to a * full join between the specified tables. @@ -264,59 +166,6 @@ full_join(cudf::table_view const& left_keys, null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs a full join (also known as full outer join) on the - * specified columns of two tables (`left`, `right`) - * - * Full Join returns the rows that would be returned by a left join and those - * rows from the right table that do not have a match. - * For rows from the right table that do not have a match, the corresponding - * values in the left columns will be null. - * - * @code{.pseudo} - * Left: {{0, 1, 2}} - * Right: {{1, 2, 3}, {1, 2, 5}} - * left_on: {0} - * right_on: {1} - * Result: { {0, 1, 2, NULL}, {NULL, 1, 2, 3}, {NULL, 1, 2, 5} } - * - * Left: {{0, 1, 2}} - * Right: {{1, 2, 3}, {1, 2, 5}} - * left_on: {0} - * right_on: {0} - * Result: { {0, 1, 2, NULL}, {NULL, 1, 2, 3}, {NULL, 1, 2, 5} } - * @endcode - * - * @throw cudf::logic_error if number of elements in `left_on` or `right_on` - * mismatch. - * @throw cudf::logic_error if number of columns in either `left` or `right` - * table is 0 or exceeds MAX_JOIN_SIZE - * @throw std::out_of_range if element of `left_on` or `right_on` exceed the - * number of columns in the left or right table. - * - * @param[in] left The left table - * @param[in] right The right table - * @param[in] left_on The column indices from `left` to join on. - * The column from `left` indicated by `left_on[i]` will be compared against the column - * from `right` indicated by `right_on[i]`. - * @param[in] right_on The column indices from `right` to join on. - * The column from `right` indicated by `right_on[i]` will be compared against the column - * from `left` indicated by `left_on[i]`. - * @param[in] compare_nulls controls whether null join-key values - * should match or not. - * @param mr Device memory resource used to allocate the returned table and columns' device memory - * - * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. - */ -std::unique_ptr full_join( - cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Returns a vector of row indices corresponding to a left semi join * between the specified tables. @@ -349,54 +198,6 @@ std::unique_ptr> left_semi_join( null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs a left semi join on the specified columns of two - * tables (`left`, `right`) - * - * A left semi join only returns data from the left table, and only - * returns rows that exist in the right table. - * - * @code{.pseudo} - * TableA: {{0, 1, 2}} - * TableB: {{1, 2, 3}, {1, 2, 5}} - * left_on: {0} - * right_on: {1} - * Result: { {1, 2} } - * - * TableA {{0, 1, 2}, {1, 2, 5}} - * TableB {{1, 2, 3}} - * left_on: {0} - * right_on: {0} - * Result: { {1, 2}, {2, 5} } - * @endcode - * - * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0 - * - * @param[in] left The left table - * @param[in] right The right table - * @param[in] left_on The column indices from `left` to join on. - * The column from `left` indicated by `left_on[i]` - * will be compared against the column from `right` - * indicated by `right_on[i]`. - * @param[in] right_on The column indices from `right` to join on. - * The column from `right` indicated by `right_on[i]` - * will be compared against the column from `left` - * indicated by `left_on[i]`. - * @param[in] compare_nulls Controls whether null join-key values should match or not - * @param[in] mr Device memory resource used to allocate the returned table's - * device memory - * - * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. - */ -std::unique_ptr left_semi_join( - cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Returns a vector of row indices corresponding to a left anti join * between the specified tables. @@ -428,57 +229,6 @@ std::unique_ptr> left_anti_join( null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs a left anti join on the specified columns of two - * tables (`left`, `right`) - * - * A left anti join only returns data from the left table, and only - * returns rows that do not exist in the right table. - * - * @code{.pseudo} - * TableA: {{0, 1, 2}} - * TableB: {{1, 2, 3}, {1, 2, 5}} - * left_on: {0} - * right_on: {1} - * Result: {{0}} - * - * TableA: {{0, 1, 2}, {1, 2, 5}} - * TableB: {{1, 2, 3}} - * left_on: {0} - * right_on: {0} - * Result: { {0}, {1} } - * @endcode - * - * @throw cudf::logic_error if number of elements in `left_on` or `right_on` - * mismatch. - * @throw cudf::logic_error if number of columns in either `left` or `right` - * table is 0 or exceeds MAX_JOIN_SIZE - * - * @param[in] left The left table - * @param[in] right The right table - * @param[in] left_on The column indices from `left` to join on. - * The column from `left` indicated by `left_on[i]` - * will be compared against the column from `right` - * indicated by `right_on[i]`. - * @param[in] right_on The column indices from `right` to join on. - * The column from `right` indicated by `right_on[i]` - * will be compared against the column from `left` - * indicated by `left_on[i]`. - * @param[in] compare_nulls Controls whether null join-key values should match or not - * @param[in] mr Device memory resource used to allocate the returned table's - * device memory - * - * @return Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. - */ -std::unique_ptr left_anti_join( - cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls = null_equality::EQUAL, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Performs a cross join on two tables (`left`, `right`) * diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index 829ff914dfd..bb8fc07c2d7 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -26,26 +26,6 @@ namespace cudf { namespace detail { -namespace { -std::pair, std::unique_ptr> get_empty_joined_table( - table_view const& probe, table_view const& build) -{ - std::unique_ptr
empty_probe = empty_like(probe); - std::unique_ptr
empty_build = empty_like(build); - return std::pair(std::move(empty_probe), std::move(empty_build)); -} - -std::unique_ptr combine_table_pair(std::unique_ptr&& left, - std::unique_ptr&& right) -{ - auto joined_cols = left->release(); - auto right_cols = right->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(right_cols.begin()), - std::make_move_iterator(right_cols.end())); - return std::make_unique(std::move(joined_cols)); -} -} // namespace std::pair>, std::unique_ptr>> @@ -79,42 +59,6 @@ inner_join(table_view const& left_input, } } -std::unique_ptr
inner_join(table_view const& left_input, - table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // Make sure any dictionary columns have matched key sets. - // This will return any new dictionary columns created as well as updated table_views. - auto matched = cudf::dictionary::detail::match_dictionaries( - {left_input.select(left_on), right_input.select(right_on)}, - stream, - rmm::mr::get_current_device_resource()); // temporary objects returned - - // now rebuild the table views with the updated ones - auto const left = scatter_columns(matched.second.front(), left_on, left_input); - auto const right = scatter_columns(matched.second.back(), right_on, right_input); - - auto const [left_join_indices, right_join_indices] = cudf::detail::inner_join( - left.select(left_on), right.select(right_on), compare_nulls, stream, mr); - std::unique_ptr
left_result = detail::gather(left, - left_join_indices->begin(), - left_join_indices->end(), - out_of_bounds_policy::DONT_CHECK, - stream, - mr); - std::unique_ptr
right_result = detail::gather(right, - right_join_indices->begin(), - right_join_indices->end(), - out_of_bounds_policy::DONT_CHECK, - stream, - mr); - return combine_table_pair(std::move(left_result), std::move(right_result)); -} - std::pair>, std::unique_ptr>> left_join(table_view const& left_input, @@ -137,48 +81,6 @@ left_join(table_view const& left_input, return hj_obj.left_join(left, std::nullopt, stream, mr); } -std::unique_ptr
left_join(table_view const& left_input, - table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // Make sure any dictionary columns have matched key sets. - // This will return any new dictionary columns created as well as updated table_views. - auto matched = cudf::dictionary::detail::match_dictionaries( - {left_input.select(left_on), right_input.select(right_on)}, // these should match - stream, - rmm::mr::get_current_device_resource()); // temporary objects returned - // now rebuild the table views with the updated ones - table_view const left = scatter_columns(matched.second.front(), left_on, left_input); - table_view const right = scatter_columns(matched.second.back(), right_on, right_input); - - if ((left_on.empty() or right_on.empty()) or - cudf::detail::is_trivial_join(left, right, cudf::detail::join_kind::LEFT_JOIN)) { - auto [left_empty_table, right_empty_table] = get_empty_joined_table(left, right); - return cudf::detail::combine_table_pair(std::move(left_empty_table), - std::move(right_empty_table)); - } - - auto const [left_join_indices, right_join_indices] = cudf::detail::left_join( - left.select(left_on), right.select(right_on), compare_nulls, stream, mr); - std::unique_ptr
left_result = detail::gather(left, - left_join_indices->begin(), - left_join_indices->end(), - out_of_bounds_policy::NULLIFY, - stream, - mr); - std::unique_ptr
right_result = detail::gather(right, - right_join_indices->begin(), - right_join_indices->end(), - out_of_bounds_policy::NULLIFY, - stream, - mr); - return combine_table_pair(std::move(left_result), std::move(right_result)); -} - std::pair>, std::unique_ptr>> full_join(table_view const& left_input, @@ -201,47 +103,6 @@ full_join(table_view const& left_input, return hj_obj.full_join(left, std::nullopt, stream, mr); } -std::unique_ptr
full_join(table_view const& left_input, - table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // Make sure any dictionary columns have matched key sets. - // This will return any new dictionary columns created as well as updated table_views. - auto matched = cudf::dictionary::detail::match_dictionaries( - {left_input.select(left_on), right_input.select(right_on)}, // these should match - stream, - rmm::mr::get_current_device_resource()); // temporary objects returned - // now rebuild the table views with the updated ones - table_view const left = scatter_columns(matched.second.front(), left_on, left_input); - table_view const right = scatter_columns(matched.second.back(), right_on, right_input); - - if ((left_on.empty() or right_on.empty()) or - cudf::detail::is_trivial_join(left, right, cudf::detail::join_kind::FULL_JOIN)) { - auto [left_empty_table, right_empty_table] = get_empty_joined_table(left, right); - return cudf::detail::combine_table_pair(std::move(left_empty_table), - std::move(right_empty_table)); - } - - auto const [left_join_indices, right_join_indices] = cudf::detail::full_join( - left.select(left_on), right.select(right_on), compare_nulls, stream, mr); - std::unique_ptr
left_result = detail::gather(left, - left_join_indices->begin(), - left_join_indices->end(), - out_of_bounds_policy::NULLIFY, - stream, - mr); - std::unique_ptr
right_result = detail::gather(right, - right_join_indices->begin(), - right_join_indices->end(), - out_of_bounds_policy::NULLIFY, - stream, - mr); - return combine_table_pair(std::move(left_result), std::move(right_result)); -} } // namespace detail std::pair>, @@ -255,18 +116,6 @@ inner_join(table_view const& left, return detail::inner_join(left, right, compare_nulls, cudf::default_stream_value, mr); } -std::unique_ptr
inner_join(table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::inner_join( - left, right, left_on, right_on, compare_nulls, cudf::default_stream_value, mr); -} - std::pair>, std::unique_ptr>> left_join(table_view const& left, @@ -278,18 +127,6 @@ left_join(table_view const& left, return detail::left_join(left, right, compare_nulls, cudf::default_stream_value, mr); } -std::unique_ptr
left_join(table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::left_join( - left, right, left_on, right_on, compare_nulls, cudf::default_stream_value, mr); -} - std::pair>, std::unique_ptr>> full_join(table_view const& left, @@ -301,15 +138,4 @@ full_join(table_view const& left, return detail::full_join(left, right, compare_nulls, cudf::default_stream_value, mr); } -std::unique_ptr
full_join(table_view const& left, - table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::full_join( - left, right, left_on, right_on, compare_nulls, cudf::default_stream_value, mr); -} } // namespace cudf diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 5cb58b92fe9..87bac002f53 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -85,112 +85,8 @@ std::unique_ptr> left_semi_anti_join( return gather_map; } -/** - * @brief Performs a left semi or anti join on the specified columns of two - * tables (left, right) - * - * The semi and anti joins only return data from the left table. A left semi join - * returns rows that exist in the right table, a left anti join returns rows - * that do not exist in the right table. - * - * The basic approach is to create a hash table containing the contents of the right - * table and then select only rows that exist (or don't exist) to be included in - * the return set. - * - * @throws cudf::logic_error if number of columns in either `left` or `right` table is 0 - * @throws cudf::logic_error if number of returned columns is 0 - * @throws cudf::logic_error if number of elements in `right_on` and `left_on` are not equal - * - * @param kind Indicates whether to do LEFT_SEMI_JOIN or LEFT_ANTI_JOIN - * @param left The left table - * @param right The right table - * @param left_on The column indices from `left` to join on. - * The column from `left` indicated by `left_on[i]` - * will be compared against the column from `right` - * indicated by `right_on[i]`. - * @param right_on The column indices from `right` to join on. - * The column from `right` indicated by `right_on[i]` - * will be compared against the column from `left` - * indicated by `left_on[i]`. - * @param compare_nulls Controls whether null join-key values should match or not. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource to used to allocate the returned table - * - * @returns Result of joining `left` and `right` tables on the columns - * specified by `left_on` and `right_on`. - */ -std::unique_ptr left_semi_anti_join( - join_kind const kind, - cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on"); - - if ((left_on.empty() || right_on.empty()) || is_trivial_join(left, right, kind)) { - return empty_like(left); - } - - if ((join_kind::LEFT_ANTI_JOIN == kind) && (0 == right.num_rows())) { - // Everything matches, just copy the proper columns from the left table - return std::make_unique
(left, stream, mr); - } - - // Make sure any dictionary columns have matched key sets. - // This will return any new dictionary columns created as well as updated table_views. - auto matched = cudf::dictionary::detail::match_dictionaries( - {left.select(left_on), right.select(right_on)}, - stream, - rmm::mr::get_current_device_resource()); // temporary objects returned - - auto const left_selected = matched.second.front(); - auto const right_selected = matched.second.back(); - - auto gather_vector = - left_semi_anti_join(kind, left_selected, right_selected, compare_nulls, stream); - - // wrapping the device vector with a column view allows calling the non-iterator - // version of detail::gather, improving compile time by 10% and reducing the - // object file size by 2.2x without affecting performance - auto gather_map = column_view(data_type{type_id::INT32}, - static_cast(gather_vector->size()), - gather_vector->data(), - nullptr, - 0); - - auto const left_updated = scatter_columns(left_selected, left_on, left); - return cudf::detail::gather(left_updated, - gather_map, - out_of_bounds_policy::DONT_CHECK, - negative_index_policy::NOT_ALLOWED, - stream, - mr); -} - } // namespace detail -std::unique_ptr left_semi_join(cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::left_semi_anti_join(detail::join_kind::LEFT_SEMI_JOIN, - left, - right, - left_on, - right_on, - compare_nulls, - cudf::default_stream_value, - mr); -} - std::unique_ptr> left_semi_join( cudf::table_view const& left, cudf::table_view const& right, @@ -202,24 +98,6 @@ std::unique_ptr> left_semi_join( detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, cudf::default_stream_value, mr); } -std::unique_ptr left_anti_join(cudf::table_view const& left, - cudf::table_view const& right, - std::vector const& left_on, - std::vector const& right_on, - null_equality compare_nulls, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::left_semi_anti_join(detail::join_kind::LEFT_ANTI_JOIN, - left, - right, - left_on, - right_on, - compare_nulls, - cudf::default_stream_value, - mr); -} - std::unique_ptr> left_anti_join( cudf::table_view const& left, cudf::table_view const& right, diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 9d9d1f4fd10..44e1d586389 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -47,6 +47,82 @@ using Table = cudf::table; constexpr cudf::size_type NoneValue = std::numeric_limits::min(); // TODO: how to test if this isn't public? +// This function is a wrapper around cudf's join APIs that takes the gather map +// from join APIs and materializes the table that would be created by gathering +// from the joined tables. Join APIs originally returned tables like this, but +// they were modified in https://github.com/rapidsai/cudf/pull/7454. This +// helper function allows us to avoid rewriting all our tests in terms of +// gather maps. +template >, + std::unique_ptr>> (*join_impl)( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + cudf::null_equality compare_nulls, + rmm::mr::device_memory_resource* mr), + cudf::out_of_bounds_policy oob_policy = cudf::out_of_bounds_policy::DONT_CHECK> +std::unique_ptr join_and_gather( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto left_selected = left_input.select(left_on); + auto right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + join_impl(left_selected, right_selected, compare_nulls, mr); + + auto left_indices_span = cudf::device_span{*left_join_indices}; + auto right_indices_span = cudf::device_span{*right_join_indices}; + + auto left_indices_col = cudf::column_view{left_indices_span}; + auto right_indices_col = cudf::column_view{right_indices_span}; + + auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); + auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); + + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); +} + +std::unique_ptr inner_join( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + return join_and_gather( + left_input, right_input, left_on, right_on, compare_nulls); +} + +std::unique_ptr left_join( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + return join_and_gather( + left_input, right_input, left_on, right_on, compare_nulls); +} + +std::unique_ptr full_join( + cudf::table_view const& full_input, + cudf::table_view const& right_input, + std::vector const& full_on, + std::vector const& right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + return join_and_gather( + full_input, right_input, full_on, right_on, compare_nulls); +} + struct JoinTest : public cudf::test::BaseFixture { std::pair, std::unique_ptr> gather_maps_as_tables( cudf::column_view const& expected_left_map, @@ -88,7 +164,7 @@ TEST_F(JoinTest, EmptySentinelRepro) cudf::table_view left({left_first_col, left_second_col, left_third_col}); cudf::table_view right({right_first_col, right_second_col, right_third_col}); - auto result = cudf::inner_join(left, right, {0, 1, 2}, {0, 1, 2}); + auto result = inner_join(left, right, {0, 1, 2}, {0, 1, 2}); EXPECT_EQ(result->num_rows(), 1); } @@ -114,7 +190,7 @@ TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0}, {0}); + auto result = left_join(t0, t1, {0}, {0}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -160,7 +236,7 @@ TEST_F(JoinTest, FullJoinNoNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); + auto result = full_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -209,7 +285,7 @@ TEST_F(JoinTest, FullJoinWithNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); + auto result = full_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -261,7 +337,7 @@ TEST_F(JoinTest, FullJoinOnNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); + auto result = full_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -310,7 +386,7 @@ TEST_F(JoinTest, FullJoinOnNulls) // Repeat test with compare_nulls_equal=false, // as per SQL standard. - result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); + result = full_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -366,7 +442,7 @@ TEST_F(JoinTest, LeftJoinNoNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); + auto result = left_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -411,7 +487,7 @@ TEST_F(JoinTest, LeftJoinWithNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); + auto result = left_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -475,7 +551,7 @@ TEST_F(JoinTest, LeftJoinWithStructsAndNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {3}, {3}); + auto result = left_join(t0, t1, {3}, {3}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -551,7 +627,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); + auto result = left_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -601,7 +677,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) // Repeat test with compare_nulls_equal=false, // as per SQL standard. - result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); + result = left_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -654,7 +730,7 @@ TEST_F(JoinTest, InnerJoinNoNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); + auto result = inner_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -699,7 +775,7 @@ TEST_F(JoinTest, InnerJoinWithNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); + auto result = inner_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -767,7 +843,7 @@ TEST_F(JoinTest, InnerJoinWithStructsAndNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1, 3}, {0, 1, 3}); + auto result = inner_join(t0, t1, {0, 1, 3}, {0, 1, 3}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -833,7 +909,7 @@ TEST_F(JoinTest, InnerJoinOnNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); + auto result = inner_join(t0, t1, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -862,7 +938,7 @@ TEST_F(JoinTest, InnerJoinOnNulls) // Repeat test with compare_nulls_equal=false, // as per SQL standard. - result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); + result = inner_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -909,7 +985,7 @@ TEST_F(JoinTest, EmptyLeftTableInnerJoin) Table empty0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1}); + auto result = inner_join(empty0, t1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty0, *result); } @@ -930,7 +1006,7 @@ TEST_F(JoinTest, EmptyLeftTableLeftJoin) Table empty0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1}); + auto result = left_join(empty0, t1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty0, *result); } @@ -951,7 +1027,7 @@ TEST_F(JoinTest, EmptyLeftTableFullJoin) Table lhs(std::move(cols0)); Table rhs(std::move(cols1)); - auto result = cudf::full_join(lhs, rhs, {0, 1}, {0, 1}); + auto result = full_join(lhs, rhs, {0, 1}, {0, 1}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -992,7 +1068,7 @@ TEST_F(JoinTest, EmptyRightTableInnerJoin) Table empty1(std::move(cols1)); { - auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}); + auto result = inner_join(t0, empty1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } @@ -1031,7 +1107,7 @@ TEST_F(JoinTest, EmptyRightTableLeftJoin) Table empty1(std::move(cols1)); { - auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}); + auto result = left_join(t0, empty1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); } @@ -1070,7 +1146,7 @@ TEST_F(JoinTest, EmptyRightTableFullJoin) Table empty1(std::move(cols1)); { - auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}); + auto result = full_join(t0, empty1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result); } @@ -1109,7 +1185,7 @@ TEST_F(JoinTest, BothEmptyInnerJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}); + auto result = inner_join(t0, empty1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } @@ -1130,7 +1206,7 @@ TEST_F(JoinTest, BothEmptyLeftJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}); + auto result = left_join(t0, empty1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } @@ -1151,7 +1227,7 @@ TEST_F(JoinTest, BothEmptyFullJoin) Table t0(std::move(cols0)); Table empty1(std::move(cols1)); - auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}); + auto result = full_join(t0, empty1, {0, 1}, {0, 1}); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result); } @@ -1174,7 +1250,7 @@ TEST_F(JoinTest, EqualValuesInnerJoin) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); + auto result = inner_join(t0, t1, {0, 1}, {0, 1}); column_wrapper col_gold_0{{0, 0, 0, 0}}; strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}); @@ -1209,7 +1285,7 @@ TEST_F(JoinTest, EqualValuesLeftJoin) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); + auto result = left_join(t0, t1, {0, 1}, {0, 1}); column_wrapper col_gold_0{{0, 0, 0, 0}, {1, 1, 1, 1}}; strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1}); @@ -1243,7 +1319,7 @@ TEST_F(JoinTest, EqualValuesFullJoin) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); + auto result = full_join(t0, t1, {0, 1}, {0, 1}); column_wrapper col_gold_0{{0, 0, 0, 0}}; strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}); @@ -1272,7 +1348,7 @@ TEST_F(JoinTest, InnerJoinCornerCase) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::inner_join(t0, t1, {0}, {0}); + auto result = inner_join(t0, t1, {0}, {0}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -1453,7 +1529,7 @@ TEST_F(JoinDictionaryTest, LeftJoinNoNulls) auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2}); auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2}); - auto result = cudf::left_join(t0, t1, {0}, {0}); + auto result = left_join(t0, t1, {0}, {0}); auto result_view = result->view(); auto decoded1 = cudf::dictionary::decode(result_view.column(1)); auto decoded4 = cudf::dictionary::decode(result_view.column(4)); @@ -1466,7 +1542,7 @@ TEST_F(JoinDictionaryTest, LeftJoinNoNulls) auto result_sort_order = cudf::sorted_order(cudf::table_view(result_decoded)); auto sorted_result = cudf::gather(cudf::table_view(result_decoded), *result_sort_order); - auto gold = cudf::left_join(g0, g1, {0}, {0}); + auto gold = left_join(g0, g1, {0}, {0}); auto gold_sort_order = cudf::sorted_order(gold->view()); auto sorted_gold = cudf::gather(gold->view(), *gold_sort_order); @@ -1488,7 +1564,7 @@ TEST_F(JoinDictionaryTest, LeftJoinWithNulls) auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()}); auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()}); - auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}); + auto result = left_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded2 = cudf::dictionary::decode(result_view.column(2)); auto decoded5 = cudf::dictionary::decode(result_view.column(5)); @@ -1503,7 +1579,7 @@ TEST_F(JoinDictionaryTest, LeftJoinWithNulls) auto g0 = cudf::table_view({col0_0, col0_1, col0_2_w}); auto g1 = cudf::table_view({col1_0, col1_1, col1_2_w}); - auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}); + auto gold = left_join(g0, g1, {0, 1}, {0, 1}); auto gold_sort_order = cudf::sorted_order(gold->view()); auto sorted_gold = cudf::gather(gold->view(), *gold_sort_order); @@ -1525,7 +1601,7 @@ TEST_F(JoinDictionaryTest, InnerJoinNoNulls) auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2}); auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2}); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); + auto result = inner_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded1 = cudf::dictionary::decode(result_view.column(1)); auto decoded4 = cudf::dictionary::decode(result_view.column(4)); @@ -1540,7 +1616,7 @@ TEST_F(JoinDictionaryTest, InnerJoinNoNulls) auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2}); auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2}); - auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}); + auto gold = inner_join(g0, g1, {0, 1}, {0, 1}); auto gold_sort_order = cudf::sorted_order(gold->view()); auto sorted_gold = cudf::gather(gold->view(), *gold_sort_order); @@ -1562,7 +1638,7 @@ TEST_F(JoinDictionaryTest, InnerJoinWithNulls) auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()}); auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()}); - auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}); + auto result = inner_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded2 = cudf::dictionary::decode(result_view.column(2)); auto decoded5 = cudf::dictionary::decode(result_view.column(5)); @@ -1577,7 +1653,7 @@ TEST_F(JoinDictionaryTest, InnerJoinWithNulls) auto g0 = cudf::table_view({col0_0, col0_1, col0_2_w}); auto g1 = cudf::table_view({col1_0, col1_1, col1_2_w}); - auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}); + auto gold = inner_join(g0, g1, {0, 1}, {0, 1}); auto gold_sort_order = cudf::sorted_order(gold->view()); auto sorted_gold = cudf::gather(gold->view(), *gold_sort_order); @@ -1599,7 +1675,7 @@ TEST_F(JoinDictionaryTest, FullJoinNoNulls) auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2}); auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2}); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); + auto result = full_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded1 = cudf::dictionary::decode(result_view.column(1)); auto decoded4 = cudf::dictionary::decode(result_view.column(4)); @@ -1614,7 +1690,7 @@ TEST_F(JoinDictionaryTest, FullJoinNoNulls) auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2}); auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2}); - auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}); + auto gold = full_join(g0, g1, {0, 1}, {0, 1}); auto gold_sort_order = cudf::sorted_order(gold->view()); auto sorted_gold = cudf::gather(gold->view(), *gold_sort_order); @@ -1636,7 +1712,7 @@ TEST_F(JoinDictionaryTest, FullJoinWithNulls) auto t0 = cudf::table_view({col0_0->view(), col0_1, col0_2}); auto t1 = cudf::table_view({col1_0->view(), col1_1, col1_2}); - auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}); + auto result = full_join(t0, t1, {0, 1}, {0, 1}); auto result_view = result->view(); auto decoded0 = cudf::dictionary::decode(result_view.column(0)); auto decoded3 = cudf::dictionary::decode(result_view.column(3)); @@ -1651,7 +1727,7 @@ TEST_F(JoinDictionaryTest, FullJoinWithNulls) auto g0 = cudf::table_view({col0_0_w, col0_1, col0_2}); auto g1 = cudf::table_view({col1_0_w, col1_1, col1_2}); - auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}); + auto gold = full_join(g0, g1, {0, 1}, {0, 1}); auto gold_sort_order = cudf::sorted_order(gold->view()); auto sorted_gold = cudf::gather(gold->view(), *gold_sort_order); @@ -1707,7 +1783,7 @@ TEST_F(JoinTest, FullJoinWithStructsAndNulls) Table t0(std::move(cols0)); Table t1(std::move(cols1)); - auto result = cudf::full_join(t0, t1, {0, 1, 3}, {0, 1, 3}); + auto result = full_join(t0, t1, {0, 1, 3}, {0, 1, 3}); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -1815,7 +1891,7 @@ TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown) return make_table(dim_structs.release()); }(); - auto const result = cudf::inner_join(fact_table.view(), dimension_table.view(), {0}, {0}); + auto const result = inner_join(fact_table.view(), dimension_table.view(), {0}, {0}); EXPECT_EQ(result->num_rows(), 1); // The null STRUCT rows should match. // Note: Join result might not have nulls pushed down, since it's an output of gather(). diff --git a/cpp/tests/join/semi_anti_join_tests.cpp b/cpp/tests/join/semi_anti_join_tests.cpp index 97af1fd7006..1de70124b60 100644 --- a/cpp/tests/join/semi_anti_join_tests.cpp +++ b/cpp/tests/join/semi_anti_join_tests.cpp @@ -40,6 +40,58 @@ using Table = cudf::table; struct JoinTest : public cudf::test::BaseFixture { }; +namespace { +// This function is a wrapper around cudf's join APIs that takes the gather map +// from join APIs and materializes the table that would be created by gathering +// from the joined tables. Join APIs originally returned tables like this, but +// they were modified in https://github.com/rapidsai/cudf/pull/7454. This +// helper function allows us to avoid rewriting all our tests in terms of +// gather maps. +template > (*join_impl)( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + cudf::null_equality compare_nulls, + rmm::mr::device_memory_resource* mr)> +std::unique_ptr join_and_gather( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto left_selected = left_input.select(left_on); + auto right_selected = right_input.select(right_on); + auto const join_indices = join_impl(left_selected, right_selected, compare_nulls, mr); + + auto left_indices_span = cudf::device_span{*join_indices}; + auto left_indices_col = cudf::column_view{left_indices_span}; + return cudf::gather(left_input, left_indices_col); +} +} // namespace + +std::unique_ptr left_semi_join( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + return join_and_gather( + left_input, right_input, left_on, right_on, compare_nulls); +} + +std::unique_ptr left_anti_join( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + return join_and_gather( + left_input, right_input, left_on, right_on, compare_nulls); +} + TEST_F(JoinTest, TestSimple) { column_wrapper left_col0{0, 1, 2}; @@ -48,7 +100,7 @@ TEST_F(JoinTest, TestSimple) auto left = cudf::table_view{{left_col0}}; auto right = cudf::table_view{{right_col0}}; - auto result = cudf::left_semi_join(left, right); + auto result = left_semi_join(left, right); auto result_cv = cudf::column_view( cudf::data_type{cudf::type_to_id()}, result->size(), result->data()); column_wrapper expected{0, 1}; @@ -104,8 +156,8 @@ TEST_F(JoinTest, SemiJoinWithStructsAndNulls) { auto tables = get_saj_tables({1, 1, 0, 1, 0}, {1, 0, 0, 1, 1}); - auto result = cudf::left_semi_join( - *tables.first, *tables.second, {0, 1, 3}, {0, 1, 3}, cudf::null_equality::EQUAL); + auto result = + left_semi_join(*tables.first, *tables.second, {0, 1, 3}, {0, 1, 3}, cudf::null_equality::EQUAL); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -136,7 +188,7 @@ TEST_F(JoinTest, SemiJoinWithStructsAndNullsNotEqual) { auto tables = get_saj_tables({1, 1, 0, 1, 1}, {1, 1, 0, 1, 1}); - auto result = cudf::left_semi_join( + auto result = left_semi_join( *tables.first, *tables.second, {0, 1, 3}, {0, 1, 3}, cudf::null_equality::UNEQUAL); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -169,8 +221,8 @@ TEST_F(JoinTest, AntiJoinWithStructsAndNulls) { auto tables = get_saj_tables({1, 1, 0, 1, 0}, {1, 0, 0, 1, 1}); - auto result = cudf::left_anti_join( - *tables.first, *tables.second, {0, 1, 3}, {0, 1, 3}, cudf::null_equality::EQUAL); + auto result = + left_anti_join(*tables.first, *tables.second, {0, 1, 3}, {0, 1, 3}, cudf::null_equality::EQUAL); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -202,7 +254,7 @@ TEST_F(JoinTest, AntiJoinWithStructsAndNullsNotEqual) { auto tables = get_saj_tables({1, 1, 0, 1, 1}, {1, 1, 0, 1, 1}); - auto result = cudf::left_anti_join( + auto result = left_anti_join( *tables.first, *tables.second, {0, 1, 3}, {0, 1, 3}, cudf::null_equality::UNEQUAL); auto result_sort_order = cudf::sorted_order(result->view()); auto sorted_result = cudf::gather(result->view(), *result_sort_order); @@ -249,11 +301,9 @@ TEST_F(JoinTest, AntiJoinWithStructsAndNullsOnOneSide) auto left = cudf::table_view{{left_col0}}; auto right = cudf::table_view{{right_col0}}; - auto result = cudf::left_anti_join(left, right, {0}, {0}); - auto expected = [] { - column_wrapper child1{{null}, cudf::test::iterators::null_at(0)}; - column_wrapper child2{12}; - return cudf::test::structs_column_wrapper{{child1, child2}}; - }(); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0).view()); + auto result = cudf::left_anti_join(left, right); + auto result_span = cudf::device_span{*result}; + auto result_col = cudf::column_view{result_span}; + auto expected = column_wrapper{1}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result_col); } diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index db90c09a078..c8f842fcc63 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -549,9 +549,6 @@ private static native long[] orderBy(long inputTable, long[] sortKeys, boolean[] private static native long[] merge(long[] tableHandles, int[] sortKeyIndexes, boolean[] isDescending, boolean[] areNullsSmallest) throws CudfException; - private static native long[] leftJoin(long leftTable, int[] leftJoinCols, long rightTable, - int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; - private static native long[] leftJoinGatherMaps(long leftKeys, long rightKeys, boolean compareNullsEqual) throws CudfException; @@ -562,9 +559,6 @@ private static native long[] leftJoinGatherMaps(long leftKeys, long rightKeys, private static native long[] leftHashJoinGatherMapsWithCount(long leftTable, long rightHashJoin, long outputRowCount) throws CudfException; - private static native long[] innerJoin(long leftTable, int[] leftJoinCols, long rightTable, - int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; - private static native long[] innerJoinGatherMaps(long leftKeys, long rightKeys, boolean compareNullsEqual) throws CudfException; @@ -575,9 +569,6 @@ private static native long[] innerJoinGatherMaps(long leftKeys, long rightKeys, private static native long[] innerHashJoinGatherMapsWithCount(long table, long hashJoin, long outputRowCount) throws CudfException; - private static native long[] fullJoin(long leftTable, int[] leftJoinCols, long rightTable, - int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; - private static native long[] fullJoinGatherMaps(long leftKeys, long rightKeys, boolean compareNullsEqual) throws CudfException; @@ -588,15 +579,9 @@ private static native long[] fullJoinGatherMaps(long leftKeys, long rightKeys, private static native long[] fullHashJoinGatherMapsWithCount(long leftTable, long rightHashJoin, long outputRowCount) throws CudfException; - private static native long[] leftSemiJoin(long leftTable, int[] leftJoinCols, long rightTable, - int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; - private static native long[] leftSemiJoinGatherMap(long leftKeys, long rightKeys, boolean compareNullsEqual) throws CudfException; - private static native long[] leftAntiJoin(long leftTable, int[] leftJoinCols, long rightTable, - int[] rightJoinCols, boolean compareNullsEqual) throws CudfException; - private static native long[] leftAntiJoinGatherMap(long leftKeys, long rightKeys, boolean compareNullsEqual) throws CudfException; @@ -4119,161 +4104,6 @@ public static final class TableOperation { operation = new Operation(table, indices); } - /** - * Joins two tables on the join columns that are passed in. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).leftJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @param compareNullsEqual - Whether null join-key values should match or not. - * @return the joined table. The order of the columns returned will be join columns, - * left non-join columns, right non-join columns. - */ - public Table leftJoin(TableOperation rightJoinIndices, boolean compareNullsEqual) { - return new Table(Table.leftJoin(operation.table.nativeHandle, operation.indices, - rightJoinIndices.operation.table.nativeHandle, rightJoinIndices.operation.indices, - compareNullsEqual)); - } - - /** - * Joins two tables on the join columns that are passed in. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).leftJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @return the joined table. The order of the columns returned will be join columns, - * left non-join columns, right non-join columns. - */ - public Table leftJoin(TableOperation rightJoinIndices) { - return leftJoin(rightJoinIndices, true); - } - - /** - * Joins two tables on the join columns that are passed in. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).innerJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @param compareNullsEqual - Whether null join-key values should match or not. - * @return the joined table. The order of the columns returned will be join columns, - * left non-join columns, right non-join columns. - */ - public Table innerJoin(TableOperation rightJoinIndices, boolean compareNullsEqual) { - return new Table(Table.innerJoin(operation.table.nativeHandle, operation.indices, - rightJoinIndices.operation.table.nativeHandle, rightJoinIndices.operation.indices, - compareNullsEqual)); - } - - /** - * Joins two tables on the join columns that are passed in. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).innerJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @return the joined table. The order of the columns returned will be join columns, - * left non-join columns, right non-join columns. - */ - public Table innerJoin(TableOperation rightJoinIndices) { - return innerJoin(rightJoinIndices, true); - } - - /** - * Joins two tables on the join columns that are passed in. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).fullJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @param compareNullsEqual - Whether null join-key values should match or not. - * @return the joined table. The order of the columns returned will be join columns, - * left non-join columns, right non-join columns. - */ - public Table fullJoin(TableOperation rightJoinIndices, boolean compareNullsEqual) { - return new Table(Table.fullJoin(operation.table.nativeHandle, operation.indices, - rightJoinIndices.operation.table.nativeHandle, rightJoinIndices.operation.indices, - compareNullsEqual)); - } - - /** - * Joins two tables on the join columns that are passed in. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).fullJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @return the joined table. The order of the columns returned will be join columns, - * left non-join columns, right non-join columns. - */ - public Table fullJoin(TableOperation rightJoinIndices) { - return fullJoin(rightJoinIndices, true); - } - - /** - * Performs a semi-join between a left table and a right table, returning only the rows from - * the left table that match rows in the right table on the join keys. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).leftSemiJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @param compareNullsEqual - Whether null join-key values should match or not. - * @return the left semi-joined table. - */ - public Table leftSemiJoin(TableOperation rightJoinIndices, boolean compareNullsEqual) { - return new Table(Table.leftSemiJoin(operation.table.nativeHandle, operation.indices, - rightJoinIndices.operation.table.nativeHandle, rightJoinIndices.operation.indices, - compareNullsEqual)); - } - - /** - * Performs a semi-join between a left table and a right table, returning only the rows from - * the left table that match rows in the right table on the join keys. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).leftSemiJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @return the left semi-joined table. - */ - public Table leftSemiJoin(TableOperation rightJoinIndices) { - return leftSemiJoin(rightJoinIndices, true); - } - - /** - * Performs an anti-join between a left table and a right table, returning only the rows from - * the left table that do not match rows in the right table on the join keys. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).leftAntiJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @param compareNullsEqual - Whether null join-key values should match or not. - * @return the left anti-joined table. - */ - public Table leftAntiJoin(TableOperation rightJoinIndices, boolean compareNullsEqual) { - return new Table(Table.leftAntiJoin(operation.table.nativeHandle, operation.indices, - rightJoinIndices.operation.table.nativeHandle, rightJoinIndices.operation.indices, - compareNullsEqual)); - } - - /** - * Performs an anti-join between a left table and a right table, returning only the rows from - * the left table that do not match rows in the right table on the join keys. - * Usage: - * Table t1 ... - * Table t2 ... - * Table result = t1.onColumns(0,1).leftAntiJoin(t2.onColumns(2,3)); - * @param rightJoinIndices - Indices of the right table to join on - * @return the left anti-joined table. - */ - public Table leftAntiJoin(TableOperation rightJoinIndices) { - return leftAntiJoin(rightJoinIndices, true); - } - /** * Hash partition a table into the specified number of partitions. Uses the default MURMUR3 * hashing. diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 4bdd54640d6..471ddef81c2 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -985,51 +985,6 @@ get_mixed_size_info(JNIEnv *env, jlong j_output_row_count, jlong j_matches_view) matches->template data(), matches->size())); } -// Returns a table view containing only the columns at the specified indices -cudf::table_view const get_keys_table(cudf::table_view const *t, - native_jintArray const &key_indices) { - std::vector key_cols; - key_cols.reserve(key_indices.size()); - std::transform(key_indices.begin(), key_indices.end(), std::back_inserter(key_cols), - [t](int idx) { return t->column(idx); }); - return table_view(key_cols); -} - -// Returns a table view containing only the columns that are NOT at the specified indices -cudf::table_view const get_non_keys_table(cudf::table_view const *t, - native_jintArray const &key_indices) { - std::vector non_key_indices; - for (int i = 0; i < t->num_columns(); ++i) { - if (std::find(key_indices.begin(), key_indices.end(), i) == key_indices.end()) { - non_key_indices.push_back(i); - } - } - std::vector cols; - std::transform(non_key_indices.begin(), non_key_indices.end(), std::back_inserter(cols), - [&t](int idx) { return t->column(idx); }); - return table_view(cols); -} - -// Combine left and right join results into a column pointer array that can be returned to the JVM. -jlongArray combine_join_results(JNIEnv *env, std::vector> left_cols, - std::vector> right_cols) { - cudf::jni::native_jlongArray outcol_handles(env, left_cols.size() + right_cols.size()); - auto iter = - std::transform(left_cols.begin(), left_cols.end(), outcol_handles.begin(), - [](std::unique_ptr &col) { return release_as_jlong(col); }); - std::transform(right_cols.begin(), right_cols.end(), iter, - [](std::unique_ptr &col) { return release_as_jlong(col); }); - return outcol_handles.get_jArray(); -} - -// Combine left and right join results into a column pointer array that can be returned to the JVM. -jlongArray combine_join_results(JNIEnv *env, cudf::table &left_results, - cudf::table &right_results) { - std::vector> left_cols = left_results.release(); - std::vector> right_cols = right_results.release(); - return combine_join_results(env, std::move(left_cols), std::move(right_cols)); -} - cudf::column_view remove_validity_from_col(cudf::column_view column_view) { if (!cudf::is_compound(column_view.type())) { if (column_view.nullable() && column_view.null_count() == 0) { @@ -2025,206 +1980,6 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_readArrowIPCEnd(JNIEnv *env, jc CATCH_STD(env, ) } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftJoin( - JNIEnv *env, jclass, jlong j_left_table, jintArray j_left_key_indices, jlong j_right_table, - jintArray j_right_key_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, j_left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, j_left_key_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, j_right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, j_right_key_indices, "right_col_join_indices is null", NULL); - - try { - cudf::jni::auto_set_device(env); - auto left_in_table = reinterpret_cast(j_left_table); - auto right_in_table = reinterpret_cast(j_right_table); - cudf::jni::native_jintArray left_key_indices(env, j_left_key_indices); - auto left_keys_table = cudf::jni::get_keys_table(left_in_table, left_key_indices); - left_key_indices.cancel(); - cudf::jni::native_jintArray right_key_indices(env, j_right_key_indices); - auto right_keys_table = cudf::jni::get_keys_table(right_in_table, right_key_indices); - auto nulleq = compare_nulls_equal ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; - - // compute gather maps for the left and right tables that can produce the join result rows - auto join_maps = cudf::left_join(left_keys_table, right_keys_table, nulleq); - CUDF_EXPECTS(join_maps.first->size() <= std::numeric_limits::max(), - "join result exceeds maximum column length"); - auto num_join_rows = static_cast(join_maps.first->size()); - - // compute the join result rows for the left table columns - auto left_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, - join_maps.first->data()); - auto left_out_table = - cudf::gather(*left_in_table, left_gather_col, cudf::out_of_bounds_policy::DONT_CHECK); - - // compute the join result rows for the right table columns - auto right_non_keys_table = cudf::jni::get_non_keys_table(right_in_table, right_key_indices); - right_key_indices.cancel(); - auto right_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, - join_maps.second->data()); - auto right_out_table = - cudf::gather(right_non_keys_table, right_gather_col, cudf::out_of_bounds_policy::NULLIFY); - - return cudf::jni::combine_join_results(env, *left_out_table, *right_out_table); - } - CATCH_STD(env, NULL); -} - -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerJoin( - JNIEnv *env, jclass, jlong j_left_table, jintArray j_left_key_indices, jlong j_right_table, - jintArray j_right_key_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, j_left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, j_left_key_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, j_right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, j_right_key_indices, "right_col_join_indices is null", NULL); - - try { - cudf::jni::auto_set_device(env); - auto left_in_table = reinterpret_cast(j_left_table); - auto right_in_table = reinterpret_cast(j_right_table); - cudf::jni::native_jintArray left_key_indices(env, j_left_key_indices); - auto left_keys_table = cudf::jni::get_keys_table(left_in_table, left_key_indices); - left_key_indices.cancel(); - cudf::jni::native_jintArray right_key_indices(env, j_right_key_indices); - auto right_keys_table = cudf::jni::get_keys_table(right_in_table, right_key_indices); - auto nulleq = compare_nulls_equal ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; - - // compute gather maps for the left and right tables that can produce the join result rows - auto join_maps = cudf::inner_join(left_keys_table, right_keys_table, nulleq); - CUDF_EXPECTS(join_maps.first->size() <= std::numeric_limits::max(), - "join result exceeds maximum column length"); - auto num_join_rows = static_cast(join_maps.first->size()); - - // compute the join result rows for the left table columns - auto left_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, - join_maps.first->data()); - auto left_out_table = - cudf::gather(*left_in_table, left_gather_col, cudf::out_of_bounds_policy::DONT_CHECK); - - // compute the join result rows for the right table columns - auto right_non_keys_table = cudf::jni::get_non_keys_table(right_in_table, right_key_indices); - right_key_indices.cancel(); - auto right_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, - join_maps.second->data()); - auto right_out_table = cudf::gather(right_non_keys_table, right_gather_col, - cudf::out_of_bounds_policy::DONT_CHECK); - - return cudf::jni::combine_join_results(env, *left_out_table, *right_out_table); - } - CATCH_STD(env, NULL); -} - -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_fullJoin( - JNIEnv *env, jclass, jlong j_left_table, jintArray j_left_key_indices, jlong j_right_table, - jintArray j_right_key_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, j_left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, j_left_key_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, j_right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, j_right_key_indices, "right_col_join_indices is null", NULL); - - try { - cudf::jni::auto_set_device(env); - auto left_in_table = reinterpret_cast(j_left_table); - auto right_in_table = reinterpret_cast(j_right_table); - cudf::jni::native_jintArray left_key_indices(env, j_left_key_indices); - auto left_keys_table = cudf::jni::get_keys_table(left_in_table, left_key_indices); - cudf::jni::native_jintArray right_key_indices(env, j_right_key_indices); - auto right_keys_table = cudf::jni::get_keys_table(right_in_table, right_key_indices); - auto nulleq = compare_nulls_equal ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; - - // compute gather maps for the left and right tables that can produce the join result rows - auto join_maps = cudf::full_join(left_keys_table, right_keys_table, nulleq); - CUDF_EXPECTS(join_maps.first->size() <= std::numeric_limits::max(), - "join result exceeds maximum column length"); - auto num_join_rows = static_cast(join_maps.first->size()); - - // compute the join result rows for the left table columns - auto left_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, - join_maps.first->data()); - auto left_out_table = - cudf::gather(*left_in_table, left_gather_col, cudf::out_of_bounds_policy::NULLIFY); - // Replace any nulls in the left key column results with the right key column results. - std::vector> result_cols = left_out_table->release(); - auto right_gather_col = cudf::column_view(cudf::data_type{cudf::type_id::INT32}, num_join_rows, - join_maps.second->data()); - for (int i = 0; i < left_key_indices.size(); ++i) { - std::unique_ptr &colptr = result_cols[left_key_indices[i]]; - auto right_key_col = right_in_table->column(right_key_indices[i]); - auto gathered = cudf::gather(cudf::table_view{{right_key_col}}, right_gather_col, - cudf::out_of_bounds_policy::NULLIFY); - auto replaced_col = cudf::replace_nulls(*colptr, gathered->get_column(0)); - colptr.reset(replaced_col.release()); - } - left_key_indices.cancel(); - - // compute the join result rows for the right table columns - auto right_non_keys_table = cudf::jni::get_non_keys_table(right_in_table, right_key_indices); - right_key_indices.cancel(); - auto right_out_table = - cudf::gather(right_non_keys_table, right_gather_col, cudf::out_of_bounds_policy::NULLIFY); - - return cudf::jni::combine_join_results(env, std::move(result_cols), right_out_table->release()); - } - CATCH_STD(env, NULL); -} - -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftSemiJoin( - JNIEnv *env, jclass, jlong left_table, jintArray left_col_join_indices, jlong right_table, - jintArray right_col_join_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, left_col_join_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, right_col_join_indices, "right_col_join_indices is null", NULL); - - try { - cudf::jni::auto_set_device(env); - cudf::table_view *n_left_table = reinterpret_cast(left_table); - cudf::table_view *n_right_table = reinterpret_cast(right_table); - cudf::jni::native_jintArray left_join_cols_arr(env, left_col_join_indices); - std::vector left_join_cols( - left_join_cols_arr.data(), left_join_cols_arr.data() + left_join_cols_arr.size()); - cudf::jni::native_jintArray right_join_cols_arr(env, right_col_join_indices); - std::vector right_join_cols( - right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - - std::unique_ptr result = - cudf::left_semi_join(*n_left_table, *n_right_table, left_join_cols, right_join_cols, - static_cast(compare_nulls_equal) ? cudf::null_equality::EQUAL : - cudf::null_equality::UNEQUAL); - - return convert_table_for_return(env, result); - } - CATCH_STD(env, NULL); -} - -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftAntiJoin( - JNIEnv *env, jclass, jlong left_table, jintArray left_col_join_indices, jlong right_table, - jintArray right_col_join_indices, jboolean compare_nulls_equal) { - JNI_NULL_CHECK(env, left_table, "left_table is null", NULL); - JNI_NULL_CHECK(env, left_col_join_indices, "left_col_join_indices is null", NULL); - JNI_NULL_CHECK(env, right_table, "right_table is null", NULL); - JNI_NULL_CHECK(env, right_col_join_indices, "right_col_join_indices is null", NULL); - - try { - cudf::jni::auto_set_device(env); - cudf::table_view *n_left_table = reinterpret_cast(left_table); - cudf::table_view *n_right_table = reinterpret_cast(right_table); - cudf::jni::native_jintArray left_join_cols_arr(env, left_col_join_indices); - std::vector left_join_cols( - left_join_cols_arr.data(), left_join_cols_arr.data() + left_join_cols_arr.size()); - cudf::jni::native_jintArray right_join_cols_arr(env, right_col_join_indices); - std::vector right_join_cols( - right_join_cols_arr.data(), right_join_cols_arr.data() + right_join_cols_arr.size()); - - std::unique_ptr result = - cudf::left_anti_join(*n_left_table, *n_right_table, left_join_cols, right_join_cols, - static_cast(compare_nulls_equal) ? cudf::null_equality::EQUAL : - cudf::null_equality::UNEQUAL); - - return convert_table_for_return(env, result); - } - CATCH_STD(env, NULL); -} - JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftJoinGatherMaps( JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { return cudf::jni::join_gather_maps( diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index fbaead1e429..7ef47d6a7cc 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -793,556 +793,6 @@ void testReadORCTimeUnit() { } } - @Test - void testLeftJoinWithNulls() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build(); - Table expected = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) // common - .column( 100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left - .column(null, null, 203, null, null, null, null, 201, 202, 204) // right - .build(); - Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testLeftJoinOnNullKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, null, null, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - - Table rightTable = new Table.TestBuilder() - .column(null, null, 9, 8, 10, 32) - .column( 201, 202, 203, 204, 205, 206) - .build()) { - - try (Table expectedResults = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, null, null, null, null, 8) // common - .column( 100, 101, 102, 103, 104, 105, 106, 107, 107, 108, 108, 109) // left - .column(null, null, 203, null, null, null, null, 201, 202, 201, 202, 204) // right - .build(); - - Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expectedResults, orderedJoinedTable); - } - - try (Table expectedResults = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, null, null, 8) // common - .column( 100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left - .column(null, null, 203, null, null, null, null, null, null, 204) // right - .build(); - - Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expectedResults, orderedJoinedTable); - } - } - } - - @Test - void testLeftJoin() { - try (Table leftTable = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) - .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) - .build(); - Table rightTable = new Table.TestBuilder() - .column(306, 301, 360, 109, 335, 254, 317, 361, 251, 326) - .column( 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) - .build(); - Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); - Table expected = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common - .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) // left - .column( 22, 29, 25, 20, 23, 27, 28, 24, 21, 26) // right - .build()) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testLeftJoinLeftEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder() - .column(emptyInts) - .column(emptyInts) - .build(); - Table rightTable = new Table.TestBuilder() - .column(306, 301, 360, 109, 335, 254, 317, 361, 251, 326) - .column( 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) - .build(); - Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table expected = new Table.TestBuilder() - .column(emptyInts) // common - .column(emptyInts) // left - .column(emptyInts) // right - .build()) { - assertTablesAreEqual(expected, joinedTable); - } - } - - @Test - void testLeftJoinRightEmpty() { - final Integer[] emptyInts = new Integer[0]; - final Integer[] nullInts = new Integer[10]; - Arrays.fill(nullInts, null); - try (Table leftTable = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) - .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) - .build(); - Table rightTable = new Table.TestBuilder() - .column(emptyInts) - .column(emptyInts) - .build(); - Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); - Table expected = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common - .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) // left - .column(nullInts) // right - .build()) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testFullJoinWithNonCommonKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(200, 201, 202, 203, 204, 205) - .build(); - Table expected = new Table.TestBuilder() - .column( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 32) // common - .column( 103, 104, 100, 101, 106, 108, 107, 105, 109, 102, null, null) // left - .column(null, null, null, null, null, 201, 200, null, 203, 202, 204, 205) // right - .build(); - Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testFullJoinLeftEmpty() { - final Integer[] emptyInts = new Integer[0]; - final Integer[] nullInts = new Integer[6]; - try (Table leftTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(200, 201, 202, 203, 204, 205) - .build(); - Table expected = new Table.TestBuilder() - .column( 5, 6, 8, 9, 10, 32) // common - .column(nullInts) // left - .column( 201, 200, 203, 202, 204, 205) // right - .build(); - Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testFullJoinRightEmpty() { - final Integer[] emptyInts = new Integer[0]; - final Integer[] nullInts = new Integer[10]; - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table expected = new Table.TestBuilder() - .column( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) // common - .column( 103, 104, 100, 101, 106, 108, 107, 105, 109, 102) // left - .column(nullInts) // right - .build(); - Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testFullJoinOnNullKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, null, 0, 1, 7, 4, null, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column(null, 5, null, 8, 10, 32) - .column( 200, 201, 202, 203, 204, 205) - .build()) { - - // First, test that null-key rows match, with compareNullsEqual=true. - try (Table expectedResults = new Table.TestBuilder() - .column(null, null, null, null, 0, 1, 2, 3, 4, 5, 7, 8, 10, 32) // common - .column( 102, 102, 107, 107, 103, 104, 100, 101, 106, 108, 105, 109, null, null) // left - .column( 200, 202, 200, 202, null, null, null, null, null, 201, null, 203, 204, 205) // right - .build(); - Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true), OrderByArg.asc(1, true))) { - assertTablesAreEqual(expectedResults, orderedJoinedTable); - } - - // Next, test that null-key rows do not match, with compareNullsEqual=false. - try (Table expectedResults = new Table.TestBuilder() - .column(null, null, null, null, 0, 1, 2, 3, 4, 5, 7, 8, 10, 32) // common - .column(null, null, 102, 107, 103, 104, 100, 101, 106, 108, 105, 109, null, null) // left - .column( 200, 202, null, null, null, null, null, null, null, 201, null, 203, 204, 205) // right - .build(); - Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy( - OrderByArg.asc(0, true), OrderByArg.asc(1, true), OrderByArg.asc(2, true))) { - assertTablesAreEqual(expectedResults, orderedJoinedTable); - } - } - } - - @Test - void testFullJoinWithOnlyCommonKeys() { - try (Table leftTable = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column(306, 301, 360, 109, 335, 254, 317, 361, 251, 326) - .column(200, 201, 202, 203, 204, 205, 206, 207, 208, 209) - .build(); - Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(new int[]{0}), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); - Table expected = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left - .column(202, 209, 205, 200, 203, 207, 208, 204, 201, 206) // right - .build()) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testInnerJoinWithNonCommonKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(200, 201, 202, 203, 204, 205) - .build(); - Table expected = new Table.TestBuilder() - .column( 9, 6, 5, 8) // common - .column(102, 107, 108, 109) // left - .column(202, 200, 201, 203) // right - .build(); - Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testInnerJoinLeftEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table expected = new Table.TestBuilder() - .column(emptyInts).column(emptyInts).column(emptyInts).build(); - Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true)) { - assertTablesAreEqual(expected, joinedTable); - } - } - - @Test - void testInnerJoinRightEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table expected = new Table.TestBuilder() - .column(emptyInts).column(emptyInts).column(emptyInts).build(); - Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true)) { - assertTablesAreEqual(expected, joinedTable); - } - } - - @Test - void testInnerJoinOnNullKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, null, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, null, 9, 8, 10, 32) - .column(200, 201, 202, 203, 204, 205) - .build()) { - - // First, test that null-key rows match, with compareNullsEqual=true. - try (Table expected = new Table.TestBuilder() - .column( 9, 6, null, 8) // common - .column(102, 107, 108, 109) // left - .column(202, 200, 201, 203) // right - .build(); - Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - - // Next, test that null-key rows do not match, with compareNullsEqual=false. - try (Table expected = new Table.TestBuilder() - .column( 9, 6, 8) // common - .column(102, 107, 109) // left - .column(202, 200, 203) // right - .build(); - Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))){ - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - } - - @Test - void testInnerJoinWithOnlyCommonKeys() { - try (Table leftTable = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column(306, 301, 360, 109, 335, 254, 317, 361, 251, 326) - .column(200, 201, 202, 203, 204, 205, 206, 207, 208, 209) - .build(); - Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(new int[]{0}), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); - Table expected = new Table.TestBuilder() - .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left - .column(202, 209, 205, 200, 203, 207, 208, 204, 201, 206) // right - .build()) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testLeftSemiJoin() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build(); - Table expected = new Table.TestBuilder() - .column( 9, 6, 5, 8) - .column(102, 107, 108, 109) - .build(); - Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testLeftSemiJoinLeftEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build(); - Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true)) { - assertTablesAreEqual(leftTable, joinedTable); - } - } - - @Test - void testLeftSemiJoinRightEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true)) { - assertTablesAreEqual(rightTable, joinedTable); - } - } - - @Test - void testLeftSemiJoinWithNulls() { - try (Table leftTable = new Table.TestBuilder() - .column( 360, 326, null, 306, null, 254, 251, 361, 301, 317) - .column( 10, 11, null, 13, 14, null, 16, 17, 18, 19) - .column("20", "29", "22", "23", "24", "25", "26", "27", "28", "29") - .build(); - Table rightTable = new Table.TestBuilder() - .column( 306, 301, 360, 109, 335, 254, 317, 361, 251, 326) - .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") - .build(); - Table joinedTable = leftTable.onColumns(0, 2).leftSemiJoin(rightTable.onColumns(0, 1), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true)); - Table expected = new Table.TestBuilder() - .column(254, 326, 361) - .column(null, 11, 17) - .column("25", "29", "27") - .build()) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testLeftSemiJoinOnNullKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, null, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, null, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build()) { - - // First, test that null-key rows match, with compareNullsEqual=true. - try (Table expected = new Table.TestBuilder() - .column( 9, 6, null, 8) - .column(102, 107, 108, 109) - .build(); - Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - - // Next, test that null-key rows do not match, with compareNullsEqual=false. - try (Table expected = new Table.TestBuilder() - .column( 9, 6, 8) - .column(102, 107, 109) - .build(); - Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - } - - @Test - void testLeftAntiJoin() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build(); - Table expected = new Table.TestBuilder() - .column( 2, 3, 0, 1, 7, 4) - .column(100, 101, 103, 104, 105, 106) - .build(); - Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - - @Test - void testLeftAntiJoinLeftEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table rightTable = new Table.TestBuilder() - .column( 6, 5, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build(); - Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true)) { - assertTablesAreEqual(leftTable, joinedTable); - } - } - - @Test - void testLeftAntiJoinRightEmpty() { - final Integer[] emptyInts = new Integer[0]; - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, 5, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder().column(emptyInts).column(emptyInts).build(); - Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true)) { - assertTablesAreEqual(leftTable, joinedTable); - } - } - - @Test - void testLeftAntiJoinOnNullKeys() { - try (Table leftTable = new Table.TestBuilder() - .column( 2, 3, 9, 0, 1, 7, 4, 6, null, 8) - .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) - .build(); - Table rightTable = new Table.TestBuilder() - .column( 6, null, 9, 8, 10, 32) - .column(201, 202, 203, 204, 205, 206) - .build()) { - - // First, test that null-key rows match, with compareNullsEqual=true. - try (Table expected = new Table.TestBuilder() - .column( 2, 3, 0, 1, 7, 4) - .column(100, 101, 103, 104, 105, 106) - .build(); - Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - - // Next, test that null-key rows do not match, with compareNullsEqual=false. - try (Table expected = new Table.TestBuilder() - .column( 2, 3, 0, 1, 7, 4, null) - .column(100, 101, 103, 104, 105, 106, 108) - .build(); - Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - } - - @Test - void testLeftAntiJoinWithNulls() { - try (Table leftTable = new Table.TestBuilder() - .column( 360, 326, null, 306, null, 254, 251, 361, 301, 317) - .column( 10, 11, null, 13, 14, null, 16, 17, 18, 19) - .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") - .build(); - Table rightTable = new Table.TestBuilder() - .column( 306, 301, 360, 109, 335, 254, 317, 361, 251, 326) - .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") - .build(); - Table joinedTable = leftTable.onColumns(0, 2).leftAntiJoin(rightTable.onColumns(0, 1), true); - Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(2, true)); - Table expected = new Table.TestBuilder() - .column( 360, 326, null, 306, null, 251, 301, 317) - .column( 10, 11, null, 13, 14, 16, 18, 19) - .column("20", "21", "22", "23", "24", "26", "28", "29") - .build()) { - assertTablesAreEqual(expected, orderedJoinedTable); - } - } - @Test void testCrossJoin() { try (Table leftTable = new Table.TestBuilder()