From 4aebb24bc2400b374f0313018f30314a3a3ef537 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 8 Feb 2022 11:16:19 -0500 Subject: [PATCH 01/11] Create hash table by ignoring nulls --- cpp/src/join/hash_join.cu | 4 ++-- cpp/src/join/hash_join.cuh | 38 ++++++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 7590c93f0c3..27fa3bbcb62 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -235,7 +235,7 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, null_equality compare_nulls, rmm::cuda_stream_view stream) : _is_empty{build.num_rows() == 0}, - _hash_table{compute_hash_table_size(build.num_rows()), + _hash_table{0, std::numeric_limits::max(), cudf::detail::JoinNoneValue, stream.value(), @@ -253,7 +253,7 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, if (_is_empty) { return; } - build_join_hash_table(_build, _hash_table, compare_nulls, stream); + _hash_table = std::move(build_join_hash_table(_build, compare_nulls, stream)); } std::pair>, diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index 21bfd8120f7..7302678232d 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -148,42 +148,52 @@ std::unique_ptr combine_table_pair(std::unique_ptr&& l /** * @brief Builds the hash table based on the given `build_table`. * - * @tparam MultimapType The type of the hash table - * * @param build Table of columns used to build join hash. - * @param hash_table Build hash table. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. * */ -template -void build_join_hash_table(cudf::table_view const& build, - MultimapType& hash_table, - null_equality compare_nulls, - rmm::cuda_stream_view stream) +cudf::detail::multimap_type build_join_hash_table(cudf::table_view const& build, + null_equality compare_nulls, + rmm::cuda_stream_view stream) { - auto build_table_ptr = cudf::table_device_view::create(build, stream); + auto build_table_ptr = cudf::table_device_view::create(build, stream); + auto const build_table_num_rows = build.num_rows(); CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); + CUDF_EXPECTS(0 != build_table_num_rows, "Build side table has no rows"); + + cudf::detail::multimap_type hash_table{ + compute_hash_table_size(build_table_num_rows), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + stream.value(), + detail::hash_table_allocator_type{default_allocator{}, stream}}; row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_build, empty_key_sentinel}; + make_pair_function pair_func{hash_build, hash_table.get_empty_key_sentinel()}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - size_type const build_table_num_rows{build_table_ptr->num_rows()}; if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { hash_table.insert(iter, iter + build_table_num_rows, stream.value()); } else { thrust::counting_iterator stencil(0); - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + auto const [row_bitmask, count] = cudf::detail::bitmask_and(build, stream); row_is_valid pred{static_cast(row_bitmask.data())}; + // resize the hash table + hash_table = std::move(cudf::detail::multimap_type{ + compute_hash_table_size(build_table_num_rows - count), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + stream.value(), + detail::hash_table_allocator_type{default_allocator{}, stream}}); + // insert valid rows hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); } + return hash_table; } } // namespace detail From 2e0825a670d479c5e6c8f93a7746dbbafdfa538f Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 11 Feb 2022 16:45:20 -0500 Subject: [PATCH 02/11] Use valid row logic to reduce hash table size --- cpp/benchmarks/join/join_common.hpp | 6 +-- cpp/src/join/hash_join.cu | 72 +++++++++++++++++++++++++---- cpp/src/join/hash_join.cuh | 9 ++-- cpp/src/join/join.cu | 9 ++-- 4 files changed, 77 insertions(+), 19 deletions(-) diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index e88253395d8..7ae94de47c6 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -117,14 +117,14 @@ static void BM_join(state_type& state, Join JoinFunc) cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = JoinFunc( - probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); + build_table, probe_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); } } if constexpr (std::is_same_v and (not is_conditional)) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = JoinFunc(probe_table, - build_table, + auto result = JoinFunc(build_table, + probe_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL, diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index b89bcabf23e..7be847d80a5 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -41,6 +41,58 @@ std::pair, std::unique_ptr> get_empty_joined_table return std::make_pair(std::move(empty_probe), std::move(empty_build)); } +std::pair get_valid_row_info(cudf::table_view const& build, + null_equality const nulls_equal, + rmm::cuda_stream_view stream) +{ + auto valid_num_rows = build.num_rows(); + auto [row_bitmask, null_count] = cudf::detail::bitmask_and(build, stream); + if (nulls_equal == cudf::null_equality::UNEQUAL and nullable(build)) { + valid_num_rows -= null_count; + } + return std::make_pair(std::move(row_bitmask), valid_num_rows); +} + +/** + * @brief Fills the hash table based on the given `build_table`. + * + * @tparam MultimapType The type of the hash table + * + * @param build Table of columns used to build join hash. + * @param hash_table Build hash table. + * @param nulls_equal Flag to denote nulls are equal or not. + * @param stream CUDA stream used for device memory operations and kernel launches. + * + */ +template +void fill_join_hash_table(cudf::table_view const& build, + MultimapType& hash_table, + null_equality const nulls_equal, + rmm::device_buffer const& row_bitmask, + rmm::cuda_stream_view stream) +{ + auto build_table_ptr = cudf::table_device_view::create(build, stream); + auto const build_table_num_rows = build.num_rows(); + + CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); + CUDF_EXPECTS(0 != build_table_num_rows, "Build side table has no rows"); + + row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; + make_pair_function pair_func{hash_build, hash_table.get_empty_key_sentinel()}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { + hash_table.insert(iter, iter + build_table_num_rows, stream.value()); + } else { + thrust::counting_iterator stencil(0); + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); + } +} + /** * @brief Probes the `hash_table` built from `build_table` for tuples in `probe_table`, * and returns the output indices of `build_table` and `probe_table` as a combined table. @@ -234,9 +286,9 @@ hash_join::hash_join_impl::~hash_join_impl() = default; hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, null_equality compare_nulls, rmm::cuda_stream_view stream) - : _is_empty{build.num_rows() == 0}, + : _valid_row_info{cudf::detail::get_valid_row_info(build, compare_nulls, stream)}, _nulls_equal{compare_nulls}, - _hash_table{compute_hash_table_size(build.num_rows()), + _hash_table{compute_hash_table_size(_valid_row_info.second), std::numeric_limits::max(), cudf::detail::JoinNoneValue, stream.value(), @@ -252,9 +304,11 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, build, {}, {}, structs::detail::column_nullability::FORCE); _build = _flattened_build_table; - if (_is_empty) { return; } + // skip filling if empty + if (_valid_row_info.second == 0) { return; } - cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream); + cudf::detail::fill_join_hash_table( + _build, _hash_table, _nulls_equal, _valid_row_info.first, stream); } std::pair>, @@ -296,7 +350,7 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p CUDF_FUNC_RANGE(); // Return directly if build table is empty - if (_is_empty) { return 0; } + if (_valid_row_info.second == 0) { return 0; } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -320,7 +374,7 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (_is_empty) { return probe.num_rows(); } + if (_valid_row_info.second == 0) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -345,7 +399,7 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (_is_empty) { return probe.num_rows(); } + if (_valid_row_info.second == 0) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -407,11 +461,11 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe_tabl rmm::mr::device_memory_resource* mr) const { // Trivial left join case - exit early - if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { + if (_valid_row_info.second == 0 and JoinKind != cudf::detail::join_kind::INNER_JOIN) { return get_trivial_left_join_indices(probe_table, stream, mr); } - CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); + CUDF_EXPECTS(_valid_row_info.second > 0, "Hash table of hash join is null."); auto build_table_ptr = cudf::table_device_view::create(_build, stream); auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index 26c7dcdb8ba..1768afd13aa 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -162,14 +162,15 @@ void build_join_hash_table(cudf::table_view const& build, null_equality const nulls_equal, rmm::cuda_stream_view stream) { - auto build_table_ptr = cudf::table_device_view::create(build, stream); - auto const build_table_num_rows = build.num_rows(); + auto build_table_ptr = cudf::table_device_view::create(build, stream); + auto const build_table_num_rows{build_table_ptr->num_rows()}; CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); CUDF_EXPECTS(0 != build_table_num_rows, "Build side table has no rows"); row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; - make_pair_function pair_func{hash_build, hash_table.get_empty_key_sentinel()}; + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_build, empty_key_sentinel}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); @@ -196,7 +197,7 @@ struct hash_join::hash_join_impl { hash_join_impl& operator=(hash_join_impl&&) = delete; private: - bool const _is_empty; + std::pair const _valid_row_info; cudf::null_equality const _nulls_equal; cudf::table_view _build; std::vector> _created_null_columns; diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index ef9e7867a2d..df0219e4047 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -78,7 +78,8 @@ std::unique_ptr
inner_join(table_view const& left_input, auto const left = scatter_columns(matched.second.front(), left_on, left_input); auto const right = scatter_columns(matched.second.back(), right_on, right_input); - auto join_indices = inner_join(left.select(left_on), right.select(right_on), compare_nulls, mr); + auto join_indices = + detail::inner_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr); std::unique_ptr
left_result = detail::gather(left, join_indices.first->begin(), join_indices.first->end(), @@ -134,7 +135,8 @@ std::unique_ptr
left_join(table_view const& left_input, table_view const left = scatter_columns(matched.second.front(), left_on, left_input); table_view const right = scatter_columns(matched.second.back(), right_on, right_input); - auto join_indices = left_join(left.select(left_on), right.select(right_on), compare_nulls); + auto join_indices = + detail::left_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr); if ((left_on.empty() || right_on.empty()) || is_trivial_join(left, right, cudf::detail::join_kind::LEFT_JOIN)) { @@ -197,7 +199,8 @@ std::unique_ptr
full_join(table_view const& left_input, table_view const left = scatter_columns(matched.second.front(), left_on, left_input); table_view const right = scatter_columns(matched.second.back(), right_on, right_input); - auto join_indices = full_join(left.select(left_on), right.select(right_on), compare_nulls); + auto join_indices = + detail::full_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr); if ((left_on.empty() || right_on.empty()) || is_trivial_join(left, right, cudf::detail::join_kind::FULL_JOIN)) { From 78e855f2c16cac2586f7461d0a97518a804acd78 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 11 Feb 2022 17:40:38 -0500 Subject: [PATCH 03/11] Update copyright year --- cpp/benchmarks/join/join_common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 7ae94de47c6..3cc3b264a9d 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From a38cb5c134bd2f84d21c04628bf78dd36eb98787 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 22 Feb 2022 15:08:08 -0500 Subject: [PATCH 04/11] Minor cleanup --- cpp/benchmarks/join/join_common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index c6424dbb73a..6cd2e9e5c48 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -129,7 +129,7 @@ static void BM_join(state_type& state, Join JoinFunc) cuda_event_timer raii(state, true, rmm::cuda_stream_default); auto result = JoinFunc( - build_table, probe_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); + probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); } } if constexpr (std::is_same_v and (not is_conditional)) { From 8082a4abdcd21793c80f57256c253f69f9e46ceb Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 22 Feb 2022 15:28:09 -0500 Subject: [PATCH 05/11] Update docs --- cpp/src/join/hash_join.cu | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 7be847d80a5..1228f07fc8c 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -41,6 +41,15 @@ std::pair, std::unique_ptr
> get_empty_joined_table return std::make_pair(std::move(empty_probe), std::move(empty_build)); } +/** + * @brief Performs bitwise AND of the bitmasks of columns of `build`. Returns + * a pair of row bitmasks and count of valid rows. + * + * @param build Table of columns used to build join hash. + * @param nulls_equal Flag to denote nulls are equal or not. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return A pair of row bitmask and count of valid rows. + */ std::pair get_valid_row_info(cudf::table_view const& build, null_equality const nulls_equal, rmm::cuda_stream_view stream) @@ -54,13 +63,14 @@ std::pair get_valid_row_info(cudf::table_vi } /** - * @brief Fills the hash table based on the given `build_table`. + * @brief Fills the hash table based on the given table `build`. * - * @tparam MultimapType The type of the hash table + * @tparam MultimapType Type of the hash table * * @param build Table of columns used to build join hash. * @param hash_table Build hash table. * @param nulls_equal Flag to denote nulls are equal or not. + * @param row_bitmask Bitmasks to denote whether a row is valid or not. * @param stream CUDA stream used for device memory operations and kernel launches. * */ From b1cd09411b42012b6dede3aafc4ddcb74e6123f1 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 29 Mar 2022 10:33:23 -0400 Subject: [PATCH 06/11] Add peak memory measurement in join benchmarks --- cpp/benchmarks/join/join_common.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 65c2d7aacd7..b43d3e9c017 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -124,6 +124,7 @@ static void BM_join(state_type& state, Join JoinFunc) [[maybe_unused]] std::vector columns_to_join = {0}; // Benchmark the inner join operation + auto mem_stats_logger = cudf::memory_stats_logger(); if constexpr (std::is_same_v and (not is_conditional)) { for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default); @@ -131,6 +132,7 @@ static void BM_join(state_type& state, Join JoinFunc) auto result = JoinFunc( probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); } + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } if constexpr (std::is_same_v and (not is_conditional)) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { @@ -142,6 +144,7 @@ static void BM_join(state_type& state, Join JoinFunc) cudf::null_equality::UNEQUAL, stream_view); }); + state.add_element_count(mem_stats_logger.peak_memory_usage(), "Peak Memory"); } // Benchmark conditional join From 3c3aa224e10d3fb38c60c5c67d90843be797c051 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 29 Mar 2022 11:23:21 -0400 Subject: [PATCH 07/11] Minor improvement: use helper struct instead of std::pair --- cpp/src/join/hash_join.cu | 34 ++++++++++++++++++---------------- cpp/src/join/hash_join.cuh | 7 ++++++- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 1228f07fc8c..a8d2d1b65f2 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -43,23 +43,25 @@ std::pair, std::unique_ptr
> get_empty_joined_table /** * @brief Performs bitwise AND of the bitmasks of columns of `build`. Returns - * a pair of row bitmasks and count of valid rows. + * a helper struct of row bitmasks and count of valid rows. * * @param build Table of columns used to build join hash. * @param nulls_equal Flag to denote nulls are equal or not. * @param stream CUDA stream used for device memory operations and kernel launches. - * @return A pair of row bitmask and count of valid rows. + * @return A helper struct of row bitmask and count of valid rows. */ -std::pair get_valid_row_info(cudf::table_view const& build, - null_equality const nulls_equal, - rmm::cuda_stream_view stream) +cudf::detail::row_info get_valid_row_info(cudf::table_view const& build, + null_equality const nulls_equal, + rmm::cuda_stream_view stream) { - auto valid_num_rows = build.num_rows(); + cudf::detail::row_info result; + result.num_valid_rows = build.num_rows(); auto [row_bitmask, null_count] = cudf::detail::bitmask_and(build, stream); if (nulls_equal == cudf::null_equality::UNEQUAL and nullable(build)) { - valid_num_rows -= null_count; + result.num_valid_rows -= null_count; } - return std::make_pair(std::move(row_bitmask), valid_num_rows); + result.composite_bitmask = std::move(row_bitmask); + return result; } /** @@ -298,7 +300,7 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, rmm::cuda_stream_view stream) : _valid_row_info{cudf::detail::get_valid_row_info(build, compare_nulls, stream)}, _nulls_equal{compare_nulls}, - _hash_table{compute_hash_table_size(_valid_row_info.second), + _hash_table{compute_hash_table_size(_valid_row_info.num_valid_rows), std::numeric_limits::max(), cudf::detail::JoinNoneValue, stream.value(), @@ -315,10 +317,10 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, _build = _flattened_build_table; // skip filling if empty - if (_valid_row_info.second == 0) { return; } + if (_valid_row_info.num_valid_rows == 0) { return; } cudf::detail::fill_join_hash_table( - _build, _hash_table, _nulls_equal, _valid_row_info.first, stream); + _build, _hash_table, _nulls_equal, _valid_row_info.composite_bitmask, stream); } std::pair>, @@ -360,7 +362,7 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p CUDF_FUNC_RANGE(); // Return directly if build table is empty - if (_valid_row_info.second == 0) { return 0; } + if (_valid_row_info.num_valid_rows == 0) { return 0; } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -384,7 +386,7 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (_valid_row_info.second == 0) { return probe.num_rows(); } + if (_valid_row_info.num_valid_rows == 0) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -409,7 +411,7 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (_valid_row_info.second == 0) { return probe.num_rows(); } + if (_valid_row_info.num_valid_rows == 0) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -471,11 +473,11 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe_tabl rmm::mr::device_memory_resource* mr) const { // Trivial left join case - exit early - if (_valid_row_info.second == 0 and JoinKind != cudf::detail::join_kind::INNER_JOIN) { + if (_valid_row_info.num_valid_rows == 0 and JoinKind != cudf::detail::join_kind::INNER_JOIN) { return get_trivial_left_join_indices(probe_table, stream, mr); } - CUDF_EXPECTS(_valid_row_info.second > 0, "Hash table of hash join is null."); + CUDF_EXPECTS(_valid_row_info.num_valid_rows > 0, "Hash table of hash join is null."); auto build_table_ptr = cudf::table_device_view::create(_build, stream); auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index 1768afd13aa..923b311b322 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -40,6 +40,11 @@ namespace cudf { namespace detail { +struct row_info { + rmm::device_buffer composite_bitmask; + cudf::size_type num_valid_rows; +}; + /** * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value. * @@ -197,7 +202,7 @@ struct hash_join::hash_join_impl { hash_join_impl& operator=(hash_join_impl&&) = delete; private: - std::pair const _valid_row_info; + cudf::detail::row_info const _valid_row_info; cudf::null_equality const _nulls_equal; cudf::table_view _build; std::vector> _created_null_columns; From e76cd75db582562af736946c6f19a68771f4e605 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 31 Mar 2022 16:04:06 -0400 Subject: [PATCH 08/11] Minor code cleanup --- cpp/src/join/hash_join.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index a8d2d1b65f2..7bacd008f10 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -54,14 +54,12 @@ cudf::detail::row_info get_valid_row_info(cudf::table_view const& build, null_equality const nulls_equal, rmm::cuda_stream_view stream) { - cudf::detail::row_info result; - result.num_valid_rows = build.num_rows(); + auto num_valid_rows = build.num_rows(); auto [row_bitmask, null_count] = cudf::detail::bitmask_and(build, stream); if (nulls_equal == cudf::null_equality::UNEQUAL and nullable(build)) { result.num_valid_rows -= null_count; } - result.composite_bitmask = std::move(row_bitmask); - return result; + return {std::move(row_bitmask), num_valid_rows}; } /** From 50e7dff5aae4f2ef4ce3e514b6a611c911f035c0 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 31 Mar 2022 16:12:30 -0400 Subject: [PATCH 09/11] Fix a typo --- cpp/src/join/hash_join.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 7bacd008f10..5f6754fe6c1 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -57,7 +57,7 @@ cudf::detail::row_info get_valid_row_info(cudf::table_view const& build, auto num_valid_rows = build.num_rows(); auto [row_bitmask, null_count] = cudf::detail::bitmask_and(build, stream); if (nulls_equal == cudf::null_equality::UNEQUAL and nullable(build)) { - result.num_valid_rows -= null_count; + num_valid_rows -= null_count; } return {std::move(row_bitmask), num_valid_rows}; } From 96674911744c97e44c5c285d71067abcba798c59 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 29 Apr 2022 10:48:22 -0400 Subject: [PATCH 10/11] Revert changes --- cpp/benchmarks/join/join_common.hpp | 7 +-- cpp/src/join/hash_join.cu | 82 ++++------------------------- cpp/src/join/hash_join.cuh | 11 ++-- 3 files changed, 14 insertions(+), 86 deletions(-) diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index dbc45e30406..6ff2543cf7d 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -125,7 +125,6 @@ static void BM_join(state_type& state, Join JoinFunc) [[maybe_unused]] std::vector columns_to_join = {0}; // Benchmark the inner join operation - auto mem_stats_logger = cudf::memory_stats_logger(); if constexpr (std::is_same_v and (not is_conditional)) { for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default); @@ -133,19 +132,17 @@ static void BM_join(state_type& state, Join JoinFunc) auto result = JoinFunc( probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL); } - state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } if constexpr (std::is_same_v and (not is_conditional)) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - auto result = JoinFunc(build_table, - probe_table, + auto result = JoinFunc(probe_table, + build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL, stream_view); }); - state.add_element_count(mem_stats_logger.peak_memory_usage(), "Peak Memory"); } // Benchmark conditional join diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 3567da9e2d5..086e1e49986 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -47,68 +47,6 @@ std::pair, std::unique_ptr
> get_empty_joined_table return std::make_pair(std::move(empty_probe), std::move(empty_build)); } -/** - * @brief Performs bitwise AND of the bitmasks of columns of `build`. Returns - * a helper struct of row bitmasks and count of valid rows. - * - * @param build Table of columns used to build join hash. - * @param nulls_equal Flag to denote nulls are equal or not. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @return A helper struct of row bitmask and count of valid rows. - */ -cudf::detail::row_info get_valid_row_info(cudf::table_view const& build, - null_equality const nulls_equal, - rmm::cuda_stream_view stream) -{ - auto num_valid_rows = build.num_rows(); - auto [row_bitmask, null_count] = cudf::detail::bitmask_and(build, stream); - if (nulls_equal == cudf::null_equality::UNEQUAL and nullable(build)) { - num_valid_rows -= null_count; - } - return {std::move(row_bitmask), num_valid_rows}; -} - -/** - * @brief Fills the hash table based on the given table `build`. - * - * @tparam MultimapType Type of the hash table - * - * @param build Table of columns used to build join hash. - * @param hash_table Build hash table. - * @param nulls_equal Flag to denote nulls are equal or not. - * @param row_bitmask Bitmasks to denote whether a row is valid or not. - * @param stream CUDA stream used for device memory operations and kernel launches. - * - */ -template -void fill_join_hash_table(cudf::table_view const& build, - MultimapType& hash_table, - null_equality const nulls_equal, - rmm::device_buffer const& row_bitmask, - rmm::cuda_stream_view stream) -{ - auto build_table_ptr = cudf::table_device_view::create(build, stream); - auto const build_table_num_rows = build.num_rows(); - - CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build_table_num_rows, "Build side table has no rows"); - - row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; - make_pair_function pair_func{hash_build, hash_table.get_empty_key_sentinel()}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { - hash_table.insert(iter, iter + build_table_num_rows, stream.value()); - } else { - thrust::counting_iterator stencil(0); - row_is_valid pred{static_cast(row_bitmask.data())}; - - // insert valid rows - hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); - } -} - /** * @brief Probes the `hash_table` built from `build_table` for tuples in `probe_table`, * and returns the output indices of `build_table` and `probe_table` as a combined table. @@ -302,9 +240,9 @@ hash_join::hash_join_impl::~hash_join_impl() = default; hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, null_equality compare_nulls, rmm::cuda_stream_view stream) - : _valid_row_info{cudf::detail::get_valid_row_info(build, compare_nulls, stream)}, + : _is_empty{build.num_rows() == 0}, _nulls_equal{compare_nulls}, - _hash_table{compute_hash_table_size(_valid_row_info.num_valid_rows), + _hash_table{compute_hash_table_size(build.num_rows()), std::numeric_limits::max(), cudf::detail::JoinNoneValue, stream.value(), @@ -320,11 +258,9 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, build, {}, {}, structs::detail::column_nullability::FORCE); _build = _flattened_build_table; - // skip filling if empty - if (_valid_row_info.num_valid_rows == 0) { return; } + if (_is_empty) { return; } - cudf::detail::fill_join_hash_table( - _build, _hash_table, _nulls_equal, _valid_row_info.composite_bitmask, stream); + cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream); } std::pair>, @@ -366,7 +302,7 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p CUDF_FUNC_RANGE(); // Return directly if build table is empty - if (_valid_row_info.num_valid_rows == 0) { return 0; } + if (_is_empty) { return 0; } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -390,7 +326,7 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (_valid_row_info.num_valid_rows == 0) { return probe.num_rows(); } + if (_is_empty) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -415,7 +351,7 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr CUDF_FUNC_RANGE(); // Trivial left join case - exit early - if (_valid_row_info.num_valid_rows == 0) { return probe.num_rows(); } + if (_is_empty) { return probe.num_rows(); } auto flattened_probe = structs::detail::flatten_nested_columns( probe, {}, {}, structs::detail::column_nullability::FORCE); @@ -477,11 +413,11 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe_tabl rmm::mr::device_memory_resource* mr) const { // Trivial left join case - exit early - if (_valid_row_info.num_valid_rows == 0 and JoinKind != cudf::detail::join_kind::INNER_JOIN) { + if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { return get_trivial_left_join_indices(probe_table, stream, mr); } - CUDF_EXPECTS(_valid_row_info.num_valid_rows > 0, "Hash table of hash join is null."); + CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); auto build_table_ptr = cudf::table_device_view::create(_build, stream); auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index 09fc846a08b..e55de043372 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -41,11 +41,6 @@ namespace cudf { namespace detail { -struct row_info { - rmm::device_buffer composite_bitmask; - cudf::size_type num_valid_rows; -}; - /** * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value. * @@ -169,10 +164,9 @@ void build_join_hash_table(cudf::table_view const& build, rmm::cuda_stream_view stream) { auto build_table_ptr = cudf::table_device_view::create(build, stream); - auto const build_table_num_rows{build_table_ptr->num_rows()}; CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build_table_num_rows, "Build side table has no rows"); + CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); @@ -180,6 +174,7 @@ void build_join_hash_table(cudf::table_view const& build, auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + size_type const build_table_num_rows{build_table_ptr->num_rows()}; if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { hash_table.insert(iter, iter + build_table_num_rows, stream.value()); } else { @@ -203,7 +198,7 @@ struct hash_join::hash_join_impl { hash_join_impl& operator=(hash_join_impl&&) = delete; private: - cudf::detail::row_info const _valid_row_info; + bool const _is_empty; cudf::null_equality const _nulls_equal; cudf::table_view _build; std::vector> _created_null_columns; From 7b2f5f62893372a708fbca0805b2f2dffea1264a Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 29 Apr 2022 13:35:49 -0400 Subject: [PATCH 11/11] Add bitmask as hash join member --- cpp/include/cudf/detail/join.hpp | 8 +++++--- cpp/src/join/hash_join.cu | 7 ++++++- cpp/src/join/join_common_utils.cuh | 5 +++-- cpp/src/join/mixed_join.cu | 8 ++++++-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 12e4aaa03fd..2a94ee22a0d 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -68,9 +69,10 @@ struct hash_join { hash_join& operator=(hash_join&&) = delete; private: - bool const _is_empty; ///< true if `_hash_table` is empty - cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal - cudf::table_view _build; ///< input table to build the hash map + bool const _is_empty; ///< true if `_hash_table` is empty + rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid + cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal + cudf::table_view _build; ///< input table to build the hash map cudf::structs::detail::flattened_table _flattened_build_table; ///< flattened data structures for `_build` map_type _hash_table; ///< hash table built on `_build` diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 3e0e76de708..07995ba2785 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -283,6 +283,7 @@ hash_join::hash_join(cudf::table_view const& build, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) : _is_empty{build.num_rows() == 0}, + _composite_bitmask{cudf::detail::bitmask_and(build, stream).first}, _nulls_equal{compare_nulls}, _hash_table{compute_hash_table_size(build.num_rows()), std::numeric_limits::max(), @@ -302,7 +303,11 @@ hash_join::hash_join(cudf::table_view const& build, if (_is_empty) { return; } - cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream); + cudf::detail::build_join_hash_table(_build, + _hash_table, + _nulls_equal, + static_cast(_composite_bitmask.data()), + stream); } template diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index fdb63419c84..b3994685623 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -143,6 +143,7 @@ get_trivial_left_join_indices( * @param build Table of columns used to build join hash. * @param hash_table Build hash table. * @param nulls_equal Flag to denote nulls are equal or not. + * @param bitmask Bitmask to denote whether a row is valid. * @param stream CUDA stream used for device memory operations and kernel launches. * */ @@ -150,6 +151,7 @@ template void build_join_hash_table(cudf::table_view const& build, MultimapType& hash_table, null_equality const nulls_equal, + [[maybe_unused]] bitmask_type const* bitmask, rmm::cuda_stream_view stream) { auto build_table_ptr = cudf::table_device_view::create(build, stream); @@ -168,8 +170,7 @@ void build_join_hash_table(cudf::table_view const& build, hash_table.insert(iter, iter + build_table_num_rows, stream.value()); } else { thrust::counting_iterator stencil(0); - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; - row_is_valid pred{static_cast(row_bitmask.data())}; + row_is_valid pred{bitmask}; // insert valid rows hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 27ee77e3edd..11553858e5f 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -135,7 +135,9 @@ mixed_join( // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - build_join_hash_table(build, hash_table, compare_nulls, stream); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + build_join_hash_table( + build, hash_table, compare_nulls, static_cast(row_bitmask.data()), stream); auto hash_table_view = hash_table.get_device_view(); auto left_conditional_view = table_device_view::create(left_conditional, stream); @@ -381,7 +383,9 @@ compute_mixed_join_output_size(table_view const& left_equality, // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - build_join_hash_table(build, hash_table, compare_nulls, stream); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + build_join_hash_table( + build, hash_table, compare_nulls, static_cast(row_bitmask.data()), stream); auto hash_table_view = hash_table.get_device_view(); auto left_conditional_view = table_device_view::create(left_conditional, stream);