Skip to content

Commit

Permalink
fix slower times in small tables
Browse files Browse the repository at this point in the history
  • Loading branch information
divyegala committed Feb 20, 2023
1 parent 38464ef commit 0113589
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 49 deletions.
5 changes: 4 additions & 1 deletion cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/column/column.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -74,7 +75,9 @@ struct hash_join {
rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
map_type _hash_table; ///< hash table built on `_build`
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
_preprocessed_build; ///< input table preprocessed for row operators
map_type _hash_table; ///< hash table built on `_build`

public:
/**
Expand Down
114 changes: 73 additions & 41 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ namespace {
* @return The exact size of the output of the join operation
*/
template <join_kind JoinKind>
std::size_t compute_join_output_size(table_view build_table,
table_view probe_table,
cudf::detail::multimap_type const& hash_table,
bool const has_nulls,
cudf::null_equality const nulls_equal,
rmm::cuda_stream_view stream)
std::size_t compute_join_output_size(
table_view const& build_table,
table_view const& probe_table,
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> preprocessed_build,
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> preprocessed_probe,
cudf::detail::multimap_type const& hash_table,
bool const has_nulls,
cudf::null_equality const nulls_equal,
rmm::cuda_stream_view stream)
{
const size_type build_table_num_rows{build_table.num_rows()};
const size_type probe_table_num_rows{probe_table.num_rows()};
Expand All @@ -88,15 +91,11 @@ std::size_t compute_join_output_size(table_view build_table,

auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls};

auto const preprocessed_probe =
cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto hash_probe = row_hash.device_hasher(probe_nulls);
auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

auto const preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};

Expand Down Expand Up @@ -144,24 +143,33 @@ std::size_t compute_join_output_size(table_view build_table,
template <join_kind JoinKind>
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
probe_join_hash_table(cudf::table_view build_table,
cudf::table_view probe_table,
cudf::detail::multimap_type const& hash_table,
bool has_nulls,
null_equality compare_nulls,
std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
probe_join_hash_table(
cudf::table_view const& build_table,
cudf::table_view const& probe_table,
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> preprocessed_build,
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> preprocessed_probe,
cudf::detail::multimap_type const& hash_table,
bool has_nulls,
null_equality compare_nulls,
std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Use the output size directly if provided. Otherwise, compute the exact output size
constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
? cudf::detail::join_kind::LEFT_JOIN
: JoinKind;

std::size_t const join_size =
output_size ? *output_size
: compute_join_output_size<ProbeJoinKind>(
build_table, probe_table, hash_table, has_nulls, compare_nulls, stream);
std::size_t const join_size = output_size
? *output_size
: compute_join_output_size<ProbeJoinKind>(build_table,
probe_table,
preprocessed_build,
preprocessed_probe,
hash_table,
has_nulls,
compare_nulls,
stream);

// If output size is zero, return immediately
if (join_size == 0) {
Expand All @@ -174,15 +182,11 @@ probe_join_hash_table(cudf::table_view build_table,

auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls};

auto const preprocessed_probe =
cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto hash_probe = row_hash.device_hasher(probe_nulls);
auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

auto const preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};
auto const comparator_helper = [&](auto const device_comparator) {
Expand Down Expand Up @@ -247,16 +251,26 @@ probe_join_hash_table(cudf::table_view build_table,
*
* @return Output size of full join.
*/
std::size_t get_full_join_size(cudf::table_view build_table,
cudf::table_view probe_table,
cudf::detail::multimap_type const& hash_table,
bool const has_nulls,
null_equality const compare_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::size_t get_full_join_size(
cudf::table_view const& build_table,
cudf::table_view const& probe_table,
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> preprocessed_build,
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> preprocessed_probe,
cudf::detail::multimap_type const& hash_table,
bool const has_nulls,
null_equality const compare_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
std::size_t join_size = compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN>(
build_table, probe_table, hash_table, has_nulls, compare_nulls, stream);
std::size_t join_size =
compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN>(build_table,
probe_table,
preprocessed_build,
preprocessed_probe,
hash_table,
has_nulls,
compare_nulls,
stream);

// If output size is zero, return immediately
if (join_size == 0) { return join_size; }
Expand All @@ -266,15 +280,11 @@ std::size_t get_full_join_size(cudf::table_view build_table,

auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls};

auto const preprocessed_probe =
cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto hash_probe = row_hash.device_hasher(probe_nulls);
auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

auto const preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};
auto const comparator_helper = [&](auto const device_comparator) {
Expand Down Expand Up @@ -360,15 +370,18 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
CUDF_EXPECTS(build.num_rows() < cudf::detail::MAX_JOIN_SIZE,
"Build column size is too big for hash join");

_build = std::move(build);
_build = build;
_preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(_build, stream);

if (_is_empty) { return; }

cudf::detail::build_join_hash_table(_build,
_hash_table,
_nulls_equal,
static_cast<bitmask_type const*>(_composite_bitmask.data()),
stream);
stream,
_preprocessed_build);
}

template <typename Hasher>
Expand Down Expand Up @@ -416,9 +429,14 @@ std::size_t hash_join<Hasher>::inner_join_size(cudf::table_view const& probe,
// Return directly if build table is empty
if (_is_empty) { return 0; }

auto preprocessed_probe =
cudf::experimental::row::equality::preprocessed_table::create(probe, stream);

return cudf::detail::compute_join_output_size<cudf::detail::join_kind::INNER_JOIN>(
_build,
probe,
_preprocessed_build,
preprocessed_probe,
_hash_table,
cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build),
_nulls_equal,
Expand All @@ -434,9 +452,14 @@ std::size_t hash_join<Hasher>::left_join_size(cudf::table_view const& probe,
// Trivial left join case - exit early
if (_is_empty) { return probe.num_rows(); }

auto preprocessed_probe =
cudf::experimental::row::equality::preprocessed_table::create(probe, stream);

return cudf::detail::compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN>(
_build,
probe,
_preprocessed_build,
preprocessed_probe,
_hash_table,
cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build),
_nulls_equal,
Expand All @@ -453,9 +476,14 @@ std::size_t hash_join<Hasher>::full_join_size(cudf::table_view const& probe,
// Trivial left join case - exit early
if (_is_empty) { return probe.num_rows(); }

auto preprocessed_probe =
cudf::experimental::row::equality::preprocessed_table::create(probe, stream);

return cudf::detail::get_full_join_size(
_build,
probe,
_preprocessed_build,
preprocessed_probe,
_hash_table,
cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build),
_nulls_equal,
Expand All @@ -479,9 +507,13 @@ hash_join<Hasher>::probe_join_indices(cudf::table_view const& probe_table,

CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null.");

auto preprocessed_probe =
cudf::experimental::row::equality::preprocessed_table::create(probe_table, stream);
auto join_indices = cudf::detail::probe_join_hash_table<JoinKind>(
_build,
probe_table,
_preprocessed_build,
preprocessed_probe,
_hash_table,
cudf::has_nested_nulls(probe_table) | cudf::has_nested_nulls(_build),
_nulls_equal,
Expand Down
17 changes: 11 additions & 6 deletions cpp/src/join/join_common_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,21 @@ get_trivial_left_join_indices(
*
*/
template <typename MultimapType>
void build_join_hash_table(cudf::table_view const& build,
MultimapType& hash_table,
null_equality const nulls_equal,
[[maybe_unused]] bitmask_type const* bitmask,
rmm::cuda_stream_view stream)
void build_join_hash_table(
cudf::table_view const& build,
MultimapType& hash_table,
null_equality const nulls_equal,
[[maybe_unused]] bitmask_type const* bitmask,
rmm::cuda_stream_view stream,
std::shared_ptr<experimental::row::equality::preprocessed_table> preprocessed_build = nullptr)
{
CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty");
CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows");

auto row_hash = experimental::row::hash::row_hasher{build, stream};
if (preprocessed_build == nullptr) {
preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream);
}
auto row_hash = experimental::row::hash::row_hasher{preprocessed_build};
auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)});

auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
Expand Down
1 change: 0 additions & 1 deletion cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_view.hpp>

Expand Down

0 comments on commit 0113589

Please sign in to comment.