Skip to content

Commit

Permalink
trying legacy again
Browse files Browse the repository at this point in the history
  • Loading branch information
divyegala committed Feb 19, 2023
1 parent 02edad7 commit fa8f639
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 49 deletions.
3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ ConfigureNVBench(
# ##################################################################################################
# * join benchmark --------------------------------------------------------------------------------
ConfigureBench(JOIN_BENCH join/left_join.cu join/conditional_join.cu)
ConfigureNVBench(JOIN_NVBENCH join/join.cu join/mixed_join.cu)
ConfigureNVBench(JOIN_NVBENCH join/join.cu)
# join/mixed_join.cu)

# ##################################################################################################
# * iterator benchmark ----------------------------------------------------------------------------
Expand Down
119 changes: 75 additions & 44 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,40 @@
namespace cudf {
namespace detail {
namespace {

/**
 * @brief Builds the legacy (non-experimental) row equality comparator for a
 * build/probe table pair.
 *
 * Device views of both tables are created on `stream`, and a `row_equality`
 * is constructed with the probe view as its first table and the build view as
 * its second.
 *
 * NOTE(review): both device-view handles are locals that are destroyed when
 * this function returns, while the returned comparator was built from the
 * views they point to. If those handles own the underlying device storage,
 * the comparator may outlive it -- confirm against `table_device_view::create`.
 *
 * @param build_table Table viewed as the second operand of the comparator
 * @param probe_table Table viewed as the first operand of the comparator
 * @param has_nulls Wrapped in `nullate::DYNAMIC` and forwarded to the comparator
 * @param nulls_equal Forwarded to the comparator unchanged
 * @param stream CUDA stream used when creating the device views
 * @return A `row_equality` comparing probe rows against build rows
 */
auto get_legacy_comparator(table_view build_table,
                           table_view probe_table,
                           bool const has_nulls,
                           cudf::null_equality nulls_equal,
                           rmm::cuda_stream_view stream)
{
  // Creation order (build first, then probe) is kept as in the original.
  auto d_build = cudf::table_device_view::create(build_table, stream);
  auto d_probe = cudf::table_device_view::create(probe_table, stream);

  return row_equality{cudf::nullate::DYNAMIC{has_nulls}, *d_probe, *d_build, nulls_equal};
}

/**
 * @brief Builds the experimental two-table device equality comparator for a
 * build/probe table pair.
 *
 * Both tables are preprocessed on `stream`, wrapped in a
 * `two_table_comparator` (probe first, build second), and the
 * `equal_to<true>` device comparator obtained from it is returned.
 *
 * NOTE(review): the preprocessed tables and the host-side comparator are
 * locals that go out of scope on return. If the device comparator only holds
 * non-owning views into them, it may dangle -- confirm.
 *
 * @param build_table Table preprocessed as the second operand
 * @param probe_table Table preprocessed as the first operand
 * @param has_nulls Wrapped in `nullate::DYNAMIC` and forwarded to `equal_to`
 * @param nulls_equal Forwarded to `equal_to` unchanged
 * @param stream CUDA stream used when preprocessing the tables
 * @return The device comparator produced by `equal_to<true>`
 */
auto get_experimental_comparator(table_view build_table,
                                 table_view probe_table,
                                 bool const has_nulls,
                                 cudf::null_equality const nulls_equal,
                                 rmm::cuda_stream_view stream)
{
  auto const nulls_flag = cudf::nullate::DYNAMIC{has_nulls};

  // Preprocessing order (probe first, then build) is kept as in the original.
  auto const probe_pre =
    cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
  auto const build_pre =
    cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);

  auto const two_table_eq =
    cudf::experimental::row::equality::two_table_comparator{probe_pre, build_pre};

  return two_table_eq.equal_to<true>(nulls_flag, nulls_equal);
}

/**
* @brief Calculates the exact size of the join output produced when
* joining two tables together.
Expand Down Expand Up @@ -88,21 +122,17 @@ std::size_t compute_join_output_size(table_view build_table,

auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls};

auto const preprocessed_probe =
cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto hash_probe = row_hash.device_hasher(probe_nulls);
// auto const preprocessed_probe =
// cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
// auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
// auto hash_probe = row_hash.device_hasher(probe_nulls);
auto probe_table_d = cudf::table_device_view::create(probe_table, stream);
row_hash hash_probe{probe_nulls, *probe_table_d};

auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

auto const preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};

auto const comparator_helper = [&](auto const device_comparator) {
pair_equality equality{device_comparator};

auto const comparator_helper = [&](auto const equality) {
std::size_t size;
auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func);
if constexpr (JoinKind == join_kind::LEFT_JOIN) {
Expand All @@ -116,11 +146,13 @@ std::size_t compute_join_output_size(table_view build_table,
};

if (cudf::detail::has_nested_columns(probe_table)) {
auto const device_comparator = row_comparator.equal_to<true>(has_nulls, nulls_equal);
return comparator_helper(device_comparator);
auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, nulls_equal, stream);
experimental::pair_equality equality{device_comparator};
return comparator_helper(equality);
} else {
auto const device_comparator = row_comparator.equal_to<false>(has_nulls, nulls_equal);
return comparator_helper(device_comparator);
auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, nulls_equal, stream);
pair_equality equality(device_comparator);
return comparator_helper(equality);
}
}

Expand Down Expand Up @@ -174,19 +206,16 @@ probe_join_hash_table(cudf::table_view build_table,

auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls};

auto const preprocessed_probe =
cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto hash_probe = row_hash.device_hasher(probe_nulls);
// auto const preprocessed_probe =
// cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
// auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
// auto hash_probe = row_hash.device_hasher(probe_nulls);
auto probe_table_d = cudf::table_device_view::create(probe_table, stream);
row_hash hash_probe{probe_nulls, *probe_table_d};
auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

auto const preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};
auto const comparator_helper = [&](auto const device_comparator) {
pair_equality equality{device_comparator};
auto const comparator_helper = [&](auto const equality) {

const cudf::size_type probe_table_num_rows = probe_table.num_rows();

Expand Down Expand Up @@ -222,11 +251,13 @@ probe_join_hash_table(cudf::table_view build_table,
};

if (cudf::detail::has_nested_columns(probe_table)) {
auto device_comparator = row_comparator.equal_to<true>(probe_nulls, compare_nulls);
comparator_helper(device_comparator);
auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, compare_nulls, stream);
experimental::pair_equality equality{device_comparator};
comparator_helper(equality);
} else {
auto device_comparator = row_comparator.equal_to<false>(probe_nulls, compare_nulls);
comparator_helper(device_comparator);
auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, compare_nulls, stream);
pair_equality equality{device_comparator};
comparator_helper(equality);
}

return std::pair(std::move(left_indices), std::move(right_indices));
Expand Down Expand Up @@ -266,19 +297,16 @@ std::size_t get_full_join_size(cudf::table_view build_table,

auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls};

auto const preprocessed_probe =
cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
auto hash_probe = row_hash.device_hasher(probe_nulls);
// auto const preprocessed_probe =
// cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream);
// auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
// auto hash_probe = row_hash.device_hasher(probe_nulls);
auto probe_table_d = cudf::table_device_view::create(probe_table, stream);
row_hash hash_probe{probe_nulls, *probe_table_d};
auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_probe, empty_key_sentinel};

auto const preprocessed_build =
cudf::experimental::row::equality::preprocessed_table::create(build_table, stream);
auto const row_comparator =
cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build};
auto const comparator_helper = [&](auto const device_comparator) {
pair_equality equality{device_comparator};
auto const comparator_helper = [&](auto const equality) {

auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func);

Expand All @@ -293,11 +321,14 @@ std::size_t get_full_join_size(cudf::table_view build_table,
iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value());
};
if (cudf::detail::has_nested_columns(probe_table)) {
auto const device_comparator = row_comparator.equal_to<true>(probe_nulls, compare_nulls);
comparator_helper(device_comparator);
auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, compare_nulls, stream);
experimental::pair_equality equality{device_comparator};
comparator_helper(equality);
} else {
auto const device_comparator = row_comparator.equal_to<false>(probe_nulls, compare_nulls);
comparator_helper(device_comparator);

auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, compare_nulls, stream);
pair_equality equality{device_comparator};
comparator_helper(equality);
}

// Release intermediate memory allocation
Expand Down
36 changes: 32 additions & 4 deletions cpp/src/join/join_common_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,31 @@ class row_is_valid {
*
* @tparam Comparator The row comparator type to perform row equality comparison from row indices.
*/

template <typename Comparator = row_equality>
class pair_equality {
 public:
  // Constructs the row comparator in place from two device table views.
  // The arguments are forwarded in the order (has_nulls, lhs, rhs, nulls_are_equal).
  pair_equality(table_device_view lhs,
                table_device_view rhs,
                nullate::DYNAMIC has_nulls,
                null_equality nulls_are_equal = null_equality::EQUAL)
    : _check_row_equality{has_nulls, lhs, rhs, nulls_are_equal}
  {
  }

  // Adopts an already-built comparator. The parameter is a non-const value so
  // that std::move below really moves; on a const parameter it silently
  // degraded to a copy.
  pair_equality(Comparator d_eqcomp) : _check_row_equality{std::move(d_eqcomp)} {}

  // Returns true when the `first` members of both pairs are equal and the row
  // comparator accepts the `second` members. The `first` comparison runs
  // before the row comparison and short-circuits it. Note the row comparator
  // is invoked with the rhs index first, then the lhs index.
  // NOTE(review): `first` is presumably the row hash and `second` the row
  // index -- confirm against the pair-producing functor.
  template <typename LhsPair, typename RhsPair>
  __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept
  {
    return lhs.first == rhs.first and _check_row_equality(rhs.second, lhs.second);
  }

 private:
  Comparator _check_row_equality;  // row-level equality functor
};

namespace experimental {
template <typename DeviceComparator>
class pair_equality {
public:
Expand All @@ -109,8 +134,8 @@ class pair_equality {
template <typename LhsPair, typename RhsPair>
__device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept
{
using experimental::row::lhs_index_type;
using experimental::row::rhs_index_type;
using cudf::experimental::row::lhs_index_type;
using cudf::experimental::row::rhs_index_type;
// printf("lhs_index: %d, rhs_index: %d, hash: %d, equality: %d\n", lhs.second, rhs.second,
// lhs.first == rhs.first, _check_row_equality(rhs_index_type{rhs.second},
// lhs_index_type{lhs.second}));
Expand All @@ -121,6 +146,7 @@ class pair_equality {
private:
DeviceComparator _check_row_equality;
};
}

/**
* @brief Computes the trivial left join operation for the case when the
Expand Down Expand Up @@ -165,8 +191,10 @@ void build_join_hash_table(cudf::table_view const& build,
CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty");
CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows");

auto row_hash = experimental::row::hash::row_hasher{build, stream};
auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)});
auto build_table_ptr = cudf::table_device_view::create(build, stream);
row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr};
// auto row_hash = cudf::experimental::row::hash::row_hasher{build, stream};
// auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)});

auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
make_pair_function pair_func{hash_build, empty_key_sentinel};
Expand Down

0 comments on commit fa8f639

Please sign in to comment.