Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add specialized dispatch to improve occupancy for hash table operations in distinct join #16321

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
3df7dce
nested template instantiation for hiding types
tgujar May 6, 2024
088042c
hasher conditional type dispatch works
tgujar May 7, 2024
34bad1e
delete dead comment block
tgujar May 8, 2024
40f291e
Fix docs
PointKernel May 14, 2024
b56bf75
fix type logic, minor refactor
tgujar May 17, 2024
078f53d
refactor
tgujar May 17, 2024
14785e4
added template specialization for equality comparator
tgujar May 29, 2024
b88b60b
added template specialized calls to comparator
tgujar May 30, 2024
1b89198
fix for register usage discrepancy
tgujar May 31, 2024
ca63201
fix for register usage discrepancy
tgujar May 31, 2024
ff5e0d4
revert edited comment blocks
tgujar Jun 3, 2024
80a9f61
added variant visitor pattern to avoid duplication for hasher
tgujar Jun 4, 2024
b8ad315
added variant visitor pattern for comparator
tgujar Jun 4, 2024
e29e346
add template specialization for hash joins
tgujar Jun 5, 2024
27fa7a0
add template specialization for hash join size
tgujar Jun 5, 2024
073a328
Merge branch 'branch-24.08' into hash-occupancy
PointKernel Jun 6, 2024
04f3906
minor refactor
tgujar Jun 7, 2024
90f33b4
fixup naming and docs
tgujar Jun 10, 2024
a016cd2
separate mixed semi join into multiple TU
tgujar Jul 12, 2024
8fc47df
Merge branch 'branch-24.08' into hash-occupancy
PointKernel Jul 12, 2024
0dc17ac
compile fails because traits not satisfied
tgujar Jul 15, 2024
ac5391d
compile fails with no instance of constructor
tgujar Jul 16, 2024
83ded25
compile fails with constructor not found
tgujar Jul 16, 2024
5306d1d
fix HasNested interaction, compile still fails
tgujar Jul 16, 2024
62c60fa
fails with illegal access
tgujar Jul 17, 2024
fbcf50f
fix illegal mem access
tgujar Jul 18, 2024
946d56f
fix index type
tgujar Jul 18, 2024
fe2ff8b
Merge branch 'branch-24.08' into distinct-join-occupancy
tgujar Jul 18, 2024
3febec0
Merge branch 'branch-24.08' into distinct-join-occupancy
tgujar Jul 26, 2024
77be694
Merge branch 'rapidsai:branch-24.10' into distinct-join-occupancy
tgujar Jul 26, 2024
448b14b
address review comments
tgujar Aug 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,10 @@ add_library(
src/join/mixed_join.cu
src/join/mixed_join_kernel.cu
src/join/mixed_join_kernel_nulls.cu
src/join/mixed_join_kernels_semi.cu
src/join/mixed_join_semi.cu
src/join/mixed_join_kernels_semi.cu
src/join/mixed_join_kernels_semi_nested.cu
src/join/mixed_join_kernels_semi_compound.cu
src/join/mixed_join_size_kernel.cu
src/join/mixed_join_size_kernel_nulls.cu
src/join/semi_join.cu
Expand Down
47 changes: 34 additions & 13 deletions cpp/include/cudf/detail/distinct_hash_join.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <memory>
#include <type_traits>
#include <utility>
#include <variant>

namespace cudf::detail {

Expand Down Expand Up @@ -85,22 +86,42 @@ struct hasher_adapter {
template <cudf::has_nested HasNested>
struct distinct_hash_join {
private:
/// Device row equal type
using d_equal_type = cudf::experimental::row::equality::strong_index_comparator_adapter<
cudf::experimental::row::equality::device_row_comparator<HasNested == cudf::has_nested::YES,
cudf::nullate::DYNAMIC>>;
using row_comparator = cudf::experimental::row::equality::device_row_comparator<
true,
cudf::nullate::DYNAMIC,
cudf::experimental::row::equality::nan_equal_physical_equality_comparator,
cudf::experimental::type_identity_t>;

using row_comparator_no_nested = cudf::experimental::row::equality::device_row_comparator<
false,
cudf::nullate::DYNAMIC,
cudf::experimental::row::equality::nan_equal_physical_equality_comparator,
cudf::experimental::dispatch_void_if_nested_t>;

using row_comparator_no_compound = cudf::experimental::row::equality::device_row_comparator<
false,
cudf::nullate::DYNAMIC,
cudf::experimental::row::equality::nan_equal_physical_equality_comparator,
cudf::experimental::dispatch_void_if_compound_t>;

using hasher = hasher_adapter<thrust::identity<hash_value_type>>;
using probing_scheme_type = cuco::linear_probing<1, hasher>;
using cuco_storage_type = cuco::storage<1>;

/// Hash table type
using hash_table_type = cuco::static_set<cuco::pair<hash_value_type, rhs_index_type>,
cuco::extent<size_type>,
cuda::thread_scope_device,
comparator_adapter<d_equal_type>,
probing_scheme_type,
cudf::detail::cuco_allocator,
cuco_storage_type>;
template <typename Comparator>
using static_set_with_comparator = cuco::static_set<
cuco::pair<hash_value_type, rhs_index_type>,
cuco::extent<size_type>,
cuda::thread_scope_device,
comparator_adapter<
cudf::experimental::row::equality::strong_index_comparator_adapter<Comparator>>,
probing_scheme_type,
cudf::detail::cuco_allocator,
cuco_storage_type>;
using hash_table_type = std::variant<static_set_with_comparator<row_comparator>,
static_set_with_comparator<row_comparator_no_nested>,
static_set_with_comparator<row_comparator_no_compound>>;

bool _has_nulls; ///< true if nulls are present in either build table or probe table
cudf::null_equality _nulls_equal; ///< whether to consider nulls as equal
Expand All @@ -109,8 +130,8 @@ struct distinct_hash_join {
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
_preprocessed_build; ///< input table preprocessed for row operators
std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
_preprocessed_probe; ///< input table preprocessed for row operators
hash_table_type _hash_table; ///< hash table built on `_build`
_preprocessed_probe; ///< input table preprocessed for row operators
std::unique_ptr<hash_table_type> _hash_table; ///< hash table built on `_build`

public:
distinct_hash_join() = delete;
Expand Down
Loading
Loading