-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve distinct_count
with cuco::static_set
#13343
Changes from 11 commits
405133a
e1300ee
4aab06d
6ee0e4e
8f411f9
27181ac
b8f9f8d
8fbeb0c
f4acb68
947513c
b99c029
bd7da78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,8 @@ | |
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/exec_policy.hpp> | ||
|
||
#include <cuco/static_set.cuh> | ||
|
||
#include <thrust/count.h> | ||
#include <thrust/execution_policy.h> | ||
#include <thrust/iterator/counting_iterator.h> | ||
|
@@ -127,40 +129,39 @@ cudf::size_type distinct_count(table_view const& keys, | |
null_equality nulls_equal, | ||
rmm::cuda_stream_view stream) | ||
{ | ||
auto const num_rows = keys.num_rows(); | ||
auto const num_rows = keys.num_rows(); | ||
if (num_rows == 0) { return 0; } // early exit for empty input | ||
auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(keys)}; | ||
|
||
hash_map_type key_map{compute_hash_table_size(num_rows), | ||
cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, | ||
cuco::empty_value{COMPACTION_EMPTY_VALUE_SENTINEL}, | ||
detail::hash_table_allocator_type{default_allocator<char>{}, stream}, | ||
stream.value()}; | ||
|
||
auto const preprocessed_input = | ||
cudf::experimental::row::hash::preprocessed_table::create(keys, stream); | ||
|
||
auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); | ||
auto const hash_key = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); | ||
|
||
auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); | ||
|
||
auto iter = cudf::detail::make_counting_transform_iterator( | ||
0, [] __device__(size_type i) { return cuco::make_pair(i, i); }); | ||
auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); | ||
|
||
auto const comparator_helper = [&](auto const row_equal) { | ||
using hasher_type = decltype(hash_key); | ||
auto key_set = cuco::experimental::static_set{ | ||
cuco::experimental::extent{static_cast<cudf::size_type>(compute_hash_table_size(num_rows))}, | ||
PointKernel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cuco::empty_key<cudf::size_type>{COMPACTION_EMPTY_KEY_SENTINEL}, | ||
row_equal, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, we are trying to align with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see potential issue with adopting
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry to steal the show. Here is my suggestion to overcome the aforementioned issue: Combine both comparators into a thin proxy wrapper:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do we make sure the desired operator will be used with the equal wrapper? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When inserting, we should insert all keys of type Theoretically, my proposal should work but that depends on the detail implementation of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Good point. cuco has an additional indirection layer to facilitate sentinel comparison thus the wrapper may not work as it is. To be tested. |
||
cuco::experimental::linear_probing<1, hasher_type>{hash_key}, | ||
detail::hash_table_allocator_type{default_allocator<char>{}, stream}, | ||
stream.value()}; | ||
|
||
auto const iter = thrust::counting_iterator<cudf::size_type>(0); | ||
// when nulls are equal, insert non-null rows only to improve efficiency | ||
if (nulls_equal == null_equality::EQUAL and has_nulls) { | ||
thrust::counting_iterator<size_type> stencil(0); | ||
auto const [row_bitmask, null_count] = | ||
cudf::detail::bitmask_or(keys, stream, rmm::mr::get_current_device_resource()); | ||
row_validity pred{static_cast<bitmask_type const*>(row_bitmask.data())}; | ||
|
||
key_map.insert_if(iter, iter + num_rows, stencil, pred, hash_key, row_equal, stream.value()); | ||
return key_map.get_size() + static_cast<std::size_t>(null_count > 0); | ||
return key_set.insert_if(iter, iter + num_rows, stencil, pred, stream.value()) + | ||
static_cast<cudf::size_type>(null_count > 0); | ||
} | ||
// otherwise, insert all | ||
key_map.insert(iter, iter + num_rows, hash_key, row_equal, stream.value()); | ||
return key_map.get_size(); | ||
return key_set.insert(iter, iter + num_rows, stream.value()); | ||
}; | ||
|
||
if (cudf::detail::has_nested_columns(keys)) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I expect to see some header removed from this but don't see any?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not for now, some common utilities will no longer be needed but I will leave it to the
distinct
refactoring withcuco::static_reduction_map
to avoid back-and-forth effort.