Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor C++17 cleanup of groupby.cu: structured bindings, more concise lambda, etc #9193

Merged
merged 8 commits into from
Sep 9, 2021
Merged
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -537,27 +537,25 @@ void compute_single_pass_aggs(table_view const& keys,
* `map`.
*/
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the diff without +/- markers, so both the old and the new form of
// each changed statement show up below. It is not compilable as shown.
template <typename Map>
// Old signature: returns the key vector paired with a separate element count.
std::pair<rmm::device_uvector<size_type>, size_type> extract_populated_keys(
Map map, size_type num_keys, rmm::cuda_stream_view stream)
// New signature: returns only the key vector.
rmm::device_uvector<size_type> extract_populated_keys(Map map,
size_type num_keys,
rmm::cuda_stream_view stream)
{
// Output buffer created with num_keys elements; the new code resizes it to
// the number of keys actually copied before returning.
rmm::device_uvector<size_type> populated_keys(num_keys, stream);

// Old key extractor: unpacks the element with thrust::tie and returns the key.
auto get_key = [] __device__(auto const& element) {
size_type key, value;
thrust::tie(key, value) = element;
return key;
};
// New key extractor: reads the key directly from the element's `first` member.
auto get_key = [] __device__(auto const& element) { return element.first; }; // first = key
// Iterator that applies get_key to each element starting at map.data().
auto get_key_it = thrust::make_transform_iterator(map.data(), get_key);
// Predicate: true when a key differs from the map's unused-key value.
auto key_used = [unused = map.get_unused_key()] __device__(auto key) { return key != unused; };

// Old form: builds both transform iterators and the predicate inline. Copies
// every key in [map.data(), map.data() + map.capacity()) that is not the
// unused key into populated_keys.
auto end_it = thrust::copy_if(
rmm::exec_policy(stream),
thrust::make_transform_iterator(map.data(), get_key),
thrust::make_transform_iterator(map.data() + map.capacity(), get_key),
populated_keys.begin(),
[unused_key = map.get_unused_key()] __device__(size_type key) { return key != unused_key; });
// New form: the same copy expressed with the named iterator and predicate.
auto end_it = thrust::copy_if(rmm::exec_policy(stream),
get_key_it,
get_key_it + map.capacity(),
populated_keys.begin(),
key_used);

// Old form: records the number of copied keys in a separate variable.
size_type map_size = end_it - populated_keys.begin();
// New form: resizes the vector to the number of copied keys instead.
populated_keys.resize(std::distance(populated_keys.begin(), end_it), stream);

// Old return: the vector together with the separately tracked count.
return std::make_pair(std::move(populated_keys), map_size);
// New return: the resized vector alone.
return populated_keys;
}

/**
Expand Down Expand Up @@ -594,8 +592,8 @@ std::unique_ptr<table> groupby_null_templated(table_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto d_keys = table_device_view::create(keys, stream);
auto map = create_hash_map<keys_have_nulls>(*d_keys, include_null_keys, stream);
auto d_keys_ptr = table_device_view::create(keys, stream);
auto map = create_hash_map<keys_have_nulls>(*d_keys_ptr, include_null_keys, stream);

// Cache of sparse results where the location of aggregate value in each
// column is indexed by the hash map
Expand All @@ -607,17 +605,15 @@ std::unique_ptr<table> groupby_null_templated(table_view const& keys,

// Extract the populated indices from the hash map and create a gather map.
// Gathering using this map from sparse results will give dense results.
auto map_and_size = extract_populated_keys(*map, keys.num_rows(), stream);
rmm::device_uvector<size_type> gather_map{std::move(map_and_size.first)};
size_type const map_size = map_and_size.second;
auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream);

// Compact all results from sparse_results and insert into cache
sparse_to_dense_results(keys,
requests,
&sparse_results,
cache,
gather_map,
map_size,
gather_map.size(),
*map,
keys_have_nulls,
include_null_keys,
Expand All @@ -626,7 +622,7 @@ std::unique_ptr<table> groupby_null_templated(table_view const& keys,

return cudf::detail::gather(keys,
gather_map.begin(),
gather_map.begin() + map_size,
gather_map.begin() + gather_map.size(),
codereport marked this conversation as resolved.
Show resolved Hide resolved
out_of_bounds_policy::DONT_CHECK,
stream,
mr);
Expand Down