Skip to content

Commit

Permalink
Update cudf for compatibility with the latest cuco (#14849)
Browse files Browse the repository at this point in the history
Depends on rapidsai/rapids-cmake#526

CMakes changes will be reverted once rapidsai/rapids-cmake#526 is merged.

This PR updates libcudf to make it compatible with the latest cuco.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #14849
  • Loading branch information
PointKernel authored Feb 15, 2024
1 parent 65d9c5e commit 3ba63c3
Show file tree
Hide file tree
Showing 12 changed files with 89 additions and 73 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/hash_reduce_by_row.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@

namespace cudf::detail {

using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;
using hash_map_type = cuco::legacy::
static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

/**
* @brief The base struct for customized reduction functor to perform reduce-by-key with keys are
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct hash_join {
cudf::size_type,
cuda::thread_scope_device,
cudf::detail::cuco_allocator,
cuco::double_hashing<DEFAULT_JOIN_CG_SIZE, Hasher, Hasher>>;
cuco::legacy::double_hashing<DEFAULT_JOIN_CG_SIZE, Hasher, Hasher>>;

hash_join() = delete;
~hash_join() = default;
Expand Down
32 changes: 17 additions & 15 deletions cpp/src/io/json/json_tree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -548,21 +548,22 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
using hasher_type = decltype(d_hasher);
constexpr size_type empty_node_index_sentinel = -1;
auto key_set =
cuco::experimental::static_set{cuco::experimental::extent{compute_hash_table_size(
num_fields, 40)}, // 40% occupancy in hash map
cuco::empty_key{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hasher},
cudf::detail::cuco_allocator{stream},
stream.value()};
cuco::static_set{cuco::extent{compute_hash_table_size(num_fields, 40)}, // 40% occupancy
cuco::empty_key{empty_node_index_sentinel},
d_equal,
cuco::linear_probing<1, hasher_type>{d_hasher},
{},
{},
cudf::detail::cuco_allocator{stream},
stream.value()};
key_set.insert_if_async(iter,
iter + num_nodes,
thrust::counting_iterator<size_type>(0), // stencil
is_field_name_node,
stream.value());

auto const get_hash_value =
[key_set = key_set.ref(cuco::experimental::op::find)] __device__(auto node_id) -> size_type {
[key_set = key_set.ref(cuco::op::find)] __device__(auto node_id) -> size_type {
auto const it = key_set.find(node_id);
return (it == key_set.end()) ? size_type{0} : *it;
};
Expand Down Expand Up @@ -735,13 +736,14 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
constexpr size_type empty_node_index_sentinel = -1;
using hasher_type = decltype(d_hashed_cache);

auto key_set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(num_nodes)},
cuco::empty_key<cudf::size_type>{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache},
cudf::detail::cuco_allocator{stream},
stream.value()};
auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_nodes)},
cuco::empty_key<cudf::size_type>{empty_node_index_sentinel},
d_equal,
cuco::linear_probing<1, hasher_type>{d_hashed_cache},
{},
{},
cudf::detail::cuco_allocator{stream},
stream.value()};

// insert and convert node ids to unique set ids
auto nodes_itr = thrust::make_counting_iterator<size_type>(0);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/orc/orc_gpu.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,7 +44,7 @@ using cudf::detail::host_2dspan;
auto constexpr KEY_SENTINEL = size_type{-1};
auto constexpr VALUE_SENTINEL = size_type{-1};

using map_type = cuco::static_map<size_type, size_type>;
using map_type = cuco::legacy::static_map<size_type, size_type>;

/**
* @brief The alias of `map_type::pair_atomic_type` class.
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/parquet_gpu.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,7 +28,7 @@ namespace cudf::io::parquet::detail {
auto constexpr KEY_SENTINEL = size_type{-1};
auto constexpr VALUE_SENTINEL = size_type{-1};

using map_type = cuco::static_map<size_type, size_type>;
using map_type = cuco::legacy::static_map<size_type, size_type>;

/**
* @brief The alias of `map_type::pair_atomic_type` class.
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ using multimap_type = cudf::hash_join::impl_type::map_type;
// Multimap type used for mixed joins. TODO: This is a temporary alias used
// until the mixed joins are converted to using CGs properly. Right now it's
// using a cooperative group of size 1.
using mixed_multimap_type = cuco::static_multimap<hash_value_type,
size_type,
cuda::thread_scope_device,
cudf::detail::cuco_allocator,
cuco::double_hashing<1, hash_type, hash_type>>;
using mixed_multimap_type =
cuco::static_multimap<hash_value_type,
size_type,
cuda::thread_scope_device,
cudf::detail::cuco_allocator,
cuco::legacy::double_hashing<1, hash_type, hash_type>>;

using semi_map_type = cuco::
using semi_map_type = cuco::legacy::
static_map<hash_value_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

using row_hash_legacy =
Expand Down
19 changes: 10 additions & 9 deletions cpp/src/search/contains_table.cu
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,9 @@ void dispatch_nan_comparator(
// Distinguish probing scheme CG sizes between nested and flat types for better performance
auto const probing_scheme = [&]() {
if constexpr (HasNested) {
return cuco::experimental::linear_probing<4, Hasher>{d_hasher};
return cuco::linear_probing<4, Hasher>{d_hasher};
} else {
return cuco::experimental::linear_probing<1, Hasher>{d_hasher};
return cuco::linear_probing<1, Hasher>{d_hasher};
}
}();

Expand Down Expand Up @@ -228,13 +228,14 @@ rmm::device_uvector<bool> contains(table_view const& haystack,
[&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) {
auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal};

auto set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())},
cuco::empty_key{lhs_index_type{-1}},
d_equal,
probing_scheme,
cudf::detail::cuco_allocator{stream},
stream.value()};
auto set = cuco::static_set{cuco::extent{compute_hash_table_size(haystack.num_rows())},
cuco::empty_key{lhs_index_type{-1}},
d_equal,
probing_scheme,
{},
{},
cudf::detail::cuco_allocator{stream},
stream.value()};

if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) {
auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream);
Expand Down
15 changes: 8 additions & 7 deletions cpp/src/stream_compaction/distinct_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,14 @@ cudf::size_type distinct_count(table_view const& keys,

auto const comparator_helper = [&](auto const row_equal) {
using hasher_type = decltype(hash_key);
auto key_set =
cuco::experimental::static_set{cuco::experimental::extent{compute_hash_table_size(num_rows)},
cuco::empty_key<cudf::size_type>{-1},
row_equal,
cuco::experimental::linear_probing<1, hasher_type>{hash_key},
cudf::detail::cuco_allocator{stream},
stream.value()};
auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_rows)},
cuco::empty_key<cudf::size_type>{-1},
row_equal,
cuco::linear_probing<1, hasher_type>{hash_key},
{},
{},
cudf::detail::cuco_allocator{stream},
stream.value()};

auto const iter = thrust::counting_iterator<cudf::size_type>(0);
// when nulls are equal, we skip hashing any row that has a null
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/stream_compaction/stream_compaction_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
namespace cudf {
namespace detail {

using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;
using hash_map_type = cuco::legacy::
static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

} // namespace detail
} // namespace cudf
39 changes: 21 additions & 18 deletions cpp/src/text/bpe/byte_pair_encoding.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespace detail {
using string_hasher_type = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>;
using hash_value_type = string_hasher_type::result_type;
using merge_pair_type = thrust::pair<cudf::string_view, cudf::string_view>;
using cuco_storage = cuco::storage<1>;

/**
* @brief Hasher function used for building and using the cuco static-map
Expand Down Expand Up @@ -98,15 +99,16 @@ struct bpe_equal {
}
};

using bpe_probe_scheme = cuco::experimental::linear_probing<1, bpe_hasher>;
using bpe_probe_scheme = cuco::linear_probing<1, bpe_hasher>;

using merge_pairs_map_type = cuco::experimental::static_map<cudf::size_type,
cudf::size_type,
cuco::experimental::extent<std::size_t>,
cuda::thread_scope_device,
bpe_equal,
bpe_probe_scheme,
cudf::detail::cuco_allocator>;
using merge_pairs_map_type = cuco::static_map<cudf::size_type,
cudf::size_type,
cuco::extent<std::size_t>,
cuda::thread_scope_device,
bpe_equal,
bpe_probe_scheme,
cudf::detail::cuco_allocator,
cuco_storage>;

/**
* @brief Hasher function used for building and using the cuco static-map
Expand Down Expand Up @@ -155,15 +157,16 @@ struct mp_equal {
}
};

using mp_probe_scheme = cuco::experimental::linear_probing<1, mp_hasher>;
using mp_probe_scheme = cuco::linear_probing<1, mp_hasher>;

using mp_table_map_type = cuco::experimental::static_map<cudf::size_type,
cudf::size_type,
cuco::experimental::extent<std::size_t>,
cuda::thread_scope_device,
mp_equal,
mp_probe_scheme,
cudf::detail::cuco_allocator>;
using mp_table_map_type = cuco::static_map<cudf::size_type,
cudf::size_type,
cuco::extent<std::size_t>,
cuda::thread_scope_device,
mp_equal,
mp_probe_scheme,
cudf::detail::cuco_allocator,
cuco_storage>;

} // namespace detail

Expand All @@ -185,8 +188,8 @@ struct bpe_merge_pairs::bpe_merge_pairs_impl {
std::unique_ptr<detail::mp_table_map_type>&& mp_table_map);

auto const get_merge_pairs() const { return *d_merge_pairs; }
auto get_merge_pairs_ref() const { return merge_pairs_map->ref(cuco::experimental::op::find); }
auto get_mp_table_ref() const { return mp_table_map->ref(cuco::experimental::op::find); }
auto get_merge_pairs_ref() const { return merge_pairs_map->ref(cuco::op::find); }
auto get_mp_table_ref() const { return mp_table_map->ref(cuco::op::find); }
};

} // namespace nvtext
4 changes: 4 additions & 0 deletions cpp/src/text/bpe/load_merge_pairs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
cuco::empty_value{-1},
bpe_equal{input},
bpe_probe_scheme{bpe_hasher{input}},
cuco::thread_scope_device,
cuco_storage{},
cudf::detail::cuco_allocator{stream},
stream.value());

Expand All @@ -69,6 +71,8 @@ std::unique_ptr<detail::mp_table_map_type> initialize_mp_table_map(
cuco::empty_value{-1},
mp_equal{input},
mp_probe_scheme{mp_hasher{input}},
cuco::thread_scope_device,
cuco_storage{},
cudf::detail::cuco_allocator{stream},
stream.value());

Expand Down
22 changes: 13 additions & 9 deletions cpp/src/text/vocabulary_tokenize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,16 @@ struct vocab_equal {
}
};

using probe_scheme = cuco::experimental::linear_probing<1, vocab_hasher>;
using vocabulary_map_type = cuco::experimental::static_map<cudf::size_type,
cudf::size_type,
cuco::experimental::extent<std::size_t>,
cuda::thread_scope_device,
vocab_equal,
probe_scheme,
cudf::detail::cuco_allocator>;
using probe_scheme = cuco::linear_probing<1, vocab_hasher>;
using cuco_storage = cuco::storage<1>;
using vocabulary_map_type = cuco::static_map<cudf::size_type,
cudf::size_type,
cuco::extent<std::size_t>,
cuda::thread_scope_device,
vocab_equal,
probe_scheme,
cudf::detail::cuco_allocator,
cuco_storage>;
} // namespace
} // namespace detail

Expand All @@ -115,7 +117,7 @@ struct tokenize_vocabulary::tokenize_vocabulary_impl {
col_device_view const d_vocabulary;
std::unique_ptr<detail::vocabulary_map_type> vocabulary_map;

auto get_map_ref() const { return vocabulary_map->ref(cuco::experimental::op::find); }
auto get_map_ref() const { return vocabulary_map->ref(cuco::op::find); }

tokenize_vocabulary_impl(std::unique_ptr<cudf::column>&& vocab,
col_device_view&& d_vocab,
Expand Down Expand Up @@ -149,6 +151,8 @@ tokenize_vocabulary::tokenize_vocabulary(cudf::strings_column_view const& input,
cuco::empty_value{-1},
detail::vocab_equal{*d_vocabulary},
detail::probe_scheme{detail::vocab_hasher{*d_vocabulary}},
cuco::thread_scope_device,
detail::cuco_storage{},
cudf::detail::cuco_allocator{stream},
stream.value());

Expand Down

0 comments on commit 3ba63c3

Please sign in to comment.