From ba2a6c8d63a7921629db6fdba96e0535419654de Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 29 Jan 2024 11:20:44 -0800 Subject: [PATCH 1/5] Custom cuco version --- cpp/cmake/thirdparty/get_cucollections.cmake | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 9758958b44f..a5c55130951 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -14,12 +14,14 @@ # This function finds cuCollections and performs any additional configuration. function(find_and_configure_cucollections) - include(${rapids-cmake-dir}/cpm/cuco.cmake) - if(BUILD_SHARED_LIBS) - rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) - else() - rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) - endif() + rapids_cpm_find( + cuco 0.0.1 + GLOBAL_TARGETS cuco::cuco + CPM_ARGS + GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git + GIT_TAG fabd885327dc1662e1a63d9b8870ebf888893c09 + OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" + ) endfunction() find_and_configure_cucollections() From 2db13a0d6c08a1ecc12d1f2825ae5eae00df050a Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 29 Jan 2024 11:53:01 -0800 Subject: [PATCH 2/5] Fix conflicts --- .../cudf/detail/hash_reduce_by_row.cuh | 6 +-- cpp/include/cudf/detail/join.hpp | 4 +- cpp/src/io/json/json_tree.cu | 34 +++++++-------- cpp/src/io/orc/orc_gpu.hpp | 4 +- cpp/src/io/parquet/parquet_gpu.cuh | 4 +- cpp/src/join/join_common_utils.hpp | 17 ++++---- cpp/src/search/contains_table.cu | 10 +++-- cpp/src/stream_compaction/distinct_count.cu | 18 ++++---- .../stream_compaction_common.hpp | 6 +-- cpp/src/text/bpe/byte_pair_encoding.cuh | 41 ++++++++++--------- cpp/src/text/bpe/load_merge_pairs.cu | 6 ++- cpp/src/text/vocabulary_tokenize.cu | 22 ++++++---- 12 files changed, 95 insertions(+), 77 deletions(-) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index f63d1922950..90d9de142c2 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,8 +33,8 @@ namespace cudf::detail { using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; -using hash_map_type = - cuco::static_map; +using hash_map_type = cuco::legacy:: + static_map; /** * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index b69632c83ca..a3dd2dd2b96 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,7 +63,7 @@ struct hash_join { cudf::size_type, cuda::thread_scope_device, rmm::mr::stream_allocator_adaptor>, - cuco::double_hashing>; + cuco::legacy::double_hashing>; hash_join() = delete; ~hash_join() = default; diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index 275907c19c9..f200a54b435 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -549,14 +549,15 @@ rmm::device_uvector hash_node_type_with_field_name(device_span{d_hasher}, - hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_fields, 40)}, // 40% occupancy in hash map + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; key_set.insert_if_async(iter, iter + num_nodes, thrust::counting_iterator(0), // stencil @@ -564,7 +565,7 @@ rmm::device_uvector hash_node_type_with_field_name(device_span size_type { + [key_set = key_set.ref(cuco::op::find)] __device__(auto node_id) -> size_type { auto const it = key_set.find(node_id); return (it == key_set.end()) ? size_type{0} : *it; }; @@ -738,13 +739,14 @@ std::pair, rmm::device_uvector> hash_n using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; using hasher_type = decltype(d_hashed_cache); - auto key_set = cuco::experimental::static_set{ - cuco::experimental::extent{compute_hash_table_size(num_nodes)}, - cuco::empty_key{empty_node_index_sentinel}, - d_equal, - cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache}, - hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; + auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_nodes)}, + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hashed_cache}, + {}, + {}, + hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; // insert and convert node ids to unique set ids auto nodes_itr = thrust::make_counting_iterator(0); diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 243704b65d4..c2570d71c24 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ using cudf::detail::host_2dspan; auto constexpr KEY_SENTINEL = size_type{-1}; auto constexpr VALUE_SENTINEL = size_type{-1}; -using map_type = cuco::static_map; +using map_type = cuco::legacy::static_map; /** * @brief The alias of `map_type::pair_atomic_type` class. diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh index 10e12ebb782..e3c44c78898 100644 --- a/cpp/src/io/parquet/parquet_gpu.cuh +++ b/cpp/src/io/parquet/parquet_gpu.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ namespace cudf::io::parquet::detail { auto constexpr KEY_SENTINEL = size_type{-1}; auto constexpr VALUE_SENTINEL = size_type{-1}; -using map_type = cuco::static_map; +using map_type = cuco::legacy::static_map; /** * @brief The alias of `map_type::pair_atomic_type` class. diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index e96505e5ed6..e9df56d2d07 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,13 +50,14 @@ using multimap_type = cudf::hash_join::impl_type::map_type; // Multimap type used for mixed joins. TODO: This is a temporary alias used // until the mixed joins are converted to using CGs properly. Right now it's // using a cooperative group of size 1. -using mixed_multimap_type = cuco::static_multimap>; - -using semi_map_type = cuco:: +using mixed_multimap_type = + cuco::static_multimap>; + +using semi_map_type = cuco::legacy:: static_map; using row_hash_legacy = diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index b8ece03c4a0..19875a4c7c1 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -156,9 +156,9 @@ void dispatch_nan_comparator( // Distinguish probing scheme CG sizes between nested and flat types for better performance auto const probing_scheme = [&]() { if constexpr (HasNested) { - return cuco::experimental::linear_probing<4, Hasher>{d_hasher}; + return cuco::linear_probing<4, Hasher>{d_hasher}; } else { - return cuco::experimental::linear_probing<1, Hasher>{d_hasher}; + return cuco::linear_probing<1, Hasher>{d_hasher}; } }(); @@ -226,11 +226,13 @@ rmm::device_uvector contains(table_view const& haystack, [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - auto set = cuco::experimental::static_set{ - cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, + auto set = cuco::static_set{ + cuco::extent{compute_hash_table_size(haystack.num_rows())}, cuco::empty_key{lhs_index_type{-1}}, d_equal, probing_scheme, + {}, + {}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index ac4811ad279..aea8111814d 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -141,13 +141,15 @@ cudf::size_type distinct_count(table_view const& keys, auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); - auto key_set = cuco::experimental::static_set{ - cuco::experimental::extent{compute_hash_table_size(num_rows)}, - cuco::empty_key{-1}, - row_equal, - cuco::experimental::linear_probing<1, hasher_type>{hash_key}, - detail::hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; + auto key_set = + cuco::static_set{cuco::extent{compute_hash_table_size(num_rows)}, + cuco::empty_key{-1}, + row_equal, + cuco::linear_probing<1, hasher_type>{hash_key}, + {}, + {}, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; auto const iter = thrust::counting_iterator(0); // when nulls are equal, we skip hashing any row that has a null diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp index 18c531e3e69..dd699224c69 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ b/cpp/src/stream_compaction/stream_compaction_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,8 +31,8 @@ namespace detail { using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; -using hash_map_type = - cuco::static_map; +using hash_map_type = cuco::legacy:: + static_map; } // namespace detail } // namespace cudf diff --git a/cpp/src/text/bpe/byte_pair_encoding.cuh b/cpp/src/text/bpe/byte_pair_encoding.cuh index 2a170317909..f052a40d62b 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cuh +++ b/cpp/src/text/bpe/byte_pair_encoding.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,6 +47,7 @@ using hash_value_type = string_hasher_type::result_type; using merge_pair_type = thrust::pair; using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; +using cuco_storage = cuco::storage<1>; /** * @brief Hasher function used for building and using the cuco static-map @@ -101,15 +102,16 @@ struct bpe_equal { } }; -using bpe_probe_scheme = cuco::experimental::linear_probing<1, bpe_hasher>; +using bpe_probe_scheme = cuco::linear_probing<1, bpe_hasher>; -using merge_pairs_map_type = cuco::experimental::static_map, - cuda::thread_scope_device, - bpe_equal, - bpe_probe_scheme, - hash_table_allocator_type>; +using merge_pairs_map_type = cuco::static_map, + cuda::thread_scope_device, + bpe_equal, + bpe_probe_scheme, + hash_table_allocator_type, + cuco_storage>; /** * @brief Hasher function used for building and using the cuco static-map @@ -158,15 +160,16 @@ struct mp_equal { } }; -using mp_probe_scheme = cuco::experimental::linear_probing<1, mp_hasher>; +using mp_probe_scheme = cuco::linear_probing<1, mp_hasher>; -using mp_table_map_type = cuco::experimental::static_map, - cuda::thread_scope_device, - mp_equal, - mp_probe_scheme, - hash_table_allocator_type>; +using mp_table_map_type = cuco::static_map, + cuda::thread_scope_device, + mp_equal, + mp_probe_scheme, + hash_table_allocator_type, + cuco_storage>; } // namespace detail @@ -188,8 +191,8 @@ struct bpe_merge_pairs::bpe_merge_pairs_impl { std::unique_ptr&& mp_table_map); auto const get_merge_pairs() const { return *d_merge_pairs; } - auto get_merge_pairs_ref() const { return merge_pairs_map->ref(cuco::experimental::op::find); } - auto get_mp_table_ref() const { return mp_table_map->ref(cuco::experimental::op::find); } + auto get_merge_pairs_ref() const { return merge_pairs_map->ref(cuco::op::find); } + auto get_mp_table_ref() const { return mp_table_map->ref(cuco::op::find); } }; } // namespace nvtext diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu index c07d929e98a..d7c9253d048 100644 --- a/cpp/src/text/bpe/load_merge_pairs.cu +++ b/cpp/src/text/bpe/load_merge_pairs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,6 +48,8 @@ std::unique_ptr initialize_merge_pairs_map( cuco::empty_value{-1}, bpe_equal{input}, bpe_probe_scheme{bpe_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, hash_table_allocator_type{default_allocator{}, stream}, stream.value()); @@ -70,6 +72,8 @@ std::unique_ptr initialize_mp_table_map( cuco::empty_value{-1}, mp_equal{input}, mp_probe_scheme{mp_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, hash_table_allocator_type{default_allocator{}, stream}, stream.value()); diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index a9e8d4d9a24..3e56eea70f5 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -94,14 +94,16 @@ struct vocab_equal { }; using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; -using probe_scheme = cuco::experimental::linear_probing<1, vocab_hasher>; -using vocabulary_map_type = cuco::experimental::static_map, - cuda::thread_scope_device, - vocab_equal, - probe_scheme, - hash_table_allocator_type>; +using probe_scheme = cuco::linear_probing<1, vocab_hasher>; +using cuco_storage = cuco::storage<1>; +using vocabulary_map_type = cuco::static_map, + cuda::thread_scope_device, + vocab_equal, + probe_scheme, + hash_table_allocator_type, + cuco_storage>; } // namespace } // namespace detail @@ -116,7 +118,7 @@ struct tokenize_vocabulary::tokenize_vocabulary_impl { col_device_view const d_vocabulary; std::unique_ptr vocabulary_map; - auto get_map_ref() const { return vocabulary_map->ref(cuco::experimental::op::find); } + auto get_map_ref() const { return vocabulary_map->ref(cuco::op::find); } tokenize_vocabulary_impl(std::unique_ptr&& vocab, col_device_view&& d_vocab, @@ -150,6 +152,8 @@ tokenize_vocabulary::tokenize_vocabulary(cudf::strings_column_view const& input, cuco::empty_value{-1}, detail::vocab_equal{*d_vocabulary}, detail::probe_scheme{detail::vocab_hasher{*d_vocabulary}}, + cuco::thread_scope_device, + detail::cuco_storage{}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()); From 4e7ace553b5c5f53b392ccec8136d8f69f4173d7 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 29 Jan 2024 13:50:42 -0800 Subject: [PATCH 3/5] Update comments for better code formatting --- cpp/src/io/json/json_tree.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index f200a54b435..a7a9690d636 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -549,15 +549,15 @@ rmm::device_uvector hash_node_type_with_field_name(device_span{d_hasher}, - {}, - {}, - hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; + auto key_set = + cuco::static_set{cuco::extent{compute_hash_table_size(num_fields, 40)}, // 40% occupancy + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; key_set.insert_if_async(iter, iter + num_nodes, thrust::counting_iterator(0), // stencil From 11bf897a524247947403bd9a212ea71f12fe47f8 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 6 Feb 2024 09:51:49 -0800 Subject: [PATCH 4/5] Update cuco git tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index a5c55130951..c3a0d94fbf1 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -19,7 +19,7 @@ function(find_and_configure_cucollections) GLOBAL_TARGETS cuco::cuco CPM_ARGS GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG fabd885327dc1662e1a63d9b8870ebf888893c09 + GIT_TAG 56c53beb6fb0cafd265b7fcc3df78ae487811b22 OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) endfunction() From c7775c1574446cf7247d96baf11989e39a3a8841 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 15 Feb 2024 08:33:02 -0800 Subject: [PATCH 5/5] Revert custom CMake changes --- cpp/cmake/thirdparty/get_cucollections.cmake | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index c3a0d94fbf1..9758958b44f 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -14,14 +14,12 @@ # This function finds cuCollections and performs any additional configuration. function(find_and_configure_cucollections) - rapids_cpm_find( - cuco 0.0.1 - GLOBAL_TARGETS cuco::cuco - CPM_ARGS - GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 56c53beb6fb0cafd265b7fcc3df78ae487811b22 - OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" - ) + include(${rapids-cmake-dir}/cpm/cuco.cmake) + if(BUILD_SHARED_LIBS) + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) + else() + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) + endif() endfunction() find_and_configure_cucollections()