From 405133acb2f579b35d74f5ae8850180c67fad7b1 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 1 May 2023 15:45:14 -0700 Subject: [PATCH 1/9] Update cuco git tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 9758958b44f..bf31a379537 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -14,12 +14,14 @@ # This function finds cuCollections and performs any additional configuration. function(find_and_configure_cucollections) - include(${rapids-cmake-dir}/cpm/cuco.cmake) - if(BUILD_SHARED_LIBS) - rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) - else() - rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) - endif() + rapids_cpm_find( + cuco 0.0.1 + GLOBAL_TARGETS cuco::cuco + CPM_ARGS + GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git + GIT_TAG 1a841121d1a32b83c6de0ccccbcdf037790d61ad + OPTIONS "BUILD_TESTS OFF"  "BUILD_BENCHMARKS OFF"  "BUILD_EXAMPLES OFF" + ) endfunction() find_and_configure_cucollections() From 6ee0e4efcde55bea564f80be77e3267a6ece2363 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 11 May 2023 16:19:15 -0700 Subject: [PATCH 2/9] Improve distinct_count with cuco static_set --- cpp/src/stream_compaction/distinct_count.cu | 36 ++++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 8c50f8d29e8..4a7fb62f299 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -34,6 +34,8 @@ #include #include +#include + #include #include #include @@ -127,27 +129,30 @@ cudf::size_type distinct_count(table_view const& keys, null_equality nulls_equal, rmm::cuda_stream_view stream) { - auto const num_rows = keys.num_rows(); + auto const num_rows = keys.num_rows(); + if (num_rows == 0) { return 0; } // early exit for empty input auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(keys)}; - hash_map_type key_map{compute_hash_table_size(num_rows), - cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, - cuco::empty_value{COMPACTION_EMPTY_VALUE_SENTINEL}, - detail::hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; - auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const hash_key = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto iter = cudf::detail::make_counting_transform_iterator( - 0, [] __device__(size_type i) { return cuco::make_pair(i, i); }); + using extent_type = cuco::experimental::extent; + using probing_type = + cuco::experimental::double_hashing<4, decltype(hash_key), decltype(hash_key)>; + auto const iter = thrust::counting_iterator(0); auto const comparator_helper = [&](auto const row_equal) { + auto key_set = cuco::experimental::static_set{ + cuco::experimental::extent{static_cast(compute_hash_table_size(num_rows))}, + cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, + row_equal, + probing_type{hash_key, hash_key}, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + // when nulls are equal, insert non-null rows only to improve efficiency if (nulls_equal == null_equality::EQUAL and has_nulls) { thrust::counting_iterator stencil(0); @@ -155,12 +160,11 @@ cudf::size_type distinct_count(table_view const& keys, cudf::detail::bitmask_or(keys, stream, rmm::mr::get_current_device_resource()); row_validity pred{static_cast(row_bitmask.data())}; - key_map.insert_if(iter, iter + num_rows, stencil, pred, hash_key, row_equal, stream.value()); - return key_map.get_size() + static_cast(null_count > 0); + return key_set.insert_if(iter, iter + num_rows, stencil, pred, stream.value()) + + static_cast(null_count > 0); } // otherwise, insert all - key_map.insert(iter, iter + num_rows, hash_key, row_equal, stream.value()); - return key_map.get_size(); + return key_set.insert(iter, iter + num_rows, stream.value()); }; if (cudf::detail::has_nested_columns(keys)) { From 8f411f9b726e7d9ce4a0df7d851406d8a92f0646 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 11 May 2023 16:25:01 -0700 Subject: [PATCH 3/9] Minor cleanups --- cpp/src/stream_compaction/distinct_count.cu | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 4a7fb62f299..3ba0b465f5c 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -139,17 +139,14 @@ cudf::size_type distinct_count(table_view const& keys, auto const hash_key = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - using extent_type = cuco::experimental::extent; - using probing_type = - cuco::experimental::double_hashing<4, decltype(hash_key), decltype(hash_key)>; - auto const iter = thrust::counting_iterator(0); auto const comparator_helper = [&](auto const row_equal) { - auto key_set = cuco::experimental::static_set{ + using hasher_type = decltype(hash_key); + auto key_set = cuco::experimental::static_set{ cuco::experimental::extent{static_cast(compute_hash_table_size(num_rows))}, cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, row_equal, - probing_type{hash_key, hash_key}, + cuco::experimental::double_hashing<4, hasher_type, hasher_type>{hash_key, hash_key}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; From 27181ac9e1fd16894cd4f16a7634d978150b40a5 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 11 May 2023 16:28:18 -0700 Subject: [PATCH 4/9] Minor cleanup --- cpp/src/stream_compaction/distinct_count.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 3ba0b465f5c..5b42b45d201 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -139,7 +139,6 @@ cudf::size_type distinct_count(table_view const& keys, auto const hash_key = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - auto const iter = thrust::counting_iterator(0); auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); auto key_set = cuco::experimental::static_set{ @@ -150,6 +149,7 @@ cudf::size_type distinct_count(table_view const& keys, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; + auto const iter = thrust::counting_iterator(0); // when nulls are equal, insert non-null rows only to improve efficiency if (nulls_equal == null_equality::EQUAL and has_nulls) { thrust::counting_iterator stencil(0); From b8f9f8da2365d813ee4d02dc4369f4e8f3fe3b48 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 11 May 2023 17:28:51 -0700 Subject: [PATCH 5/9] Use custom cuco tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index bf31a379537..d4515d589a2 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -18,8 +18,8 @@ function(find_and_configure_cucollections) cuco 0.0.1 GLOBAL_TARGETS cuco::cuco CPM_ARGS - GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 1a841121d1a32b83c6de0ccccbcdf037790d61ad + GIT_REPOSITORY https://github.com/PointKernel/cuCollections.git + GIT_TAG probing-iter-narrow-conversion OPTIONS "BUILD_TESTS OFF"  "BUILD_BENCHMARKS OFF"  "BUILD_EXAMPLES OFF" ) endfunction() From 8fbeb0cd8b35bcc2251e9208060a0cc08d6f8a0d Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 12 May 2023 09:23:18 -0700 Subject: [PATCH 6/9] Fetch new tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index d4515d589a2..560eb203f49 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -18,8 +18,8 @@ function(find_and_configure_cucollections) cuco 0.0.1 GLOBAL_TARGETS cuco::cuco CPM_ARGS - GIT_REPOSITORY https://github.com/PointKernel/cuCollections.git - GIT_TAG probing-iter-narrow-conversion + GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git + GIT_TAG 546ca606a17f480fa9d58d1752cce2aad6575bc4 OPTIONS "BUILD_TESTS OFF"  "BUILD_BENCHMARKS OFF"  "BUILD_EXAMPLES OFF" ) endfunction() From f4acb68981e73f775382068beb8d2a19c6a08b25 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 12 May 2023 10:23:55 -0700 Subject: [PATCH 7/9] Performance tuning --- cpp/src/stream_compaction/distinct_count.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 5b42b45d201..d1c138d4ce1 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -145,7 +145,7 @@ cudf::size_type distinct_count(table_view const& keys, cuco::experimental::extent{static_cast(compute_hash_table_size(num_rows))}, cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, row_equal, - cuco::experimental::double_hashing<4, hasher_type, hasher_type>{hash_key, hash_key}, + cuco::experimental::linear_probing<1, hasher_type>{hash_key}, detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; From 947513c85737330df1faf41474d2e50ef322b460 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 12 May 2023 14:17:19 -0700 Subject: [PATCH 8/9] Revert temporary cmake changes --- cpp/cmake/thirdparty/get_cucollections.cmake | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 560eb203f49..9758958b44f 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -14,14 +14,12 @@ # This function finds cuCollections and performs any additional configuration. function(find_and_configure_cucollections) - rapids_cpm_find( - cuco 0.0.1 - GLOBAL_TARGETS cuco::cuco - CPM_ARGS - GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 546ca606a17f480fa9d58d1752cce2aad6575bc4 - OPTIONS "BUILD_TESTS OFF"  "BUILD_BENCHMARKS OFF"  "BUILD_EXAMPLES OFF" - ) + include(${rapids-cmake-dir}/cpm/cuco.cmake) + if(BUILD_SHARED_LIBS) + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) + else() + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) + endif() endfunction() find_and_configure_cucollections() From bd7da7855961235ff3d484c2ea78c2d054a7c7ff Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 12 May 2023 16:55:15 -0700 Subject: [PATCH 9/9] Get rid of unnecessary cast --- cpp/src/stream_compaction/distinct_count.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index d1c138d4ce1..7185dae77b7 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -142,7 +142,7 @@ cudf::size_type distinct_count(table_view const& keys, auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); auto key_set = cuco::experimental::static_set{ - cuco::experimental::extent{static_cast(compute_hash_table_size(num_rows))}, + cuco::experimental::extent{compute_hash_table_size(num_rows)}, cuco::empty_key{COMPACTION_EMPTY_KEY_SENTINEL}, row_equal, cuco::experimental::linear_probing<1, hasher_type>{hash_key},