Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add detail cuco_allocator #14877

Merged
merged 8 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions cpp/include/cudf/detail/cuco_helpers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

namespace cudf::detail {

/**
* @brief Stream-ordered allocator adaptor used for cuco data structures
*
* The stream-ordered `rmm::mr::polymorphic_allocator` cannot be used in `cuco` directly since the
* later expects a standard C++ `Allocator` interface. This allocator helper provides a simple way
* to handle cuco memory allocation/deallocation with the given `stream` and the rmm default memory
* resource.
*/
class cuco_allocator
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
: public rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<char>> {
/// Default stream-ordered allocator type
using default_allocator = rmm::mr::polymorphic_allocator<char>;
/// The base allocator adaptor type
using base_type = rmm::mr::stream_allocator_adaptor<default_allocator>;

public:
/**
* @brief Constructs the allocator adaptor with the given `stream`
*/
cuco_allocator(rmm::cuda_stream_view stream) : base_type{default_allocator{}, stream} {}
};

} // namespace cudf::detail
8 changes: 3 additions & 5 deletions cpp/include/cudf/detail/hash_reduce_by_row.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,15 +14,14 @@
* limitations under the License.
*/

#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
Expand All @@ -32,9 +31,8 @@

namespace cudf::detail {

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
cuco::static_map<size_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

/**
* @brief The base struct for customized reduction functor to perform reduce-by-key with keys are
Expand Down
11 changes: 5 additions & 6 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,6 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cuco/static_multimap.cuh>

Expand All @@ -33,16 +32,16 @@
#include <optional>

// Forward declaration
template <typename T>
class default_allocator;

namespace cudf::experimental::row::equality {
class preprocessed_table;
}

namespace cudf {
namespace detail {

// Forward declaration
class cuco_allocator;

constexpr int DEFAULT_JOIN_CG_SIZE = 2;

enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN };
Expand All @@ -62,7 +61,7 @@ struct hash_join {
cuco::static_multimap<hash_value_type,
cudf::size_type,
cuda::thread_scope_device,
rmm::mr::stream_allocator_adaptor<default_allocator<char>>,
cudf::detail::cuco_allocator,
cuco::double_hashing<DEFAULT_JOIN_CG_SIZE, Hasher, Hasher>>;

hash_join() = delete;
Expand Down
62 changes: 0 additions & 62 deletions cpp/include/cudf/hashing/detail/hash_allocator.cuh

This file was deleted.

6 changes: 3 additions & 3 deletions cpp/src/hash/concurrent_unordered_map.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,12 +20,12 @@

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/pair.h>

Expand Down Expand Up @@ -117,7 +117,7 @@ template <typename Key,
typename Element,
typename Hasher = cudf::hashing::detail::default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = default_allocator<thrust::pair<Key, Element>>>
typename Allocator = rmm::mr::polymorphic_allocator<thrust::pair<Key, Element>>>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this can't use the cuco_allocator because it needs to allocate in units that aren't just raw bytes, but instead based on the Key/Element pair size?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly — cuco internally uses a rebind trait to convert the given allocator to the desired value type, so the allocator's declared data type doesn't really matter there. `concurrent_unordered_map` does not implement such a rebind, so we have to pass an allocator of the proper pair type to make it work.

class concurrent_unordered_map {
public:
using size_type = size_t;
Expand Down
11 changes: 4 additions & 7 deletions cpp/src/io/json/json_tree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
#include "nested_json.hpp"
#include <io/utilities/hostdevice_vector.hpp>

#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/scatter.cuh>
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/hashing.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/utilities/error.hpp>
Expand All @@ -31,7 +31,6 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cub/device/device_radix_sort.cuh>

Expand Down Expand Up @@ -511,7 +510,6 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

auto const num_nodes = d_tree.node_categories.size();
auto const num_fields = thrust::count(rmm::exec_policy(stream),
Expand Down Expand Up @@ -555,7 +553,7 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
cuco::empty_key{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hasher},
hash_table_allocator_type{default_allocator<char>{}, stream},
cudf::detail::cuco_allocator{stream},
stream.value()};
key_set.insert_if_async(iter,
iter + num_nodes,
Expand Down Expand Up @@ -735,15 +733,14 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
};

constexpr size_type empty_node_index_sentinel = -1;
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hasher_type = decltype(d_hashed_cache);
using hasher_type = decltype(d_hashed_cache);

auto key_set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(num_nodes)},
cuco::empty_key<cudf::size_type>{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache},
hash_table_allocator_type{default_allocator<char>{}, stream},
cudf::detail::cuco_allocator{stream},
stream.value()};

// insert and convert node ids to unique set ids
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/join.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -368,7 +369,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}},
cudf::detail::cuco_allocator{stream}},
_build{build},
_preprocessed_build{
cudf::experimental::row::equality::preprocessed_table::create(_build, stream)}
Expand Down
13 changes: 4 additions & 9 deletions cpp/src/join/join_common_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,16 +15,13 @@
*/
#pragma once

#include <cudf/detail/cuco_helpers.hpp>
#include <cudf/detail/join.hpp>
#include <cudf/hashing/detail/default_hash.cuh>
#include <cudf/hashing/detail/hash_allocator.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <cuco/static_map.cuh>
#include <cuco/static_multimap.cuh>

Expand All @@ -43,8 +40,6 @@ using pair_type = cuco::pair<hash_value_type, size_type>;

using hash_type = cuco::murmurhash3_32<hash_value_type>;

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;

using multimap_type = cudf::hash_join::impl_type::map_type;

// Multimap type used for mixed joins. TODO: This is a temporary alias used
Expand All @@ -53,11 +48,11 @@ using multimap_type = cudf::hash_join::impl_type::map_type;
using mixed_multimap_type = cuco::static_multimap<hash_value_type,
size_type,
cuda::thread_scope_device,
hash_table_allocator_type,
cudf::detail::cuco_allocator,
cuco::double_hashing<1, hash_type, hash_type>>;

using semi_map_type = cuco::
static_map<hash_value_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
static_map<hash_value_type, size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator>;

using row_hash_legacy =
cudf::row_hasher<cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>;
Expand Down
25 changes: 12 additions & 13 deletions cpp/src/join/mixed_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,7 @@
#include <cudf/ast/detail/expression_parser.hpp>
#include <cudf/ast/expressions.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/hashing/detail/helper_functions.cuh>
#include <cudf/join.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -127,12 +128,11 @@ mixed_join(
auto build_view = table_device_view::create(build, stream);

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}};
mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
cudf::detail::cuco_allocator{stream}};

// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
Expand Down Expand Up @@ -393,12 +393,11 @@ compute_mixed_join_output_size(table_view const& left_equality,
auto build_view = table_device_view::create(build, stream);

// Don't use multimap_type because we want a CG size of 1.
mixed_multimap_type hash_table{
compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
detail::hash_table_allocator_type{default_allocator<char>{}, stream}};
mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()),
cuco::empty_key{std::numeric_limits<hash_value_type>::max()},
cuco::empty_value{cudf::detail::JoinNoneValue},
stream.value(),
cudf::detail::cuco_allocator{stream}};

// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
Expand Down
Loading
Loading