Skip to content

Commit

Permalink
Revert "Remove template from hashing class, template on operator() and compute instead."
Browse files Browse the repository at this point in the history

This reverts commit 466243b.
  • Loading branch information
bdice committed Mar 7, 2022
1 parent 0d9df03 commit c972ba4
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 80 deletions.
132 changes: 74 additions & 58 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_32 {
using result_type = hash_value_type;

Expand Down Expand Up @@ -114,8 +115,7 @@ struct MurmurHash3_32 {
}

// TODO Do we need this operator() and/or compute? Probably not both.
template <typename T>
[[nodiscard]] result_type __device__ inline operator()(T const& key) const
[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
{
return compute(key);
}
Expand Down Expand Up @@ -187,72 +187,82 @@ struct MurmurHash3_32 {
};

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const
hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
{
return this->compute(static_cast<uint8_t>(key));
}

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const
hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
return this->compute_floating_point(key);
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const
hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::string_view const& key) const
hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal32 const& key) const
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal64 const& key) const
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal128 const& key) const
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::list_view const&) const
hash_value_type __device__ inline MurmurHash3_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
{
cudf_assert(false && "List column hashing is not supported");
return 0;
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::struct_view const&) const
hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
{
cudf_assert(false && "Direct hashing of struct_view is not supported");
return 0;
}

template <typename Key>
struct SparkMurmurHash3_32 {
using result_type = hash_value_type;

SparkMurmurHash3_32() = default;
constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}

[[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
__device__ inline uint32_t rotl32(uint32_t x, int8_t r) const
{
return __funnelshift_l(x, x, r); // Equivalent to (x << r) | (x >> (32 - r))
return (x << r) | (x >> (32 - r));
}

__device__ inline uint32_t fmix32(uint32_t h) const
Expand All @@ -265,11 +275,7 @@ struct SparkMurmurHash3_32 {
return h;
}

template <typename T>
result_type __device__ inline operator()(T const& key) const
{
return compute(key);
}
result_type __device__ inline operator()(Key const& key) const { return compute(key); }

// compute wrapper for floating point types
template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
Expand Down Expand Up @@ -346,72 +352,52 @@ struct SparkMurmurHash3_32 {
};

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(bool const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<bool>::operator()(bool const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int8_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<int8_t>::operator()(int8_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int16_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<uint8_t>::operator()(uint8_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint8_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<int16_t>::operator()(int16_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint16_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
uint16_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return this->compute<uint64_t>(key.value());
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return this->compute<uint64_t>(key.value());
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
// Generates the Spark MurmurHash3 hash value, mimicking the conversion:
Expand Down Expand Up @@ -453,37 +439,66 @@ hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::list_view const&) const
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
{
cudf_assert(false && "List column hashing is not supported");
return 0;
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::struct_view const&) const
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
{
cudf_assert(false && "Direct hashing of struct_view is not supported");
return 0;
}

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

/**
* @brief This hash function simply returns the value that is asked to be hash
* reinterpreted as the result_type of the functor.
*/
template <typename Key>
struct IdentityHash {
using result_type = hash_value_type;
IdentityHash() = default;
constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}

template <typename T, std::enable_if_t<!std::is_arithmetic_v<T>>* = nullptr>
constexpr result_type operator()(T const& key) const
template <typename return_type = result_type>
constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
{
cudf_assert(false && "IdentityHash does not support this data type");
return 0;
}

template <typename T, std::enable_if_t<std::is_arithmetic_v<T>>* = nullptr>
constexpr result_type operator()(T const& key) const
template <typename return_type = result_type>
constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
{
return static_cast<result_type>(key);
}
Expand All @@ -492,4 +507,5 @@ struct IdentityHash {
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
};

using default_hash = MurmurHash3_32;
template <typename Key>
using default_hash = MurmurHash3_32<Key>;
10 changes: 5 additions & 5 deletions cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -430,14 +430,14 @@ class row_lexicographic_comparator {
* @tparam hash_function Hash functor to use for hashing elements.
* @tparam Nullate A cudf::nullate type describing how to check for nulls.
*/
template <typename hash_function, typename Nullate>
template <template <typename> class hash_function, typename Nullate>
class element_hasher {
public:
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
return hash_function{}(col.element<T>(row_index));
return hash_function<T>{}(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
Expand All @@ -450,7 +450,7 @@ class element_hasher {
Nullate has_nulls;
};

template <typename hash_function, typename Nullate>
template <template <typename> class hash_function, typename Nullate>
class element_hasher_with_seed {
public:
__device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Expand All @@ -467,7 +467,7 @@ class element_hasher_with_seed {
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
return hash_function{_seed}(col.element<T>(row_index));
return hash_function<T>{_seed}(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
Expand All @@ -489,7 +489,7 @@ class element_hasher_with_seed {
* @tparam hash_function Hash functor to use for hashing elements.
* @tparam Nullate A cudf::nullate type describing how to check for nulls.
*/
template <typename hash_function, typename Nullate>
template <template <typename> class hash_function, typename Nullate>
class row_hasher {
public:
row_hasher() = delete;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/hash/concurrent_unordered_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
*/
template <typename Key,
typename Element,
typename Hasher = default_hash,
typename Hasher = default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = default_allocator<thrust::pair<Key, Element>>>
class concurrent_unordered_map {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/hash/concurrent_unordered_multimap.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
* Copyright (c) 2017-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,7 +52,7 @@ template <typename Key,
typename size_type,
Key unused_key,
Element unused_element,
typename Hasher = default_hash,
typename Hasher = default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = managed_allocator<thrust::pair<Key, Element>>,
bool count_collisions = false>
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/hash/hashing.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ std::vector<column_view> to_leaf_columns(IterType iter_begin, IterType iter_end)

} // namespace

template <typename hash_function>
template <template <typename> class hash_function>
std::unique_ptr<column> serial_murmur_hash3_32(table_view const& input,
uint32_t seed,
rmm::cuda_stream_view stream,
Expand Down
Loading

0 comments on commit c972ba4

Please sign in to comment.