From 63f187acc9804dede36ddc64f52f4a1e980e9551 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Mar 2022 12:48:36 -0800
Subject: [PATCH 01/15] Refactor float normalization.

---
 .../cudf/detail/utilities/hash_functions.cuh  | 52 ++++++-------------
 1 file changed, 17 insertions(+), 35 deletions(-)
diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 7eefdc90f4b..36fe7c635a0 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -32,6 +32,18 @@ using hash_value_type = uint32_t;
 namespace cudf {
 namespace detail {
 
+/**
+ * Normalization of floating point NaNs, passthrough for all other values.
+ */
+template <typename T>
+T __device__ inline normalize_nans(T const& key)
+{
+  if constexpr (cudf::is_floating_point<T>()) {
+    if (std::isnan(key)) { return std::numeric_limits<T>::quiet_NaN(); }
+  }
+  return key;
+}
+
 /**
  * Normalization of floating point NaNs and zeros, passthrough for all other values.
  */
@@ -39,13 +51,9 @@ template <typename T>
 T __device__ inline normalize_nans_and_zeros(T const& key)
 {
   if constexpr (cudf::is_floating_point<T>()) {
-    if (std::isnan(key)) {
-      return std::numeric_limits<T>::quiet_NaN();
-    } else if (key == T{0.0}) {
-      return T{0.0};
-    }
+    if (key == T{0.0}) { return T{0.0}; }
   }
-  return key;
+  return normalize_nans(key);
 }
 
 /**
@@ -117,21 +125,7 @@ struct MurmurHash3_32 {
   // TODO Do we need this operator() and/or compute? Probably not both.
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    return compute(key);
-  }
-
-  // compute wrapper for floating point types
-  template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
-  hash_value_type __device__ inline compute_floating_point(T const& key) const
-  {
-    if (key == T{0.0}) {
-      return compute(T{0.0});
-    } else if (std::isnan(key)) {
-      T nan = std::numeric_limits<T>::quiet_NaN();
-      return compute(nan);
-    } else {
-      return compute(key);
-    }
+    return compute(detail::normalize_nans_and_zeros(key));
   }
 
   template <typename T>
@@ -274,18 +268,6 @@ struct SparkMurmurHash3_32 {
 
   result_type __device__ inline operator()(Key const& key) const { return compute(key); }
 
-  // compute wrapper for floating point types
-  template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
-  hash_value_type __device__ inline compute_floating_point(T const& key) const
-  {
-    if (std::isnan(key)) {
-      T nan = std::numeric_limits<T>::quiet_NaN();
-      return compute(nan);
-    } else {
-      return compute(key);
-    }
-  }
-
   template <typename T>
   result_type __device__ inline compute(T const& key) const
   {
@@ -382,13 +364,13 @@ hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute<float>(detail::normalize_nans(key));
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute<double>(detail::normalize_nans(key));
 }
 
 template <>

From 84de276b1ac9807733ed5ca84efb99e53b6bfac0 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Mar 2022 12:52:01 -0800
Subject: [PATCH 02/15] Refactor namespaces.

---
 cpp/include/cudf/detail/utilities/hash_functions.cuh | 3 ++-
 cpp/src/hash/concurrent_unordered_map.cuh            | 2 +-
 cpp/src/hash/concurrent_unordered_multimap.cuh       | 4 ++--
 cpp/tests/hash_map/multimap_test.cu                  | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 36fe7c635a0..82718a31ac5 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -80,7 +80,6 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
 }
 
 }  // namespace detail
-}  // namespace cudf
 
 // MurmurHash3_32 implementation from
 // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
@@ -485,3 +484,5 @@ struct IdentityHash {
 
 template <typename Key>
 using default_hash = MurmurHash3_32<Key>;
+
+}  // namespace cudf
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index 0ae0baa9908..8cf3cd2fc72 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -113,7 +113,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
  */
 template <typename Key,
           typename Element,
-          typename Hasher    = default_hash<Key>,
+          typename Hasher    = cudf::default_hash<Key>,
           typename Equality  = equal_to<Key>,
           typename Allocator = default_allocator<thrust::pair<Key, Element>>>
 class concurrent_unordered_map {
diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh
index cdf5b6a8649..802156adf35 100644
--- a/cpp/src/hash/concurrent_unordered_multimap.cuh
+++ b/cpp/src/hash/concurrent_unordered_multimap.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ template <typename Key,
           typename size_type,
           Key unused_key,
           Element unused_element,
-          typename Hasher       = default_hash<Key>,
+          typename Hasher       = cudf::default_hash<Key>,
           typename Equality     = equal_to<Key>,
           typename Allocator    = managed_allocator<thrust::pair<Key, Element>>,
           bool count_collisions = false>
diff --git a/cpp/tests/hash_map/multimap_test.cu b/cpp/tests/hash_map/multimap_test.cu
index 456ba951a45..8ba69b96cb8 100644
--- a/cpp/tests/hash_map/multimap_test.cu
+++ b/cpp/tests/hash_map/multimap_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ class MultimapTest : public cudf::test::BaseFixture {
                                   size_type,
                                   std::numeric_limits<key_type>::max(),
                                   std::numeric_limits<value_type>::max(),
-                                  default_hash<key_type>,
+                                  cudf::default_hash<key_type>,
                                   equal_to<key_type>,
                                   default_allocator<thrust::pair<key_type, value_type>>>;
 

From bda59104171a80204f9076f13e73b60dd225ab54 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Mar 2022 12:52:14 -0800
Subject: [PATCH 03/15] Remove this-> for consistency.

---
 .../cudf/detail/utilities/hash_functions.cuh  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 82718a31ac5..3be8f8e5d71 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -182,19 +182,19 @@ struct MurmurHash3_32 {
 template <>
 hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
 {
-  return this->compute(static_cast<uint8_t>(key));
+  return compute(static_cast<uint8_t>(key));
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute(detail::normalize_nans_and_zeros(key));
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
 {
-  return this->compute_floating_point(key);
+  return compute(detail::normalize_nans_and_zeros(key));
 }
 
 template <>
@@ -203,28 +203,28 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
   auto const len  = key.size_bytes();
-  return this->compute_bytes(data, len);
+  return compute_bytes(data, len);
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
   numeric::decimal128 const& key) const
 {
-  return this->compute(key.value());
+  return compute(key.value());
 }
 
 template <>
@@ -332,32 +332,32 @@ struct SparkMurmurHash3_32 {
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<bool>::operator()(bool const& key) const
 {
-  return this->compute<uint32_t>(key);
+  return compute<uint32_t>(key);
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<int8_t>::operator()(int8_t const& key) const
 {
-  return this->compute<uint32_t>(key);
+  return compute<uint32_t>(key);
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<uint8_t>::operator()(uint8_t const& key) const
 {
-  return this->compute<uint32_t>(key);
+  return compute<uint32_t>(key);
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<int16_t>::operator()(int16_t const& key) const
 {
-  return this->compute<uint32_t>(key);
+  return compute<uint32_t>(key);
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
   uint16_t const& key) const
 {
-  return this->compute<uint32_t>(key);
+  return compute<uint32_t>(key);
 }
 
 template <>
@@ -378,21 +378,21 @@ hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operat
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
   auto const len  = key.size_bytes();
-  return this->compute_bytes(data, len);
+  return compute_bytes(data, len);
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
-  return this->compute<uint64_t>(key.value());
+  return compute<uint64_t>(key.value());
 }
 
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
-  return this->compute<uint64_t>(key.value());
+  return compute<uint64_t>(key.value());
 }
 
 template <>
@@ -434,7 +434,7 @@ hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::oper
   __int128_t big_endian_value = 0;
   auto big_endian_data        = reinterpret_cast<std::byte*>(&big_endian_value);
   thrust::reverse_copy(thrust::seq, data, data + length, big_endian_data);
-  return this->compute_bytes(big_endian_data, length);
+  return compute_bytes(big_endian_data, length);
 }
 
 template <>

From 13b831a92a20edbf06caf348c07e45d4116de23b Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Mar 2022 15:22:29 -0800
Subject: [PATCH 04/15] Unify Spark/non-Spark implementations and separate tail
 processing into its own function.

---
 .../cudf/detail/utilities/hash_functions.cuh  | 144 ++++++++++--------
 1 file changed, 83 insertions(+), 61 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 3be8f8e5d71..475db958df4 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -97,9 +97,9 @@ struct MurmurHash3_32 {
   MurmurHash3_32() = default;
   constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
 
-  [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
+  [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t h, uint32_t r) const
   {
-    return __funnelshift_l(x, x, r);  // Equivalent to (x << r) | (x >> (32 - r))
+    return __funnelshift_l(h, h, r);  // Equivalent to (h << r) | (h >> (32 - r))
   }
 
   [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
@@ -121,7 +121,6 @@ struct MurmurHash3_32 {
     return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
   }
 
-  // TODO Do we need this operator() and/or compute? Probably not both.
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
     return compute(detail::normalize_nans_and_zeros(key));
@@ -133,17 +132,32 @@ struct MurmurHash3_32 {
     return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
   }
 
+  result_type __device__ inline compute_remaining_bytes(std::byte const* data,
+                                                        cudf::size_type len,
+                                                        cudf::size_type tail_offset,
+                                                        result_type h) const
+  {
+    // Process remaining bytes that do not fill a four-byte chunk.
+    uint32_t k1 = 0;
+    switch (len % 4) {
+      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16;
+      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8;
+      case 1:
+        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
+        k1 *= c1;
+        k1 = rotl32(k1, rot_c1);
+        k1 *= c2;
+        h ^= k1;
+    };
+    return h;
+  }
+
   result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
   {
     constexpr cudf::size_type BLOCK_SIZE = 4;
     cudf::size_type const nblocks        = len / BLOCK_SIZE;
     cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
-    result_type h1                       = m_seed;
-    constexpr uint32_t c1                = 0xcc9e2d51;
-    constexpr uint32_t c2                = 0x1b873593;
-    constexpr uint32_t c3                = 0xe6546b64;
-    constexpr uint32_t rot_c1            = 15;
-    constexpr uint32_t rot_c2            = 13;
+    result_type h                        = m_seed;
 
     // Process all four-byte chunks.
     for (cudf::size_type i = 0; i < nblocks; i++) {
@@ -151,32 +165,26 @@ struct MurmurHash3_32 {
       k1 *= c1;
       k1 = rotl32(k1, rot_c1);
       k1 *= c2;
-      h1 ^= k1;
-      h1 = rotl32(h1, rot_c2);
-      h1 = h1 * 5 + c3;
+      h ^= k1;
+      h = rotl32(h, rot_c2);
+      h = h * 5 + c3;
     }
 
-    // Process remaining bytes that do not fill a four-byte chunk.
-    uint32_t k1 = 0;
-    switch (len % 4) {
-      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16;
-      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8;
-      case 1:
-        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
-        k1 *= c1;
-        k1 = rotl32(k1, rot_c1);
-        k1 *= c2;
-        h1 ^= k1;
-    };
+    h = compute_remaining_bytes(data, len, tail_offset, h);
 
     // Finalize hash.
-    h1 ^= len;
-    h1 = fmix32(h1);
-    return h1;
+    h ^= len;
+    h = fmix32(h);
+    return h;
   }
 
  private:
   uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
+  static constexpr uint32_t c1     = 0xcc9e2d51;
+  static constexpr uint32_t c2     = 0x1b873593;
+  static constexpr uint32_t c3     = 0xe6546b64;
+  static constexpr uint32_t rot_c1 = 15;
+  static constexpr uint32_t rot_c2 = 13;
 };
 
 template <>
@@ -255,7 +263,7 @@ struct SparkMurmurHash3_32 {
     return __funnelshift_l(x, x, r);  // Equivalent to (x << r) | (x >> (32 - r))
   }
 
-  __device__ inline uint32_t fmix32(uint32_t h) const
+  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
   {
     h ^= h >> 16;
     h *= 0x85ebca6b;
@@ -265,7 +273,19 @@ struct SparkMurmurHash3_32 {
     return h;
   }
 
-  result_type __device__ inline operator()(Key const& key) const { return compute(key); }
+  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
+                                                      cudf::size_type offset) const
+  {
+    // Read a 4-byte value from the data pointer as individual bytes for safe
+    // unaligned access (very likely for string types).
+    auto block = reinterpret_cast<uint8_t const*>(data + offset);
+    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
+  }
+
+  [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
+  {
+    return compute(key);
+  }
 
   template <typename T>
   result_type __device__ inline compute(T const& key) const
@@ -273,24 +293,35 @@ struct SparkMurmurHash3_32 {
     return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
   }
 
-  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
-                                                      cudf::size_type offset) const
+  result_type __device__ inline compute_remaining_bytes(std::byte const* data,
+                                                        cudf::size_type len,
+                                                        cudf::size_type tail_offset,
+                                                        result_type h) const
   {
-    // Individual byte reads for unaligned accesses (very likely for strings)
-    auto block = reinterpret_cast<uint8_t const*>(data + offset);
-    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
+    // Process remaining bytes that do not fill a four-byte chunk using Spark's approach
+    // (does not conform to normal MurmurHash3).
+    for (auto i = tail_offset; i < len; i++) {
+      // We require a two-step cast to get the k1 value from the byte. First,
+      // we must cast to a signed int8_t. Then, the sign bit is preserved when
+      // casting to uint32_t under 2's complement. Java preserves the sign when
+      // casting byte-to-int, but C++ does not.
+      uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
+      k1 *= c1;
+      k1 = rotl32(k1, rot_c1);
+      k1 *= c2;
+      h ^= k1;
+      h = rotl32(h, rot_c2);
+      h = h * 5 + c3;
+    }
+    return h;
   }
 
   result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
   {
     constexpr cudf::size_type BLOCK_SIZE = 4;
     cudf::size_type const nblocks        = len / BLOCK_SIZE;
-    result_type h1                       = m_seed;
-    constexpr uint32_t c1                = 0xcc9e2d51;
-    constexpr uint32_t c2                = 0x1b873593;
-    constexpr uint32_t c3                = 0xe6546b64;
-    constexpr uint32_t rot_c1            = 15;
-    constexpr uint32_t rot_c2            = 13;
+    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
+    result_type h                        = m_seed;
 
     // Process all four-byte chunks.
     for (cudf::size_type i = 0; i < nblocks; i++) {
@@ -298,35 +329,26 @@ struct SparkMurmurHash3_32 {
       k1 *= c1;
       k1 = rotl32(k1, rot_c1);
       k1 *= c2;
-      h1 ^= k1;
-      h1 = rotl32(h1, rot_c2);
-      h1 = h1 * 5 + c3;
+      h ^= k1;
+      h = rotl32(h, rot_c2);
+      h = h * 5 + c3;
     }
 
-    // Process remaining bytes that do not fill a four-byte chunk using Spark's approach
-    // (does not conform to normal MurmurHash3).
-    for (cudf::size_type i = nblocks * 4; i < len; i++) {
-      // We require a two-step cast to get the k1 value from the byte. First,
-      // we must cast to a signed int8_t. Then, the sign bit is preserved when
-      // casting to uint32_t under 2's complement. Java preserves the
-      // signedness when casting byte-to-int, but C++ does not.
-      uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
-      k1 *= c1;
-      k1 = rotl32(k1, rot_c1);
-      k1 *= c2;
-      h1 ^= k1;
-      h1 = rotl32(h1, rot_c2);
-      h1 = h1 * 5 + c3;
-    }
+    h = compute_remaining_bytes(data, len, tail_offset, h);
 
     // Finalize hash.
-    h1 ^= len;
-    h1 = fmix32(h1);
-    return h1;
+    h ^= len;
+    h = fmix32(h);
+    return h;
   }
 
  private:
   uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
+  static constexpr uint32_t c1     = 0xcc9e2d51;
+  static constexpr uint32_t c2     = 0x1b873593;
+  static constexpr uint32_t c3     = 0xe6546b64;
+  static constexpr uint32_t rot_c1 = 15;
+  static constexpr uint32_t rot_c2 = 13;
 };
 
 template <>

From 6929e63a0374d1b43c08a39508b69847b5dfd454 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Mar 2022 15:48:14 -0800
Subject: [PATCH 05/15] Move MurmurHash3_32 and default_hash into cudf::detail.

---
 .../cudf/detail/utilities/hash_functions.cuh       |  3 +--
 cpp/src/groupby/hash/groupby.cu                    | 14 ++++++++------
 cpp/src/hash/concurrent_unordered_map.cuh          |  2 +-
 cpp/src/hash/concurrent_unordered_multimap.cuh     |  2 +-
 cpp/src/io/json/json_gpu.cu                        |  5 +++--
 cpp/src/io/parquet/chunk_dict.cu                   |  7 +++++--
 cpp/src/partitioning/partitioning.cu               |  6 +++---
 cpp/tests/hash_map/multimap_test.cu                |  2 +-
 8 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 475db958df4..13ca74026c0 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -79,8 +79,6 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
   std::memcpy(destination, reinterpret_cast<uint8_t*>(&x), 8);
 }
 
-}  // namespace detail
-
 // MurmurHash3_32 implementation from
 // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
 //-----------------------------------------------------------------------------
@@ -507,4 +505,5 @@ struct IdentityHash {
 template <typename Key>
 using default_hash = MurmurHash3_32<Key>;
 
+}  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 4f2cb4de14b..171a2726aa4 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -424,17 +424,19 @@ auto create_hash_map(table_device_view const& d_keys,
   size_type constexpr unused_key{std::numeric_limits<size_type>::max()};
   size_type constexpr unused_value{std::numeric_limits<size_type>::max()};
 
-  using map_type = concurrent_unordered_map<size_type,
-                                            size_type,
-                                            row_hasher<default_hash, nullate::DYNAMIC>,
-                                            row_equality_comparator<nullate::DYNAMIC>>;
+  using map_type =
+    concurrent_unordered_map<size_type,
+                             size_type,
+                             row_hasher<cudf::detail::default_hash, nullate::DYNAMIC>,
+                             row_equality_comparator<nullate::DYNAMIC>>;
 
   using allocator_type = typename map_type::allocator_type;
 
   auto const null_keys_are_equal =
     include_null_keys == null_policy::INCLUDE ? null_equality::EQUAL : null_equality::UNEQUAL;
 
-  row_hasher<default_hash, nullate::DYNAMIC> hasher{nullate::DYNAMIC{keys_have_nulls}, d_keys};
+  row_hasher<cudf::detail::default_hash, nullate::DYNAMIC> hasher{nullate::DYNAMIC{keys_have_nulls},
+                                                                  d_keys};
   row_equality_comparator rows_equal{
     nullate::DYNAMIC{keys_have_nulls}, d_keys, d_keys, null_keys_are_equal};
 
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index 8cf3cd2fc72..0ccdf9ecc31 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -113,7 +113,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
  */
 template <typename Key,
           typename Element,
-          typename Hasher    = cudf::default_hash<Key>,
+          typename Hasher    = cudf::detail::default_hash<Key>,
           typename Equality  = equal_to<Key>,
           typename Allocator = default_allocator<thrust::pair<Key, Element>>>
 class concurrent_unordered_map {
diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh
index 802156adf35..33a6c1437ef 100644
--- a/cpp/src/hash/concurrent_unordered_multimap.cuh
+++ b/cpp/src/hash/concurrent_unordered_multimap.cuh
@@ -52,7 +52,7 @@ template <typename Key,
           typename size_type,
           Key unused_key,
           Element unused_element,
-          typename Hasher       = cudf::default_hash<Key>,
+          typename Hasher       = cudf::detail::default_hash<Key>,
           typename Equality     = equal_to<Key>,
           typename Allocator    = managed_allocator<thrust::pair<Key, Element>>,
           bool count_collisions = false>
diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu
index 21455e3ab93..2fa9acab789 100644
--- a/cpp/src/io/json/json_gpu.cu
+++ b/cpp/src/io/json/json_gpu.cu
@@ -350,7 +350,7 @@ __device__ field_descriptor next_field_descriptor(const char* begin,
       ? field_descriptor{field_idx, begin, cudf::io::gpu::seek_field_end(begin, end, opts, true)}
       : [&]() {
           auto const key_range = get_next_key(begin, end, opts.quotechar);
-          auto const key_hash  = MurmurHash3_32<cudf::string_view>{}(
+          auto const key_hash  = cudf::detail::MurmurHash3_32<cudf::string_view>{}(
             cudf::string_view(key_range.first, key_range.second - key_range.first));
           auto const hash_col = col_map.find(key_hash);
           // Fall back to field index if not found (parsing error)
@@ -661,7 +661,8 @@ __global__ void collect_keys_info_kernel(parse_options_view const options,
       keys_info->column(0).element<uint64_t>(idx) = field_range.key_begin - data.begin();
       keys_info->column(1).element<uint16_t>(idx) = len;
       keys_info->column(2).element<uint32_t>(idx) =
-        MurmurHash3_32<cudf::string_view>{}(cudf::string_view(field_range.key_begin, len));
+        cudf::detail::MurmurHash3_32<cudf::string_view>{}(
+          cudf::string_view(field_range.key_begin, len));
     }
   }
 }
diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu
index 5589f87e57e..c4c0ad2005d 100644
--- a/cpp/src/io/parquet/chunk_dict.cu
+++ b/cpp/src/io/parquet/chunk_dict.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -55,7 +55,10 @@ struct equality_functor {
 template <typename T>
 struct hash_functor {
   column_device_view const& col;
-  __device__ auto operator()(size_type idx) { return MurmurHash3_32<T>{}(col.element<T>(idx)); }
+  __device__ auto operator()(size_type idx)
+  {
+    return cudf::detail::MurmurHash3_32<T>{}(col.element<T>(idx));
+  }
 };
 
 struct map_insert_fn {
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 66b26148ede..10198b86b4b 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -775,10 +775,10 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
         if (!is_numeric(input.column(column_id).type()))
           CUDF_FAIL("IdentityHash does not support this data type");
       }
-      return detail::local::hash_partition<IdentityHash>(
+      return detail::local::hash_partition<detail::IdentityHash>(
         input, columns_to_hash, num_partitions, seed, stream, mr);
     case (hash_id::HASH_MURMUR3):
-      return detail::local::hash_partition<MurmurHash3_32>(
+      return detail::local::hash_partition<detail::MurmurHash3_32>(
         input, columns_to_hash, num_partitions, seed, stream, mr);
     default: CUDF_FAIL("Unsupported hash function in hash_partition");
   }
diff --git a/cpp/tests/hash_map/multimap_test.cu b/cpp/tests/hash_map/multimap_test.cu
index 8ba69b96cb8..5407b20830a 100644
--- a/cpp/tests/hash_map/multimap_test.cu
+++ b/cpp/tests/hash_map/multimap_test.cu
@@ -49,7 +49,7 @@ class MultimapTest : public cudf::test::BaseFixture {
                                   size_type,
                                   std::numeric_limits<key_type>::max(),
                                   std::numeric_limits<value_type>::max(),
-                                  cudf::default_hash<key_type>,
+                                  cudf::detail::default_hash<key_type>,
                                   equal_to<key_type>,
                                   default_allocator<thrust::pair<key_type, value_type>>>;
 

From 6c293ed9a96cb54f5e915323bd396e83038cb726 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Mar 2022 17:42:35 -0800
Subject: [PATCH 06/15] Make SparkMurmurHash3_32 inherit from MurmurHash3_32
 (tests currently broken).

---
 .../cudf/detail/utilities/hash_functions.cuh  | 131 ++----------------
 1 file changed, 13 insertions(+), 118 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 13ca74026c0..c85bc1c48d5 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -121,7 +121,7 @@ struct MurmurHash3_32 {
 
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    return compute(detail::normalize_nans_and_zeros(key));
+    return compute(normalize_nans_and_zeros(key));
   }
 
   template <typename T>
@@ -176,13 +176,14 @@ struct MurmurHash3_32 {
     return h;
   }
 
- private:
-  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
   static constexpr uint32_t c1     = 0xcc9e2d51;
   static constexpr uint32_t c2     = 0x1b873593;
   static constexpr uint32_t c3     = 0xe6546b64;
   static constexpr uint32_t rot_c1 = 15;
   static constexpr uint32_t rot_c2 = 13;
+
+ private:
+  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
 };
 
 template <>
@@ -191,18 +192,6 @@ hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& k
   return compute(static_cast<uint8_t>(key));
 }
 
-template <>
-hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
-{
-  return compute(detail::normalize_nans_and_zeros(key));
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
-{
-  return compute(detail::normalize_nans_and_zeros(key));
-}
-
 template <>
 hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
@@ -250,45 +239,15 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
 }
 
 template <typename Key>
-struct SparkMurmurHash3_32 {
+struct SparkMurmurHash3_32 : public MurmurHash3_32<Key> {
   using result_type = hash_value_type;
 
   SparkMurmurHash3_32() = default;
-  constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}
-
-  [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
-  {
-    return __funnelshift_l(x, x, r);  // Equivalent to (x << r) | (x >> (32 - r))
-  }
-
-  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
-  {
-    h ^= h >> 16;
-    h *= 0x85ebca6b;
-    h ^= h >> 13;
-    h *= 0xc2b2ae35;
-    h ^= h >> 16;
-    return h;
-  }
-
-  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
-                                                      cudf::size_type offset) const
-  {
-    // Read a 4-byte value from the data pointer as individual bytes for safe
-    // unaligned access (very likely for string types).
-    auto block = reinterpret_cast<uint8_t const*>(data + offset);
-    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
-  }
+  constexpr SparkMurmurHash3_32(uint32_t seed) : MurmurHash3_32<Key>(seed) {}
 
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    return compute(key);
-  }
-
-  template <typename T>
-  result_type __device__ inline compute(T const& key) const
-  {
-    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
+    return compute(normalize_nans(key));
   }
 
   result_type __device__ inline compute_remaining_bytes(std::byte const* data,
@@ -314,39 +273,12 @@ struct SparkMurmurHash3_32 {
     return h;
   }
 
-  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
-  {
-    constexpr cudf::size_type BLOCK_SIZE = 4;
-    cudf::size_type const nblocks        = len / BLOCK_SIZE;
-    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
-    result_type h                        = m_seed;
-
-    // Process all four-byte chunks.
-    for (cudf::size_type i = 0; i < nblocks; i++) {
-      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
-      k1 *= c1;
-      k1 = rotl32(k1, rot_c1);
-      k1 *= c2;
-      h ^= k1;
-      h = rotl32(h, rot_c2);
-      h = h * 5 + c3;
-    }
-
-    h = compute_remaining_bytes(data, len, tail_offset, h);
-
-    // Finalize hash.
-    h ^= len;
-    h = fmix32(h);
-    return h;
-  }
-
- private:
-  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
-  static constexpr uint32_t c1     = 0xcc9e2d51;
-  static constexpr uint32_t c2     = 0x1b873593;
-  static constexpr uint32_t c3     = 0xe6546b64;
-  static constexpr uint32_t rot_c1 = 15;
-  static constexpr uint32_t rot_c2 = 13;
+  using MurmurHash3_32<Key>::rotl32;
+  using MurmurHash3_32<Key>::c1;
+  using MurmurHash3_32<Key>::c2;
+  using MurmurHash3_32<Key>::c3;
+  using MurmurHash3_32<Key>::rot_c1;
+  using MurmurHash3_32<Key>::rot_c2;
 };
 
 template <>
@@ -380,27 +312,6 @@ hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
   return compute<uint32_t>(key);
 }
 
-template <>
-hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
-{
-  return compute<float>(detail::normalize_nans(key));
-}
-
-template <>
-hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
-{
-  return compute<double>(detail::normalize_nans(key));
-}
-
-template <>
-hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
-  cudf::string_view const& key) const
-{
-  auto const data = reinterpret_cast<std::byte const*>(key.data());
-  auto const len  = key.size_bytes();
-  return compute_bytes(data, len);
-}
-
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
@@ -457,22 +368,6 @@ hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::oper
   return compute_bytes(big_endian_data, length);
 }
 
-template <>
-hash_value_type __device__ inline SparkMurmurHash3_32<cudf::list_view>::operator()(
-  cudf::list_view const& key) const
-{
-  cudf_assert(false && "List column hashing is not supported");
-  return 0;
-}
-
-template <>
-hash_value_type __device__ inline SparkMurmurHash3_32<cudf::struct_view>::operator()(
-  cudf::struct_view const& key) const
-{
-  cudf_assert(false && "Direct hashing of struct_view is not supported");
-  return 0;
-}
-
 /**
  * @brief  This hash function simply returns the value that is asked to be hash
  * reinterpreted as the result_type of the functor.

From 7cdec5f6b0bb11a55f2e58ceb5a6dd9453298a37 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 9 Mar 2022 12:26:33 -0800
Subject: [PATCH 07/15] Revert "Make SparkMurmurHash3_32 inherit from
 MurmurHash3_32 (tests currently broken)."

This reverts commit 6c293ed9a96cb54f5e915323bd396e83038cb726.
---
 .../cudf/detail/utilities/hash_functions.cuh  | 131 ++++++++++++++++--
 1 file changed, 118 insertions(+), 13 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index c85bc1c48d5..13ca74026c0 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -121,7 +121,7 @@ struct MurmurHash3_32 {
 
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    return compute(normalize_nans_and_zeros(key));
+    return compute(detail::normalize_nans_and_zeros(key));
   }
 
   template <typename T>
@@ -176,14 +176,13 @@ struct MurmurHash3_32 {
     return h;
   }
 
+ private:
+  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
   static constexpr uint32_t c1     = 0xcc9e2d51;
   static constexpr uint32_t c2     = 0x1b873593;
   static constexpr uint32_t c3     = 0xe6546b64;
   static constexpr uint32_t rot_c1 = 15;
   static constexpr uint32_t rot_c2 = 13;
-
- private:
-  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
 };
 
 template <>
@@ -192,6 +191,18 @@ hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& k
   return compute(static_cast<uint8_t>(key));
 }
 
+template <>
+hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
+{
+  return compute(detail::normalize_nans_and_zeros(key));
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
+{
+  return compute(detail::normalize_nans_and_zeros(key));
+}
+
 template <>
 hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
@@ -239,15 +250,45 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
 }
 
 template <typename Key>
-struct SparkMurmurHash3_32 : public MurmurHash3_32<Key> {
+struct SparkMurmurHash3_32 {
   using result_type = hash_value_type;
 
   SparkMurmurHash3_32() = default;
-  constexpr SparkMurmurHash3_32(uint32_t seed) : MurmurHash3_32<Key>(seed) {}
+  constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}
+
+  [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
+  {
+    return __funnelshift_l(x, x, r);  // Equivalent to (x << r) | (x >> (32 - r))
+  }
+
+  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
+  {
+    h ^= h >> 16;
+    h *= 0x85ebca6b;
+    h ^= h >> 13;
+    h *= 0xc2b2ae35;
+    h ^= h >> 16;
+    return h;
+  }
+
+  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
+                                                      cudf::size_type offset) const
+  {
+    // Read a 4-byte value from the data pointer as individual bytes for safe
+    // unaligned access (very likely for string types).
+    auto block = reinterpret_cast<uint8_t const*>(data + offset);
+    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
+  }
 
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    return compute(normalize_nans(key));
+    return compute(key);
+  }
+
+  template <typename T>
+  result_type __device__ inline compute(T const& key) const
+  {
+    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
   }
 
   result_type __device__ inline compute_remaining_bytes(std::byte const* data,
@@ -273,12 +314,39 @@ struct SparkMurmurHash3_32 : public MurmurHash3_32<Key> {
     return h;
   }
 
-  using MurmurHash3_32<Key>::rotl32;
-  using MurmurHash3_32<Key>::c1;
-  using MurmurHash3_32<Key>::c2;
-  using MurmurHash3_32<Key>::c3;
-  using MurmurHash3_32<Key>::rot_c1;
-  using MurmurHash3_32<Key>::rot_c2;
+  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
+  {
+    constexpr cudf::size_type BLOCK_SIZE = 4;
+    cudf::size_type const nblocks        = len / BLOCK_SIZE;
+    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
+    result_type h                        = m_seed;
+
+    // Process all four-byte chunks.
+    for (cudf::size_type i = 0; i < nblocks; i++) {
+      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
+      k1 *= c1;
+      k1 = rotl32(k1, rot_c1);
+      k1 *= c2;
+      h ^= k1;
+      h = rotl32(h, rot_c2);
+      h = h * 5 + c3;
+    }
+
+    h = compute_remaining_bytes(data, len, tail_offset, h);
+
+    // Finalize hash.
+    h ^= len;
+    h = fmix32(h);
+    return h;
+  }
+
+ private:
+  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
+  static constexpr uint32_t c1     = 0xcc9e2d51;
+  static constexpr uint32_t c2     = 0x1b873593;
+  static constexpr uint32_t c3     = 0xe6546b64;
+  static constexpr uint32_t rot_c1 = 15;
+  static constexpr uint32_t rot_c2 = 13;
 };
 
 template <>
@@ -312,6 +380,27 @@ hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
   return compute<uint32_t>(key);
 }
 
+template <>
+hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
+{
+  return compute<float>(detail::normalize_nans(key));
+}
+
+template <>
+hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
+{
+  return compute<double>(detail::normalize_nans(key));
+}
+
+template <>
+hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
+  cudf::string_view const& key) const
+{
+  auto const data = reinterpret_cast<std::byte const*>(key.data());
+  auto const len  = key.size_bytes();
+  return compute_bytes(data, len);
+}
+
 template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
@@ -368,6 +457,22 @@ hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::oper
   return compute_bytes(big_endian_data, length);
 }
 
+template <>
+hash_value_type __device__ inline SparkMurmurHash3_32<cudf::list_view>::operator()(
+  cudf::list_view const& key) const
+{
+  cudf_assert(false && "List column hashing is not supported");
+  return 0;
+}
+
+template <>
+hash_value_type __device__ inline SparkMurmurHash3_32<cudf::struct_view>::operator()(
+  cudf::struct_view const& key) const
+{
+  cudf_assert(false && "Direct hashing of struct_view is not supported");
+  return 0;
+}
+
 /**
  * @brief  This hash function simply returns the value that is asked to be hash
  * reinterpreted as the result_type of the functor.

From cafd0b3473482a5521241747a66ca25a3c1f77d5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 9 Mar 2022 12:27:41 -0800
Subject: [PATCH 08/15] Make default constructor constexpr.

---
 cpp/include/cudf/detail/utilities/hash_functions.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 13ca74026c0..8fb06cec022 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -92,7 +92,7 @@ template <typename Key>
 struct MurmurHash3_32 {
   using result_type = hash_value_type;
 
-  MurmurHash3_32() = default;
+  constexpr MurmurHash3_32() = default;
   constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
 
   [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t h, uint32_t r) const
@@ -253,7 +253,7 @@ template <typename Key>
 struct SparkMurmurHash3_32 {
   using result_type = hash_value_type;
 
-  SparkMurmurHash3_32() = default;
+  constexpr SparkMurmurHash3_32() = default;
   constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}
 
   [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const

From a24f52dd386728e46501b231a1a4127e5007f141 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 9 Mar 2022 12:29:48 -0800
Subject: [PATCH 09/15] Define hash_value_type in cudf namespace.

---
 cpp/include/cudf/detail/utilities/hash_functions.cuh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 8fb06cec022..a3e6ae7db24 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -27,9 +27,10 @@
 
 #include <thrust/iterator/reverse_iterator.h>
 
+namespace cudf {
+
 using hash_value_type = uint32_t;
 
-namespace cudf {
 namespace detail {
 
 /**

From 79277357b22fdd4248e251a337996b3516639103 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Sun, 17 Apr 2022 13:54:29 -0500
Subject: [PATCH 10/15] Replace rotl32 with rotate_bits_left.

---
 cpp/include/cudf/detail/utilities/hash_functions.cuh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 8663045c02f..bf1699cab58 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -244,7 +244,7 @@ struct MurmurHash3_32 {
       case 1:
         k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
         k1 *= c1;
-        k1 = rotl32(k1, rot_c1);
+        k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
         k1 *= c2;
         h ^= k1;
     };
@@ -399,10 +399,10 @@ struct SparkMurmurHash3_32 {
       // casting byte-to-int, but C++ does not.
       uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
       k1 *= c1;
-      k1 = rotl32(k1, rot_c1);
+      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
       k1 *= c2;
       h ^= k1;
-      h = rotl32(h, rot_c2);
+      h = cudf::detail::rotate_bits_left(h, rot_c2);
       h = h * 5 + c3;
     }
     return h;

From f02fa68fb2bd9f9423434441a91f14a29fc22ab1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 18 Apr 2022 10:54:24 -0500
Subject: [PATCH 11/15] Update bpe_tokenizer.cuh.

---
 cpp/src/text/subword/bpe_tokenizer.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/text/subword/bpe_tokenizer.cuh b/cpp/src/text/subword/bpe_tokenizer.cuh
index 31cc29a8d8a..24b10fc4a36 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cuh
+++ b/cpp/src/text/subword/bpe_tokenizer.cuh
@@ -36,12 +36,12 @@ namespace detail {
 
 using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
 
-using merge_pairs_map_type = cuco::static_map<hash_value_type,
+using merge_pairs_map_type = cuco::static_map<cudf::hash_value_type,
                                               cudf::size_type,
                                               cuda::thread_scope_device,
                                               hash_table_allocator_type>;
 
-using string_hasher_type = MurmurHash3_32<cudf::string_view>;
+using string_hasher_type = cudf::detail::MurmurHash3_32<cudf::string_view>;
 
 }  // namespace detail
 

From e4911159e2b2981ae7e013cdbf8266cce4b53dd7 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 18 Apr 2022 13:56:59 -0500
Subject: [PATCH 12/15] Fix subword includes.

---
 cpp/src/text/subword/bpe_tokenizer.cu    |  5 +++--
 cpp/src/text/subword/load_merges_file.cu | 13 +++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu
index fb631b3f31f..404ecf1248c 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cu
+++ b/cpp/src/text/subword/bpe_tokenizer.cu
@@ -22,6 +22,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/get_value.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/strings/detail/combine.hpp>
 #include <cudf/strings/detail/utilities.cuh>
 #include <cudf/strings/detail/utilities.hpp>
@@ -144,8 +145,8 @@ struct byte_pair_encoding_fn {
    * @param rhs Second string.
    * @return The hash value to match with `d_map`.
    */
-  __device__ hash_value_type compute_hash(cudf::string_view const& lhs,
-                                          cudf::string_view const& rhs)
+  __device__ cudf::hash_value_type compute_hash(cudf::string_view const& lhs,
+                                                cudf::string_view const& rhs)
   {
     __shared__ char shmem[48 * 1024];  // max for Pascal
     auto const total_size         = lhs.size_bytes() + rhs.size_bytes() + 1;
diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu
index 31f579dc9d4..1e0c9c81fcd 100644
--- a/cpp/src/text/subword/load_merges_file.cu
+++ b/cpp/src/text/subword/load_merges_file.cu
@@ -21,6 +21,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/utilities/error.hpp>
 
@@ -42,7 +43,7 @@ struct make_pair_function {
   /**
    * @brief Hash the merge pair entry
    */
-  __device__ cuco::pair_type<hash_value_type, cudf::size_type> operator()(cudf::size_type idx)
+  __device__ cuco::pair_type<cudf::hash_value_type, cudf::size_type> operator()(cudf::size_type idx)
   {
     auto const result = _hasher(d_strings.element<cudf::string_view>(idx));
     return cuco::make_pair(result, idx);
@@ -105,9 +106,9 @@ std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
   // Ensure capacity is at least (size/0.7) as documented here:
   // https://github.com/NVIDIA/cuCollections/blob/6ec8b6dcdeceea07ab4456d32461a05c18864411/include/cuco/static_map.cuh#L179-L182
   auto merge_pairs_map = std::make_unique<merge_pairs_map_type>(
-    static_cast<size_t>(input.size() * 2),        // capacity is 2x;
-    std::numeric_limits<hash_value_type>::max(),  // empty key;
-    -1,                                           // empty value is not used
+    static_cast<size_t>(input.size() * 2),              // capacity is 2x;
+    std::numeric_limits<cudf::hash_value_type>::max(),  // empty key;
+    -1,                                                 // empty value is not used
     hash_table_allocator_type{default_allocator<char>{}, stream},
     stream.value());
 
@@ -117,8 +118,8 @@ std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
 
   merge_pairs_map->insert(iter,
                           iter + input.size(),
-                          cuco::detail::MurmurHash3_32<hash_value_type>{},
-                          thrust::equal_to<hash_value_type>{},
+                          cuco::detail::MurmurHash3_32<cudf::hash_value_type>{},
+                          thrust::equal_to<cudf::hash_value_type>{},
                           stream.value());
 
   return merge_pairs_map;

From 1e63821dd84aa25940a6912f086b58dfb61f7afe Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 19 Apr 2022 15:01:49 -0500
Subject: [PATCH 13/15] Revert copyright change.

---
 python/cudf/cudf/core/resample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py
index 57630e7d4a9..2bed71ea751 100644
--- a/python/cudf/cudf/core/resample.py
+++ b/python/cudf/cudf/core/resample.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2022, NVIDIA CORPORATION &
+# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION &
 # AFFILIATES. All rights reserved.  SPDX-License-Identifier:
 # Apache-2.0
 #

From 0a170183e3541840aac458853da0890d9217c530 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 19 Apr 2022 16:42:45 -0500
Subject: [PATCH 14/15] Add [[fallthrough]].

---
 cpp/include/cudf/detail/utilities/hash_functions.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index bf1699cab58..9c6f3e9cb13 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -239,8 +239,8 @@ struct MurmurHash3_32 {
     // Process remaining bytes that do not fill a four-byte chunk.
     uint32_t k1 = 0;
     switch (len % 4) {
-      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16;
-      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8;
+      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
+      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
       case 1:
         k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
         k1 *= c1;

From 0fcbb23d24f91c875f608c0edfbca76c1d0ee505 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 20 Apr 2022 13:32:51 -0500
Subject: [PATCH 15/15] Make operator() const.

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 cpp/src/io/parquet/chunk_dict.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu
index 20131c8a826..45d0ea40a26 100644
--- a/cpp/src/io/parquet/chunk_dict.cu
+++ b/cpp/src/io/parquet/chunk_dict.cu
@@ -55,7 +55,7 @@ struct equality_functor {
 template <typename T>
 struct hash_functor {
   column_device_view const& col;
-  __device__ auto operator()(size_type idx)
+  __device__ auto operator()(size_type idx) const
   {
     return cudf::detail::MurmurHash3_32<T>{}(col.element<T>(idx));
   }