From 788bd5483c82219df8b37b16e4f040c8e6359ffe Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 10 Jul 2023 15:39:41 -0400
Subject: [PATCH 1/8] Separate MurmurHash32 from hash_functions.cuh

---
 conda/recipes/libcudf/meta.yaml               |   2 +-
 .../cudf/detail/aggregation/result_cache.hpp  |   2 +-
 cpp/include/cudf/detail/join.hpp              |   3 +-
 .../cudf/hashing/detail/hash_functions.cuh    | 212 ++++++++++++++++++
 .../cudf/{ => hashing}/detail/hashing.hpp     |  11 -
 .../cudf/hashing/detail/murmur_hash32.cuh     | 196 ++++++++++++++++
 .../cudf/table/experimental/row_operators.cuh |   4 +-
 cpp/include/cudf/table/row_operators.cuh      |   4 +-
 cpp/src/column/column_view.cpp                |   2 +-
 cpp/src/groupby/hash/groupby.cu               |   2 +-
 cpp/src/hash/concurrent_unordered_map.cuh     |   2 +-
 cpp/src/hash/hashing.cu                       |  40 +---
 cpp/src/hash/md5_hash.cu                      |  14 +-
 cpp/src/hash/murmur_hash.cu                   |  16 +-
 cpp/src/hash/spark_murmur_hash.cu             |  15 +-
 cpp/src/hash/unordered_multiset.cuh           |   3 +-
 cpp/src/io/json/json_gpu.cu                   |   2 +-
 cpp/src/io/json/json_tree.cu                  |   4 +-
 cpp/src/io/parquet/page_data.cu               |   3 +-
 cpp/src/join/join_common_utils.hpp            |   2 +-
 cpp/src/partitioning/partitioning.cu          |   2 +-
 .../stream_compaction_common.hpp              |   2 +-
 cpp/src/text/minhash.cu                       |   4 +-
 cpp/src/text/subword/bpe_tokenizer.cu         |   2 +-
 cpp/src/text/subword/bpe_tokenizer.cuh        |   4 +-
 cpp/src/text/subword/load_merges_file.cu      |   2 +-
 cpp/tests/io/json_tree.cpp                    |   2 +-
 27 files changed, 476 insertions(+), 81 deletions(-)
 create mode 100644 cpp/include/cudf/hashing/detail/hash_functions.cuh
 rename cpp/include/cudf/{ => hashing}/detail/hashing.hpp (89%)
 create mode 100644 cpp/include/cudf/hashing/detail/murmur_hash32.cuh

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 4e9b5e2fdc1..f4b9945de0f 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -139,7 +139,6 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/groupby.hpp
         - test -f $PREFIX/include/cudf/detail/groupby/group_replace_nulls.hpp
         - test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp
-        - test -f $PREFIX/include/cudf/detail/hashing.hpp
         - test -f $PREFIX/include/cudf/detail/interop.hpp
         - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp
         - test -f $PREFIX/include/cudf/detail/join.hpp
@@ -192,6 +191,7 @@ outputs:
         - test -f $PREFIX/include/cudf/fixed_point/temporary.hpp
         - test -f $PREFIX/include/cudf/groupby.hpp
         - test -f $PREFIX/include/cudf/hashing.hpp
+        - test -f $PREFIX/include/cudf/hashing/detail/hashing.hpp
         - test -f $PREFIX/include/cudf/interop.hpp
         - test -f $PREFIX/include/cudf/io/avro.hpp
         - test -f $PREFIX/include/cudf/io/csv.hpp
diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp
index b1a2a369d22..41eec156c47 100644
--- a/cpp/include/cudf/detail/aggregation/result_cache.hpp
+++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp
@@ -18,7 +18,7 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
-#include <cudf/detail/hashing.hpp>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/types.hpp>
 
 #include <unordered_map>
diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp
index 4a34eb6b328..4cbb59c035c 100644
--- a/cpp/include/cudf/detail/join.hpp
+++ b/cpp/include/cudf/detail/join.hpp
@@ -16,7 +16,8 @@
 #pragma once
 
 #include <cudf/column/column.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh
new file mode 100644
index 00000000000..96f5b5b067d
--- /dev/null
+++ b/cpp/include/cudf/hashing/detail/hash_functions.cuh
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/strings/string_view.cuh>
+#include <cudf/utilities/traits.hpp>
+
+#include <thrust/pair.h>
+
+#include <cstring>
+
+namespace cudf {
+namespace detail {
+
+template <typename K>
+struct MurmurHash3_32;
+
+template <typename Key>
+using default_hash = MurmurHash3_32<Key>;
+
+// Canonicalizes NaN keys: any floating-point NaN becomes std::numeric_limits<T>::quiet_NaN().
+// Every other value, and every non-floating-point key, is returned unchanged.
+template <typename T>
+T __device__ inline normalize_nans(T const& key)
+{
+  T normalized = key;
+  if constexpr (cudf::is_floating_point<T>()) {
+    if (std::isnan(key)) { normalized = std::numeric_limits<T>::quiet_NaN(); }
+  }
+  return normalized;
+}
+
+// Canonicalizes floating-point keys: a key comparing equal to zero (so both -0.0 and +0.0) is
+// returned as T{0.0}; anything else goes through normalize_nans(). Other types pass through.
+template <typename T>
+T __device__ inline normalize_nans_and_zeros(T const& key)
+{
+  if constexpr (cudf::is_floating_point<T>()) {
+    constexpr T positive_zero{0.0};
+    if (key == positive_zero) { return positive_zero; }
+  }
+  return normalize_nans(key);
+}
+
+__device__ inline uint32_t rotate_bits_left(uint32_t x, uint32_t r)
+{
+  // Rotate x left by r bits: equivalent to (x << r) | (x >> (32 - r)) for 0 < r < 32
+  return __funnelshift_l(x, x, r);
+}
+
+__device__ inline uint32_t rotate_bits_right(uint32_t x, uint32_t r)
+{
+  // Rotate x right by r bits: equivalent to (x >> r) | (x << (32 - r)) for 0 < r < 32
+  return __funnelshift_r(x, x, r);
+}
+
+__device__ inline uint64_t rotate_bits_right(uint64_t x, uint32_t r)
+{
+  return (x >> (r & 63u)) | (x << ((64u - r) & 63u));  // masked: no 64-bit shift when r == 0
+}
+
+// Swap the endianness of a 32 bit value, i.e. return x with its four bytes in reverse order
+__device__ inline uint32_t swap_endian(uint32_t x)
+{
+  // The selector 0x0123 reverses the byte order
+  return __byte_perm(x, 0, 0x0123);
+}
+
+// Swap the endianness of a 64 bit value
+// There is no CUDA intrinsic for permuting bytes in 64 bit integers
+__device__ inline uint64_t swap_endian(uint64_t x)
+{
+  // Byte-reverse each 32 bit half on its own
+  uint32_t const reversed_low  = swap_endian(static_cast<uint32_t>(x));
+  uint32_t const reversed_high = swap_endian(static_cast<uint32_t>(x >> 32));
+  // The reversed low half becomes the new high half and vice versa
+  return (static_cast<uint64_t>(reversed_low) << 32) | static_cast<uint64_t>(reversed_high);
+}
+
+template <int capacity, typename hash_step_callable>
+struct hash_circular_buffer {
+  uint8_t storage[capacity];      // staging bytes; passed to hash_step once full
+  uint8_t* cur;                   // next write position inside storage
+  int available_space{capacity};  // unwritten bytes left in storage
+  hash_step_callable hash_step;   // called with storage each time it fills up
+
+  __device__ inline hash_circular_buffer(hash_step_callable hash_step)
+    : cur{storage}, hash_step{hash_step}  // writing starts at the front of storage
+  {
+  }
+
+  __device__ inline void put(uint8_t const* in, int size)
+  {
+    int copy_start = 0;
+    while (size >= available_space) {
+      // At least available_space bytes remain: top the buffer up, run hash_step
+      // on the full buffer, then restart writing at the front of storage.
+      memcpy(cur, in + copy_start, available_space);
+      hash_step(storage);
+      size -= available_space;
+      copy_start += available_space;
+      cur             = storage;
+      available_space = capacity;
+    }
+    // Fewer than available_space bytes remain (the loop exits only once
+    // `size < available_space`). Copy them into the buffer without triggering
+    // a hash step; they stay buffered until a later put() or pad() call.
+    memcpy(cur, in + copy_start, size);
+    cur += size;
+    available_space -= size;
+  }
+
+  __device__ inline void pad(int const space_to_leave)
+  {
+    if (space_to_leave > available_space) {  // too little room: zero-fill and flush first
+      memset(cur, 0x00, available_space);
+      hash_step(storage);
+      cur             = storage;
+      available_space = capacity;
+    }
+    memset(cur, 0x00, available_space - space_to_leave);  // zero all but the last bytes
+    cur += available_space - space_to_leave;
+    available_space = space_to_leave;  // exactly space_to_leave bytes remain unwritten
+  }
+
+  __device__ inline uint8_t const& operator[](int idx) const { return storage[idx]; }
+};
+
+// Get (byte pointer, sizeof(Element)) of a fixed-width, non-chrono element as a thrust::pair.
+template <typename Element>
+auto __device__ inline get_element_pointer_and_size(Element const& element)
+{
+  if constexpr (is_fixed_width<Element>() && !is_chrono<Element>()) {
+    return thrust::make_pair(reinterpret_cast<uint8_t const*>(&element), sizeof(Element));
+  } else {
+    CUDF_UNREACHABLE("Unsupported type.");  // chrono or non-fixed-width Element
+  }
+}
+
+template <>  // string_view: the pair holds the character data pointer and size_bytes()
+auto __device__ inline get_element_pointer_and_size(string_view const& element)
+{
+  return thrust::make_pair(reinterpret_cast<uint8_t const*>(element.data()), element.size_bytes());
+}
+
+/**
+ * Modified GPU implementation of
+ * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/
+ * Copyright (c) 2015 Barry Clark
+ * Licensed under the MIT license.
+ * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+ */
+void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destination)
+{
+  // One hex digit per byte: 0xABCD'1234 => 0x0000'ABCD'0000'1234 => 0x0B0A'0D0C'0201'0403
+  uint64_t x = num;
+  x          = ((x & 0xFFFF'0000u) << 16) | ((x & 0xFFFF));
+  x          = ((x & 0x000F'0000'000Fu) << 8) | ((x & 0x00F0'0000'00F0u) >> 4) |
+      ((x & 0x0F00'0000'0F00u) << 16) | ((x & 0xF000'0000'F000) << 4);
+
+  // Bytes holding 10..15 get an extra 0x27 so that 0x30 + digit + 0x27 lands on 'a'..'f'
+  uint64_t offsets = (((x + 0x0606'0606'0606'0606) >> 4) & 0x0101'0101'0101'0101) * 0x27;
+
+  x |= 0x3030'3030'3030'3030;  // 0x30 is ASCII '0'
+  x += offsets;
+  std::memcpy(destination, reinterpret_cast<uint8_t*>(&x), 8);  // 8 bytes, no terminator
+}
+
+/**
+ * @brief Hash functor that returns the key itself, converted with static_cast to
+ * result_type (uint32_t). Only arithmetic Key types are supported.
+ */
+template <typename Key>
+struct IdentityHash {
+  using result_type = uint32_t;
+  IdentityHash()    = default;
+  constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}
+
+  template <typename return_type = result_type>
+  constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
+    Key const& key) const
+  {
+    CUDF_UNREACHABLE("IdentityHash does not support this data type");  // non-arithmetic Key
+  }
+
+  template <typename return_type = result_type>
+  constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
+    Key const& key) const
+  {
+    return static_cast<result_type>(key);  // value conversion, not a bit reinterpretation
+  }
+
+ private:
+  uint32_t m_seed{0};  // set by the constructor; not read by operator()
+};
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp
similarity index 89%
rename from cpp/include/cudf/detail/hashing.hpp
rename to cpp/include/cudf/hashing/detail/hashing.hpp
index 0447384ffdc..08e1a1d03c0 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/hashing/detail/hashing.hpp
@@ -84,17 +84,6 @@ constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
 }
 
 }  // namespace detail
-
-/**
- * @copydoc cudf::hash
- *
- * @param stream CUDA stream used for device memory operations and kernel launches.
- */
-std::unique_ptr<column> hash(table_view const& input,
-                             hash_id hash_function,
-                             uint32_t seed,
-                             rmm::cuda_stream_view stream,
-                             rmm::mr::device_memory_resource* mr);
 }  // namespace hashing
 }  // namespace cudf
 
diff --git a/cpp/include/cudf/hashing/detail/murmur_hash32.cuh b/cpp/include/cudf/hashing/detail/murmur_hash32.cuh
new file mode 100644
index 00000000000..26425875068
--- /dev/null
+++ b/cpp/include/cudf/hashing/detail/murmur_hash32.cuh
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/hashing.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/lists/list_view.hpp>
+#include <cudf/strings/string_view.cuh>
+#include <cudf/structs/struct_view.hpp>
+#include <cudf/types.hpp>
+
+#include <cstddef>
+
+namespace cudf {
+namespace detail {
+
+// MurmurHash3_32 implementation from
+// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+template <typename Key>
+struct MurmurHash3_32 {
+  using result_type = hash_value_type;
+
+  constexpr MurmurHash3_32() = default;  // seed defaults to cudf::DEFAULT_HASH_SEED
+  constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
+
+  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
+  {
+    h ^= h >> 16;
+    h *= 0x85ebca6b;
+    h ^= h >> 13;
+    h *= 0xc2b2ae35;
+    h ^= h >> 16;
+    return h;
+  }
+
+  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
+                                                      cudf::size_type offset) const
+  {
+    // Assemble a 4-byte value (lowest-addressed byte is least significant) one byte
+    // at a time for safe unaligned access (very likely for string types).
+    auto const block = reinterpret_cast<uint8_t const*>(data + offset);
+    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
+  }
+
+  [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
+  {
+    return compute(detail::normalize_nans_and_zeros(key));  // -0.0/+0.0 and all NaNs hash alike
+  }
+
+  template <typename T>  // hashes the sizeof(T) bytes stored at &key
+  result_type __device__ inline compute(T const& key) const
+  {
+    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
+  }
+
+  result_type __device__ inline compute_remaining_bytes(std::byte const* data,
+                                                        cudf::size_type len,
+                                                        cudf::size_type tail_offset,
+                                                        result_type h) const
+  {
+    // Mix in the len % 4 trailing bytes that do not fill a four-byte chunk (h unchanged if 0).
+    uint32_t k1 = 0;
+    switch (len % 4) {
+      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
+      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
+      case 1:
+        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
+        k1 *= c1;
+        k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+        k1 *= c2;
+        h ^= k1;
+    };
+    return h;
+  }
+
+  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
+  {
+    constexpr cudf::size_type BLOCK_SIZE = 4;
+    cudf::size_type const nblocks        = len / BLOCK_SIZE;
+    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
+    result_type h                        = m_seed;
+
+    // Process all four-byte chunks.
+    for (cudf::size_type i = 0; i < nblocks; i++) {
+      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
+      k1 *= c1;
+      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+      k1 *= c2;
+      h ^= k1;
+      h = cudf::detail::rotate_bits_left(h, rot_c2);
+      h = h * 5 + c3;
+    }
+
+    h = compute_remaining_bytes(data, len, tail_offset, h);
+
+    // Finalize hash: fold in the byte length, then apply the fmix32 mix.
+    h ^= len;
+    h = fmix32(h);
+    return h;
+  }
+
+ private:
+  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
+  static constexpr uint32_t c1     = 0xcc9e2d51;
+  static constexpr uint32_t c2     = 0x1b873593;
+  static constexpr uint32_t c3     = 0xe6546b64;
+  static constexpr uint32_t rot_c1 = 15;
+  static constexpr uint32_t rot_c2 = 13;
+};
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
+{
+  return compute(static_cast<uint8_t>(key));  // hash bool as one byte holding 0 or 1
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
+{
+  return compute(detail::normalize_nans_and_zeros(key));  // same body as the primary template
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
+{
+  return compute(detail::normalize_nans_and_zeros(key));  // same body as the primary template
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
+  cudf::string_view const& key) const
+{
+  // Hash the bytes that key.data() points to, not the string_view object itself.
+  auto const* const bytes = reinterpret_cast<std::byte const*>(key.data());
+  return compute_bytes(bytes, key.size_bytes());
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
+  numeric::decimal32 const& key) const
+{
+  return compute(key.value());  // only key.value() is fed to the hash
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
+  numeric::decimal64 const& key) const
+{
+  return compute(key.value());  // only key.value() is fed to the hash
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
+  numeric::decimal128 const& key) const
+{
+  return compute(key.value());  // only key.value() is fed to the hash
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<cudf::list_view>::operator()(
+  cudf::list_view const& key) const
+{
+  CUDF_UNREACHABLE("List column hashing is not supported");  // key is never inspected
+}
+
+template <>
+hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
+  cudf::struct_view const& key) const
+{
+  CUDF_UNREACHABLE("Direct hashing of struct_view is not supported");  // key is never inspected
+}
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index ce6dd024622..7e876d6cbad 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -17,11 +17,11 @@
 #pragma once
 
 #include <cudf/column/column_device_view.cuh>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/utilities/algorithm.cuh>
 #include <cudf/detail/utilities/assert.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/lists/detail/dremel.hpp>
 #include <cudf/lists/list_device_view.cuh>
 #include <cudf/lists/lists_column_device_view.cuh>
diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh
index f90c78b9ba6..599a85c8a54 100644
--- a/cpp/include/cudf/table/row_operators.cuh
+++ b/cpp/include/cudf/table/row_operators.cuh
@@ -17,9 +17,9 @@
 #pragma once
 
 #include <cudf/column/column_device_view.cuh>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/utilities/assert.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/sorting.hpp>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/utilities/traits.hpp>
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index bf7c84b2735..75722ede9d2 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -15,8 +15,8 @@
  */
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/null_mask.hpp>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 62d83e8f6ae..f7ecc40c20b 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -33,10 +33,10 @@
 #include <cudf/detail/unary.hpp>
 #include <cudf/detail/utilities/algorithm.cuh>
 #include <cudf/detail/utilities/cuda.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/groupby.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table.hpp>
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index 5acfba0a8bf..89fa8442f21 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -21,7 +21,7 @@
 #include <hash/managed.cuh>
 
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 
diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu
index ca32bd14aff..875d17587c6 100644
--- a/cpp/src/hash/hashing.cu
+++ b/cpp/src/hash/hashing.cu
@@ -13,22 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/algorithm.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
-#include <cudf/table/experimental/row_operators.cuh>
-#include <cudf/table/table_device_view.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/execution_policy.h>
-#include <thrust/tabulate.h>
-
-#include <algorithm>
 
 namespace cudf {
 namespace hashing {
@@ -49,33 +38,6 @@ std::unique_ptr<column> hash(table_view const& input,
 }
 
 }  // namespace detail
-
-std::unique_ptr<column> murmur_hash3_32(table_view const& input,
-                                        uint32_t seed,
-                                        rmm::cuda_stream_view stream,
-                                        rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::murmur_hash3_32(input, seed, stream, mr);
-}
-
-std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
-                                              uint32_t seed,
-                                              rmm::cuda_stream_view stream,
-                                              rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::spark_murmur_hash3_32(input, seed, stream, mr);
-}
-
-std::unique_ptr<column> md5(table_view const& input,
-                            rmm::cuda_stream_view stream,
-                            rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::md5(input, stream, mr);
-}
-
 }  // namespace hashing
 
 std::unique_ptr<column> hash(table_view const& input,
diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu
index 62946902960..7a3c3526e75 100644
--- a/cpp/src/hash/md5_hash.cu
+++ b/cpp/src/hash/md5_hash.cu
@@ -15,10 +15,11 @@
  */
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/detail/strings_children.cuh>
@@ -281,5 +282,14 @@ std::unique_ptr<column> md5(table_view const& input,
 }
 
 }  // namespace detail
+
+std::unique_ptr<column> md5(table_view const& input,
+                            rmm::cuda_stream_view stream,
+                            rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();  // presumably the NVTX range macro from cudf/detail/nvtx/ranges.hpp
+  return detail::md5(input, stream, mr);  // all work is delegated to detail::md5
+}
+
 }  // namespace hashing
 }  // namespace cudf
diff --git a/cpp/src/hash/murmur_hash.cu b/cpp/src/hash/murmur_hash.cu
index 3683a45246f..83bef54d32f 100644
--- a/cpp/src/hash/murmur_hash.cu
+++ b/cpp/src/hash/murmur_hash.cu
@@ -14,9 +14,11 @@
  * limitations under the License.
  */
 #include <cudf/column/column_factories.hpp>
-#include <cudf/detail/hashing.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
@@ -57,5 +59,15 @@ std::unique_ptr<column> murmur_hash3_32(table_view const& input,
 }
 
 }  // namespace detail
+
+std::unique_ptr<column> murmur_hash3_32(table_view const& input,
+                                        uint32_t seed,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();  // presumably the NVTX range macro from cudf/detail/nvtx/ranges.hpp
+  return detail::murmur_hash3_32(input, seed, stream, mr);  // delegates to the detail overload
+}
+
 }  // namespace hashing
 }  // namespace cudf
diff --git a/cpp/src/hash/spark_murmur_hash.cu b/cpp/src/hash/spark_murmur_hash.cu
index a201f1133ea..239ded002d3 100644
--- a/cpp/src/hash/spark_murmur_hash.cu
+++ b/cpp/src/hash/spark_murmur_hash.cu
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 #include <cudf/column/column_factories.hpp>
-#include <cudf/detail/hashing.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
@@ -425,5 +426,15 @@ std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
 }
 
 }  // namespace detail
+
+std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
+                                              uint32_t seed,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();  // presumably the NVTX range macro from cudf/detail/nvtx/ranges.hpp
+  return detail::spark_murmur_hash3_32(input, seed, stream, mr);  // delegates to detail
+}
+
 }  // namespace hashing
 }  // namespace cudf
diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh
index 96e6728df81..e51e603dfb5 100644
--- a/cpp/src/hash/unordered_multiset.cuh
+++ b/cpp/src/hash/unordered_multiset.cuh
@@ -18,8 +18,9 @@
 
 #include <hash/helper_functions.cuh>
 
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/column/column_device_view.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu
index 16aba0e70dc..0cc3b79a679 100644
--- a/cpp/src/io/json/json_gpu.cu
+++ b/cpp/src/io/json/json_gpu.cu
@@ -19,8 +19,8 @@
 #include <io/utilities/column_type_histogram.hpp>
 #include <io/utilities/parsing_utils.cuh>
 
-#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/types.hpp>
 #include <cudf/utilities/bit.hpp>
 #include <cudf/utilities/span.hpp>
diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu
index 1a8ddeefdf5..8f94fbcd9c6 100644
--- a/cpp/src/io/json/json_tree.cu
+++ b/cpp/src/io/json/json_tree.cu
@@ -19,12 +19,12 @@
 #include <hash/helper_functions.cuh>
 #include <io/utilities/hostdevice_vector.hpp>
 
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/scatter.cuh>
 #include <cudf/detail/utilities/algorithm.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/hashing.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/span.hpp>
 
diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index e49378485fc..b54d487d5a3 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -18,7 +18,8 @@
 
 #include <io/utilities/column_buffer.hpp>
 
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 
 namespace cudf {
 namespace io {
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index cbccd78049a..64686c7a763 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -16,7 +16,7 @@
 #pragma once
 
 #include <cudf/detail/join.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/join.hpp>
 #include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_view.hpp>
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 12e1a00b8ba..3e389fcfd42 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -21,8 +21,8 @@
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/scatter.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/partitioning.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp
index eb57a62fd71..4e887b1199b 100644
--- a/cpp/src/stream_compaction/stream_compaction_common.hpp
+++ b/cpp/src/stream_compaction/stream_compaction_common.hpp
@@ -15,7 +15,7 @@
  */
 #pragma once
 
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu
index 6658d574dcc..e73654fdf4c 100644
--- a/cpp/src/text/minhash.cu
+++ b/cpp/src/text/minhash.cu
@@ -20,12 +20,12 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/copy.hpp>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/sequence.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hashing.hpp>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu
index 2ce29ec8d5c..1cd83cf4c48 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cu
+++ b/cpp/src/text/subword/bpe_tokenizer.cu
@@ -22,7 +22,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/get_value.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/strings/detail/combine.hpp>
 #include <cudf/strings/detail/strings_children.cuh>
 #include <cudf/utilities/default_stream.hpp>
diff --git a/cpp/src/text/subword/bpe_tokenizer.cuh b/cpp/src/text/subword/bpe_tokenizer.cuh
index 24b10fc4a36..df367f49a18 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cuh
+++ b/cpp/src/text/subword/bpe_tokenizer.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@
 #include <hash/hash_allocator.cuh>
 
 #include <cudf/column/column.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/murmur_hash32.cuh>
 
 #include <cuco/static_map.cuh>
 
diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu
index 68294ac882b..5d465be9cbe 100644
--- a/cpp/src/text/subword/load_merges_file.cu
+++ b/cpp/src/text/subword/load_merges_file.cu
@@ -21,8 +21,8 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 
diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp
index a81348872cf..2e95fe6cdd9 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json_tree.cpp
@@ -17,8 +17,8 @@
 #include <io/json/nested_json.hpp>
 #include <io/utilities/hostdevice_vector.hpp>
 
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/span.hpp>

From 8c4187b258677f2279c5f3cf2f0079a3317a7934 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 11 Jul 2023 15:23:30 -0400
Subject: [PATCH 2/8] put hash utils in hashing namespace

---
 .../cudf/detail/utilities/hash_functions.cuh  | 381 ------------------
 .../cudf/hashing/detail/hash_functions.cuh    | 113 +-----
 .../cudf/hashing/detail/murmur_hash32.cuh     |  18 +-
 cpp/include/cudf/join.hpp                     |   7 +-
 .../cudf/table/experimental/row_operators.cuh |   2 +-
 cpp/src/groupby/hash/groupby.cu               |  12 +-
 cpp/src/hash/concurrent_unordered_map.cuh     |   2 +-
 cpp/src/hash/md5_hash.cu                      |  99 ++++-
 cpp/src/hash/murmur_hash.cu                   |   2 +-
 cpp/src/hash/spark_murmur_hash.cu             |  12 +-
 cpp/src/hash/unordered_multiset.cuh           |   4 +-
 cpp/src/io/json/json_gpu.cu                   |   4 +-
 cpp/src/io/json/json_tree.cu                  |  10 +-
 cpp/src/io/parquet/chunk_dict.cu              |   2 +-
 cpp/src/io/parquet/page_data.cu               |   3 +-
 cpp/src/join/join_common_utils.hpp            |   3 +-
 cpp/src/join/mixed_join_common_utils.cuh      |   3 +-
 cpp/src/partitioning/partitioning.cu          |   4 +-
 cpp/src/text/minhash.cu                       |   3 +-
 cpp/src/text/subword/bpe_tokenizer.cuh        |   2 +-
 20 files changed, 144 insertions(+), 542 deletions(-)
 delete mode 100644 cpp/include/cudf/detail/utilities/hash_functions.cuh

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
deleted file mode 100644
index e57822f3fdb..00000000000
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * Copyright (c) 2017-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <cstddef>
-
-#include <cudf/column/column_device_view.cuh>
-#include <cudf/detail/utilities/assert.cuh>
-#include <cudf/fixed_point/fixed_point.hpp>
-#include <cudf/hashing.hpp>
-#include <cudf/strings/string_view.cuh>
-#include <cudf/types.hpp>
-
-#include <thrust/distance.h>
-#include <thrust/execution_policy.h>
-#include <thrust/find.h>
-#include <thrust/iterator/reverse_iterator.h>
-#include <thrust/pair.h>
-#include <thrust/reverse.h>
-
-namespace cudf {
-namespace detail {
-
-/**
- * Normalization of floating point NaNs, passthrough for all other values.
- */
-template <typename T>
-T __device__ inline normalize_nans(T const& key)
-{
-  if constexpr (cudf::is_floating_point<T>()) {
-    if (std::isnan(key)) { return std::numeric_limits<T>::quiet_NaN(); }
-  }
-  return key;
-}
-
-/**
- * Normalization of floating point NaNs and zeros, passthrough for all other values.
- */
-template <typename T>
-T __device__ inline normalize_nans_and_zeros(T const& key)
-{
-  if constexpr (cudf::is_floating_point<T>()) {
-    if (key == T{0.0}) { return T{0.0}; }
-  }
-  return normalize_nans(key);
-}
-
-__device__ inline uint32_t rotate_bits_left(uint32_t x, uint32_t r)
-{
-  // This function is equivalent to (x << r) | (x >> (32 - r))
-  return __funnelshift_l(x, x, r);
-}
-
-__device__ inline uint32_t rotate_bits_right(uint32_t x, uint32_t r)
-{
-  // This function is equivalent to (x >> r) | (x << (32 - r))
-  return __funnelshift_r(x, x, r);
-}
-
-__device__ inline uint64_t rotate_bits_right(uint64_t x, uint32_t r)
-{
-  return (x >> r) | (x << (64 - r));
-}
-
-// Swap the endianness of a 32 bit value
-__device__ inline uint32_t swap_endian(uint32_t x)
-{
-  // The selector 0x0123 reverses the byte order
-  return __byte_perm(x, 0, 0x0123);
-}
-
-// Swap the endianness of a 64 bit value
-// There is no CUDA intrinsic for permuting bytes in 64 bit integers
-__device__ inline uint64_t swap_endian(uint64_t x)
-{
-  // Reverse the endianness of each 32 bit section
-  uint32_t low_bits  = swap_endian(static_cast<uint32_t>(x));
-  uint32_t high_bits = swap_endian(static_cast<uint32_t>(x >> 32));
-  // Reassemble a 64 bit result, swapping the low bits and high bits
-  return (static_cast<uint64_t>(low_bits) << 32) | (static_cast<uint64_t>(high_bits));
-};
-
-template <int capacity, typename hash_step_callable>
-struct hash_circular_buffer {
-  uint8_t storage[capacity];
-  uint8_t* cur;
-  int available_space{capacity};
-  hash_step_callable hash_step;
-
-  __device__ inline hash_circular_buffer(hash_step_callable hash_step)
-    : cur{storage}, hash_step{hash_step}
-  {
-  }
-
-  __device__ inline void put(uint8_t const* in, int size)
-  {
-    int copy_start = 0;
-    while (size >= available_space) {
-      // The buffer will be filled by this chunk of data. Copy a chunk of the
-      // data to fill the buffer and trigger a hash step.
-      memcpy(cur, in + copy_start, available_space);
-      hash_step(storage);
-      size -= available_space;
-      copy_start += available_space;
-      cur             = storage;
-      available_space = capacity;
-    }
-    // The buffer will not be filled by the remaining data. That is, `size >= 0
-    // && size < capacity`. We copy the remaining data into the buffer but do
-    // not trigger a hash step.
-    memcpy(cur, in + copy_start, size);
-    cur += size;
-    available_space -= size;
-  }
-
-  __device__ inline void pad(int const space_to_leave)
-  {
-    if (space_to_leave > available_space) {
-      memset(cur, 0x00, available_space);
-      hash_step(storage);
-      cur             = storage;
-      available_space = capacity;
-    }
-    memset(cur, 0x00, available_space - space_to_leave);
-    cur += available_space - space_to_leave;
-    available_space = space_to_leave;
-  }
-
-  __device__ inline uint8_t const& operator[](int idx) const { return storage[idx]; }
-};
-
-// Get a uint8_t pointer to a column element and its size as a pair.
-template <typename Element>
-auto __device__ inline get_element_pointer_and_size(Element const& element)
-{
-  if constexpr (is_fixed_width<Element>() && !is_chrono<Element>()) {
-    return thrust::make_pair(reinterpret_cast<uint8_t const*>(&element), sizeof(Element));
-  } else {
-    CUDF_UNREACHABLE("Unsupported type.");
-  }
-}
-
-template <>
-auto __device__ inline get_element_pointer_and_size(string_view const& element)
-{
-  return thrust::make_pair(reinterpret_cast<uint8_t const*>(element.data()), element.size_bytes());
-}
-
-/**
- * Modified GPU implementation of
- * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/
- * Copyright (c) 2015 Barry Clark
- * Licensed under the MIT license.
- * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
-void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destination)
-{
-  // Transform 0xABCD'1234 => 0x0000'ABCD'0000'1234 => 0x0B0A'0D0C'0201'0403
-  uint64_t x = num;
-  x          = ((x & 0xFFFF'0000u) << 16) | ((x & 0xFFFF));
-  x          = ((x & 0x000F'0000'000Fu) << 8) | ((x & 0x00F0'0000'00F0u) >> 4) |
-      ((x & 0x0F00'0000'0F00u) << 16) | ((x & 0xF000'0000'F000) << 4);
-
-  // Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits
-  uint64_t offsets = (((x + 0x0606'0606'0606'0606) >> 4) & 0x0101'0101'0101'0101) * 0x27;
-
-  x |= 0x3030'3030'3030'3030;
-  x += offsets;
-  std::memcpy(destination, reinterpret_cast<uint8_t*>(&x), 8);
-}
-
-// MurmurHash3_32 implementation from
-// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-template <typename Key>
-struct MurmurHash3_32 {
-  using result_type = hash_value_type;
-
-  constexpr MurmurHash3_32() = default;
-  constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
-
-  [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
-  {
-    h ^= h >> 16;
-    h *= 0x85ebca6b;
-    h ^= h >> 13;
-    h *= 0xc2b2ae35;
-    h ^= h >> 16;
-    return h;
-  }
-
-  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
-                                                      cudf::size_type offset) const
-  {
-    // Read a 4-byte value from the data pointer as individual bytes for safe
-    // unaligned access (very likely for string types).
-    auto const block = reinterpret_cast<uint8_t const*>(data + offset);
-    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
-  }
-
-  [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
-  {
-    return compute(detail::normalize_nans_and_zeros(key));
-  }
-
-  template <typename T>
-  result_type __device__ inline compute(T const& key) const
-  {
-    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
-  }
-
-  result_type __device__ inline compute_remaining_bytes(std::byte const* data,
-                                                        cudf::size_type len,
-                                                        cudf::size_type tail_offset,
-                                                        result_type h) const
-  {
-    // Process remaining bytes that do not fill a four-byte chunk.
-    uint32_t k1 = 0;
-    switch (len % 4) {
-      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
-      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
-      case 1:
-        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
-        k1 *= c1;
-        k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
-        k1 *= c2;
-        h ^= k1;
-    };
-    return h;
-  }
-
-  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
-  {
-    constexpr cudf::size_type BLOCK_SIZE = 4;
-    cudf::size_type const nblocks        = len / BLOCK_SIZE;
-    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
-    result_type h                        = m_seed;
-
-    // Process all four-byte chunks.
-    for (cudf::size_type i = 0; i < nblocks; i++) {
-      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
-      k1 *= c1;
-      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
-      k1 *= c2;
-      h ^= k1;
-      h = cudf::detail::rotate_bits_left(h, rot_c2);
-      h = h * 5 + c3;
-    }
-
-    h = compute_remaining_bytes(data, len, tail_offset, h);
-
-    // Finalize hash.
-    h ^= len;
-    h = fmix32(h);
-    return h;
-  }
-
- private:
-  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
-  static constexpr uint32_t c1     = 0xcc9e2d51;
-  static constexpr uint32_t c2     = 0x1b873593;
-  static constexpr uint32_t c3     = 0xe6546b64;
-  static constexpr uint32_t rot_c1 = 15;
-  static constexpr uint32_t rot_c2 = 13;
-};
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
-{
-  return compute(static_cast<uint8_t>(key));
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
-{
-  return compute(detail::normalize_nans_and_zeros(key));
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
-{
-  return compute(detail::normalize_nans_and_zeros(key));
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
-  cudf::string_view const& key) const
-{
-  auto const data = reinterpret_cast<std::byte const*>(key.data());
-  auto const len  = key.size_bytes();
-  return compute_bytes(data, len);
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
-  numeric::decimal32 const& key) const
-{
-  return compute(key.value());
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
-  numeric::decimal64 const& key) const
-{
-  return compute(key.value());
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
-  numeric::decimal128 const& key) const
-{
-  return compute(key.value());
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<cudf::list_view>::operator()(
-  cudf::list_view const& key) const
-{
-  CUDF_UNREACHABLE("List column hashing is not supported");
-}
-
-template <>
-hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
-  cudf::struct_view const& key) const
-{
-  CUDF_UNREACHABLE("Direct hashing of struct_view is not supported");
-}
-
-/**
- * @brief  This hash function simply returns the value that is asked to be hash
- * reinterpreted as the result_type of the functor.
- */
-template <typename Key>
-struct IdentityHash {
-  using result_type = hash_value_type;
-  IdentityHash()    = default;
-  constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}
-
-  template <typename return_type = result_type>
-  constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
-    Key const& key) const
-  {
-    CUDF_UNREACHABLE("IdentityHash does not support this data type");
-  }
-
-  template <typename return_type = result_type>
-  constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
-    Key const& key) const
-  {
-    return static_cast<result_type>(key);
-  }
-
- private:
-  uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
-};
-
-template <typename Key>
-using default_hash = MurmurHash3_32<Key>;
-
-}  // namespace detail
-}  // namespace cudf
diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh
index 96f5b5b067d..769d1ad552e 100644
--- a/cpp/include/cudf/hashing/detail/hash_functions.cuh
+++ b/cpp/include/cudf/hashing/detail/hash_functions.cuh
@@ -23,8 +23,7 @@
 
 #include <cstring>
 
-namespace cudf {
-namespace detail {
+namespace cudf::hashing::detail {
 
 template <typename K>
 struct MurmurHash3_32;
@@ -73,113 +72,6 @@ __device__ inline uint64_t rotate_bits_right(uint64_t x, uint32_t r)
   return (x >> r) | (x << (64 - r));
 }
 
-// Swap the endianness of a 32 bit value
-__device__ inline uint32_t swap_endian(uint32_t x)
-{
-  // The selector 0x0123 reverses the byte order
-  return __byte_perm(x, 0, 0x0123);
-}
-
-// Swap the endianness of a 64 bit value
-// There is no CUDA intrinsic for permuting bytes in 64 bit integers
-__device__ inline uint64_t swap_endian(uint64_t x)
-{
-  // Reverse the endianness of each 32 bit section
-  uint32_t low_bits  = swap_endian(static_cast<uint32_t>(x));
-  uint32_t high_bits = swap_endian(static_cast<uint32_t>(x >> 32));
-  // Reassemble a 64 bit result, swapping the low bits and high bits
-  return (static_cast<uint64_t>(low_bits) << 32) | (static_cast<uint64_t>(high_bits));
-};
-
-template <int capacity, typename hash_step_callable>
-struct hash_circular_buffer {
-  uint8_t storage[capacity];
-  uint8_t* cur;
-  int available_space{capacity};
-  hash_step_callable hash_step;
-
-  __device__ inline hash_circular_buffer(hash_step_callable hash_step)
-    : cur{storage}, hash_step{hash_step}
-  {
-  }
-
-  __device__ inline void put(uint8_t const* in, int size)
-  {
-    int copy_start = 0;
-    while (size >= available_space) {
-      // The buffer will be filled by this chunk of data. Copy a chunk of the
-      // data to fill the buffer and trigger a hash step.
-      memcpy(cur, in + copy_start, available_space);
-      hash_step(storage);
-      size -= available_space;
-      copy_start += available_space;
-      cur             = storage;
-      available_space = capacity;
-    }
-    // The buffer will not be filled by the remaining data. That is, `size >= 0
-    // && size < capacity`. We copy the remaining data into the buffer but do
-    // not trigger a hash step.
-    memcpy(cur, in + copy_start, size);
-    cur += size;
-    available_space -= size;
-  }
-
-  __device__ inline void pad(int const space_to_leave)
-  {
-    if (space_to_leave > available_space) {
-      memset(cur, 0x00, available_space);
-      hash_step(storage);
-      cur             = storage;
-      available_space = capacity;
-    }
-    memset(cur, 0x00, available_space - space_to_leave);
-    cur += available_space - space_to_leave;
-    available_space = space_to_leave;
-  }
-
-  __device__ inline uint8_t const& operator[](int idx) const { return storage[idx]; }
-};
-
-// Get a uint8_t pointer to a column element and its size as a pair.
-template <typename Element>
-auto __device__ inline get_element_pointer_and_size(Element const& element)
-{
-  if constexpr (is_fixed_width<Element>() && !is_chrono<Element>()) {
-    return thrust::make_pair(reinterpret_cast<uint8_t const*>(&element), sizeof(Element));
-  } else {
-    CUDF_UNREACHABLE("Unsupported type.");
-  }
-}
-
-template <>
-auto __device__ inline get_element_pointer_and_size(string_view const& element)
-{
-  return thrust::make_pair(reinterpret_cast<uint8_t const*>(element.data()), element.size_bytes());
-}
-
-/**
- * Modified GPU implementation of
- * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/
- * Copyright (c) 2015 Barry Clark
- * Licensed under the MIT license.
- * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
-void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destination)
-{
-  // Transform 0xABCD'1234 => 0x0000'ABCD'0000'1234 => 0x0B0A'0D0C'0201'0403
-  uint64_t x = num;
-  x          = ((x & 0xFFFF'0000u) << 16) | ((x & 0xFFFF));
-  x          = ((x & 0x000F'0000'000Fu) << 8) | ((x & 0x00F0'0000'00F0u) >> 4) |
-      ((x & 0x0F00'0000'0F00u) << 16) | ((x & 0xF000'0000'F000) << 4);
-
-  // Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits
-  uint64_t offsets = (((x + 0x0606'0606'0606'0606) >> 4) & 0x0101'0101'0101'0101) * 0x27;
-
-  x |= 0x3030'3030'3030'3030;
-  x += offsets;
-  std::memcpy(destination, reinterpret_cast<uint8_t*>(&x), 8);
-}
-
 /**
  * @brief  This hash function simply returns the value that is asked to be hash
  * reinterpreted as the result_type of the functor.
@@ -208,5 +100,4 @@ struct IdentityHash {
   uint32_t m_seed{0};
 };
 
-}  // namespace detail
-}  // namespace cudf
+}  // namespace cudf::hashing::detail
diff --git a/cpp/include/cudf/hashing/detail/murmur_hash32.cuh b/cpp/include/cudf/hashing/detail/murmur_hash32.cuh
index 26425875068..1a63f8c45ca 100644
--- a/cpp/include/cudf/hashing/detail/murmur_hash32.cuh
+++ b/cpp/include/cudf/hashing/detail/murmur_hash32.cuh
@@ -26,8 +26,7 @@
 
 #include <cstddef>
 
-namespace cudf {
-namespace detail {
+namespace cudf::hashing::detail {
 
 // MurmurHash3_32 implementation from
 // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
@@ -66,7 +65,7 @@ struct MurmurHash3_32 {
 
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
-    return compute(detail::normalize_nans_and_zeros(key));
+    return compute(normalize_nans_and_zeros(key));
   }
 
   template <typename T>
@@ -88,7 +87,7 @@ struct MurmurHash3_32 {
       case 1:
         k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
         k1 *= c1;
-        k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+        k1 = rotate_bits_left(k1, rot_c1);
         k1 *= c2;
         h ^= k1;
     };
@@ -106,10 +105,10 @@ struct MurmurHash3_32 {
     for (cudf::size_type i = 0; i < nblocks; i++) {
       uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
       k1 *= c1;
-      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+      k1 = rotate_bits_left(k1, rot_c1);
       k1 *= c2;
       h ^= k1;
-      h = cudf::detail::rotate_bits_left(h, rot_c2);
+      h = rotate_bits_left(h, rot_c2);
       h = h * 5 + c3;
     }
 
@@ -139,13 +138,13 @@ hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& k
 template <>
 hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
 {
-  return compute(detail::normalize_nans_and_zeros(key));
+  return compute(normalize_nans_and_zeros(key));
 }
 
 template <>
 hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
 {
-  return compute(detail::normalize_nans_and_zeros(key));
+  return compute(normalize_nans_and_zeros(key));
 }
 
 template <>
@@ -192,5 +191,4 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
   CUDF_UNREACHABLE("Direct hashing of struct_view is not supported");
 }
 
-}  // namespace detail
-}  // namespace cudf
+}  // namespace cudf::hashing::detail
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 61f8c13bb77..e1455bd325c 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -34,10 +34,11 @@
 namespace cudf {
 
 // forward declaration
-namespace detail {
+namespace hashing::detail {
 template <typename T>
 class MurmurHash3_32;
-
+}  // namespace hashing::detail
+namespace detail {
 template <typename T>
 class hash_join;
 }  // namespace detail
@@ -272,7 +273,7 @@ enum class nullable_join : bool { YES, NO };
 class hash_join {
  public:
   using impl_type = typename cudf::detail::hash_join<
-    cudf::detail::MurmurHash3_32<cudf::hash_value_type>>;  ///< Implementation type
+    cudf::hashing::detail::MurmurHash3_32<cudf::hash_value_type>>;  ///< Implementation type
 
   hash_join() = delete;
   ~hash_join();
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index 7e876d6cbad..1aa0f21fca2 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -1942,7 +1942,7 @@ class row_hasher {
    * @param seed The seed to use for the hash function
    * @return A hash operator to use on the device
    */
-  template <template <typename> class hash_function = detail::default_hash,
+  template <template <typename> class hash_function = cudf::hashing::detail::default_hash,
             template <template <typename> class, typename>
             class DeviceRowHasher = device_row_hasher,
             typename Nullate>
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index f7ecc40c20b..8d4c0c9e4c4 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -69,12 +69,12 @@ namespace {
 // TODO: replace it with `cuco::static_map`
 // https://github.com/rapidsai/cudf/issues/10401
 template <typename ComparatorType>
-using map_type =
-  concurrent_unordered_map<cudf::size_type,
-                           cudf::size_type,
-                           cudf::experimental::row::hash::
-                             device_row_hasher<cudf::detail::default_hash, cudf::nullate::DYNAMIC>,
-                           ComparatorType>;
+using map_type = concurrent_unordered_map<
+  cudf::size_type,
+  cudf::size_type,
+  cudf::experimental::row::hash::device_row_hasher<cudf::hashing::detail::default_hash,
+                                                   cudf::nullate::DYNAMIC>,
+  ComparatorType>;
 
 /**
  * @brief List of aggregation operations that can be computed with a hash-based
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index 89fa8442f21..941a116bcc4 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -115,7 +115,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
  */
 template <typename Key,
           typename Element,
-          typename Hasher    = cudf::detail::default_hash<Key>,
+          typename Hasher    = cudf::hashing::detail::default_hash<Key>,
           typename Equality  = equal_to<Key>,
           typename Allocator = default_allocator<thrust::pair<Key, Element>>>
 class concurrent_unordered_map {
diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu
index 7a3c3526e75..4e25f9f8c23 100644
--- a/cpp/src/hash/md5_hash.cu
+++ b/cpp/src/hash/md5_hash.cu
@@ -42,6 +42,95 @@ namespace detail {
 
 namespace {
 
+template <int capacity, typename hash_step_callable>
+struct hash_circular_buffer {  // Byte buffer that calls hash_step(storage) each time it fills.
+  uint8_t storage[capacity];
+  uint8_t* cur;                   // next write position inside storage
+  int available_space{capacity};  // free bytes left before storage is full
+  hash_step_callable hash_step;   // invoked with storage whenever the buffer is full
+
+  __device__ inline hash_circular_buffer(hash_step_callable hash_step)
+    : cur{storage}, hash_step{hash_step}  // NOTE(review): cur aliases own storage; confirm no copies
+  {
+  }
+
+  __device__ inline void put(uint8_t const* in, int size)
+  {  // Append size bytes from in, running hash_step once per completely filled buffer.
+    int copy_start = 0;  // offset into in of the first byte not yet consumed
+    while (size >= available_space) {
+      // This chunk fills the buffer: copy just enough bytes to top it up, run a
+      // hash step on the full buffer, then restart at the beginning of storage.
+      memcpy(cur, in + copy_start, available_space);
+      hash_step(storage);
+      size -= available_space;
+      copy_start += available_space;
+      cur             = storage;
+      available_space = capacity;
+    }
+    // The remaining data cannot fill the buffer: the loop exited, so
+    // `size < available_space`. Copy it (possibly zero bytes) into the buffer
+    // without triggering a hash step.
+    memcpy(cur, in + copy_start, size);
+    cur += size;
+    available_space -= size;
+  }
+
+  __device__ inline void pad(int const space_to_leave)
+  {  // Zero-fill until space_to_leave bytes remain free; assumes space_to_leave <= capacity.
+    if (space_to_leave > available_space) {  // not enough room: zero the rest and flush first
+      memset(cur, 0x00, available_space);
+      hash_step(storage);
+      cur             = storage;
+      available_space = capacity;
+    }
+    memset(cur, 0x00, available_space - space_to_leave);
+    cur += available_space - space_to_leave;
+    available_space = space_to_leave;
+  }
+
+  __device__ inline uint8_t const& operator[](int idx) const { return storage[idx]; }
+};
+
+// Return (byte pointer, byte count) for an element; only fixed-width, non-chrono types.
+template <typename Element>
+auto __device__ inline get_element_pointer_and_size(Element const& element)
+{
+  if constexpr (is_fixed_width<Element>() && !is_chrono<Element>()) {
+    return thrust::make_pair(reinterpret_cast<uint8_t const*>(&element), sizeof(Element));
+  } else {
+    CUDF_UNREACHABLE("Unsupported type.");
+  }
+}
+
+template <>  // string_view: pair of the string's data() pointer and its size_bytes()
+auto __device__ inline get_element_pointer_and_size(string_view const& element)
+{
+  return thrust::make_pair(reinterpret_cast<uint8_t const*>(element.data()), element.size_bytes());
+}
+
+/**
+ * Modified GPU implementation of
+ * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/
+ * Copyright (c) 2015 Barry Clark
+ * Licensed under the MIT license.
+ * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+ */
+void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destination)
+{
+  // Transform 0xABCD'1234 => 0x0000'ABCD'0000'1234 => 0x0B0A'0D0C'0201'0403
+  uint64_t x = num;  // after the two steps below each byte of x holds one hex digit (0..15)
+  x          = ((x & 0xFFFF'0000u) << 16) | ((x & 0xFFFF));
+  x          = ((x & 0x000F'0000'000Fu) << 8) | ((x & 0x00F0'0000'00F0u) >> 4) |
+      ((x & 0x0F00'0000'0F00u) << 16) | ((x & 0xF000'0000'F000) << 4);
+
+  // Bytes holding 10..15 carry into bit 4 when 6 is added; give those an offset of 0x27
+  uint64_t offsets = (((x + 0x0606'0606'0606'0606) >> 4) & 0x0101'0101'0101'0101) * 0x27;
+
+  x |= 0x3030'3030'3030'3030;  // add ASCII '0' (0x30) to every byte
+  x += offsets;                // '0' + 10 + 0x27 == 'a', so 10..15 become 'a'..'f'
+  std::memcpy(destination, reinterpret_cast<uint8_t*>(&x), 8);  // writes 8 chars, no terminator
+}
+
 // The MD5 algorithm and its hash/shift constants are officially specified in
 // RFC 1321. For convenience, these values can also be found on Wikipedia:
 // https://en.wikipedia.org/wiki/MD5
@@ -82,7 +171,7 @@ struct MD5Hasher {
                sizeof(message_length_in_bits));
 
     for (int i = 0; i < 4; ++i) {
-      cudf::detail::uint32ToLowercaseHexString(hash_values[i], result_location + (8 * i));
+      uint32ToLowercaseHexString(hash_values[i], result_location + (8 * i));
     }
   }
 
@@ -94,8 +183,8 @@ struct MD5Hasher {
   template <typename Element>
   void __device__ inline process(Element const& element)
   {
-    auto const normalized_element  = cudf::detail::normalize_nans_and_zeros(element);
-    auto const [element_ptr, size] = cudf::detail::get_element_pointer_and_size(normalized_element);
+    auto const normalized_element  = normalize_nans_and_zeros(element);
+    auto const [element_ptr, size] = get_element_pointer_and_size(normalized_element);
     buffer.put(element_ptr, size);
     message_length += size;
   }
@@ -143,7 +232,7 @@ struct MD5Hasher {
         A = D;
         D = C;
         C = B;
-        B = B + cudf::detail::rotate_bits_left(F, md5_shift_constants[((j / 16) * 4) + (j % 4)]);
+        B = B + rotate_bits_left(F, md5_shift_constants[((j / 16) * 4) + (j % 4)]);
       }
 
       hash_values[0] += A;
@@ -154,7 +243,7 @@ struct MD5Hasher {
   };
 
   char* result_location;
-  cudf::detail::hash_circular_buffer<message_chunk_size, md5_hash_step> buffer;
+  hash_circular_buffer<message_chunk_size, md5_hash_step> buffer;
   uint64_t message_length = 0;
   uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476};
 };
diff --git a/cpp/src/hash/murmur_hash.cu b/cpp/src/hash/murmur_hash.cu
index 83bef54d32f..f0a355d79c1 100644
--- a/cpp/src/hash/murmur_hash.cu
+++ b/cpp/src/hash/murmur_hash.cu
@@ -53,7 +53,7 @@ std::unique_ptr<column> murmur_hash3_32(table_view const& input,
   thrust::tabulate(rmm::exec_policy(stream),
                    output_view.begin<hash_value_type>(),
                    output_view.end<hash_value_type>(),
-                   row_hasher.device_hasher<cudf::detail::MurmurHash3_32>(nullable, seed));
+                   row_hasher.device_hasher<MurmurHash3_32>(nullable, seed));
 
   return output;
 }
diff --git a/cpp/src/hash/spark_murmur_hash.cu b/cpp/src/hash/spark_murmur_hash.cu
index 239ded002d3..b996cc5bec5 100644
--- a/cpp/src/hash/spark_murmur_hash.cu
+++ b/cpp/src/hash/spark_murmur_hash.cu
@@ -86,10 +86,10 @@ struct SparkMurmurHash3_32 {
       // casting byte-to-int, but C++ does not.
       uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
       k1 *= c1;
-      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+      k1 = rotate_bits_left(k1, rot_c1);
       k1 *= c2;
       h ^= k1;
-      h = cudf::detail::rotate_bits_left(h, rot_c2);
+      h = rotate_bits_left(h, rot_c2);
       h = h * 5 + c3;
     }
     return h;
@@ -106,10 +106,10 @@ struct SparkMurmurHash3_32 {
     for (cudf::size_type i = 0; i < nblocks; i++) {
       uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
       k1 *= c1;
-      k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
+      k1 = rotate_bits_left(k1, rot_c1);
       k1 *= c2;
       h ^= k1;
-      h = cudf::detail::rotate_bits_left(h, rot_c2);
+      h = rotate_bits_left(h, rot_c2);
       h = h * 5 + c3;
     }
 
@@ -168,14 +168,14 @@ template <>
 spark_hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(
   float const& key) const
 {
-  return compute<float>(cudf::detail::normalize_nans(key));
+  return compute<float>(normalize_nans(key));
 }
 
 template <>
 spark_hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(
   double const& key) const
 {
-  return compute<double>(cudf::detail::normalize_nans(key));
+  return compute<double>(normalize_nans(key));
 }
 
 template <>
diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh
index e51e603dfb5..d61884f7514 100644
--- a/cpp/src/hash/unordered_multiset.cuh
+++ b/cpp/src/hash/unordered_multiset.cuh
@@ -40,7 +40,7 @@ namespace detail {
  *  Device view of the unordered multiset
  */
 template <typename Element,
-          typename Hasher   = default_hash<Element>,
+          typename Hasher   = cudf::hashing::detail::default_hash<Element>,
           typename Equality = equal_to<Element>>
 class unordered_multiset_device_view {
  public:
@@ -74,7 +74,7 @@ class unordered_multiset_device_view {
  * Fixed size set on a device.
  */
 template <typename Element,
-          typename Hasher   = default_hash<Element>,
+          typename Hasher   = cudf::hashing::detail::default_hash<Element>,
           typename Equality = equal_to<Element>>
 class unordered_multiset {
  public:
diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu
index 0cc3b79a679..f34d1aab7eb 100644
--- a/cpp/src/io/json/json_gpu.cu
+++ b/cpp/src/io/json/json_gpu.cu
@@ -185,7 +185,7 @@ __device__ field_descriptor next_field_descriptor(char const* begin,
                          false}
       : [&]() {
           auto const key_range = get_next_key(begin, end, opts.quotechar);
-          auto const key_hash  = cudf::detail::MurmurHash3_32<cudf::string_view>{}(
+          auto const key_hash  = cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{}(
             cudf::string_view(key_range.first, key_range.second - key_range.first));
           auto const hash_col = col_map.find(key_hash);
           // Fall back to field index if not found (parsing error)
@@ -506,7 +506,7 @@ __global__ void collect_keys_info_kernel(parse_options_view const options,
       keys_info->column(0).element<uint64_t>(idx) = field_range.key_begin - data.begin();
       keys_info->column(1).element<uint16_t>(idx) = len;
       keys_info->column(2).element<uint32_t>(idx) =
-        cudf::detail::MurmurHash3_32<cudf::string_view>{}(
+        cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{}(
           cudf::string_view(field_range.key_begin, len));
     }
   }
diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu
index 8f94fbcd9c6..3a089f0dfdd 100644
--- a/cpp/src/io/json/json_tree.cu
+++ b/cpp/src/io/json/json_tree.cu
@@ -420,7 +420,7 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
                          node_range_end   = d_tree.node_range_end.data()] __device__(auto node_id) {
     auto const field_name = cudf::string_view(d_input + node_range_begin[node_id],
                                               node_range_end[node_id] - node_range_begin[node_id]);
-    return cudf::detail::default_hash<cudf::string_view>{}(field_name);
+    return cudf::hashing::detail::default_hash<cudf::string_view>{}(field_name);
   };
   auto const d_equal = [d_input          = d_input.data(),
                         node_range_begin = d_tree.node_range_begin.data(),
@@ -579,15 +579,15 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
                          is_array_of_arrays,
                          row_array_children_level] __device__(auto node_id) {
     auto hash = cudf::hashing::detail::hash_combine(
-      cudf::detail::default_hash<TreeDepthT>{}(node_level[node_id]),
-      cudf::detail::default_hash<size_type>{}(node_type[node_id]));
+      cudf::hashing::detail::default_hash<TreeDepthT>{}(node_level[node_id]),
+      cudf::hashing::detail::default_hash<size_type>{}(node_type[node_id]));
     node_id = parent_node_ids[node_id];
     // Each node computes its hash by walking from its node up to the root.
     while (node_id != parent_node_sentinel) {
       hash = cudf::hashing::detail::hash_combine(
-        hash, cudf::detail::default_hash<TreeDepthT>{}(node_level[node_id]));
+        hash, cudf::hashing::detail::default_hash<TreeDepthT>{}(node_level[node_id]));
       hash = cudf::hashing::detail::hash_combine(
-        hash, cudf::detail::default_hash<size_type>{}(node_type[node_id]));
+        hash, cudf::hashing::detail::default_hash<size_type>{}(node_type[node_id]));
       if (is_array_of_arrays and node_level[node_id] == row_array_children_level)
         hash = cudf::hashing::detail::hash_combine(hash, list_indices[node_id]);
       node_id = parent_node_ids[node_id];
diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu
index a4d9673e8fa..97fd8047277 100644
--- a/cpp/src/io/parquet/chunk_dict.cu
+++ b/cpp/src/io/parquet/chunk_dict.cu
@@ -63,7 +63,7 @@ struct hash_functor {
   column_device_view const& col;
   __device__ auto operator()(size_type idx) const
   {
-    return cudf::detail::MurmurHash3_32<T>{}(col.element<T>(idx));
+    return cudf::hashing::detail::MurmurHash3_32<T>{}(col.element<T>(idx));
   }
 };
 
diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index b54d487d5a3..18dd442a9e4 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -47,7 +47,8 @@ inline __device__ void gpuOutputString(volatile page_state_s* s,
     // categoricals is enabled. The seed value is chosen arbitrarily.
     uint32_t constexpr hash_seed = 33;
     cudf::string_view const sv{ptr, static_cast<size_type>(len)};
-    *static_cast<uint32_t*>(dstv) = cudf::detail::MurmurHash3_32<cudf::string_view>{hash_seed}(sv);
+    *static_cast<uint32_t*>(dstv) =
+      cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{hash_seed}(sv);
   } else {
     // Output string descriptor
     auto* dst   = static_cast<string_index_pair*>(dstv);
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index 64686c7a763..e4d131219a3 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -60,7 +60,8 @@ using mixed_multimap_type = cuco::static_multimap<hash_value_type,
 using semi_map_type = cuco::
   static_map<hash_value_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
 
-using row_hash_legacy = cudf::row_hasher<default_hash, cudf::nullate::DYNAMIC>;
+using row_hash_legacy =
+  cudf::row_hasher<cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>;
 
 using row_equality_legacy = cudf::row_equality_comparator<cudf::nullate::DYNAMIC>;
 
diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh
index 80c3cef9899..38e5b75ade6 100644
--- a/cpp/src/join/mixed_join_common_utils.cuh
+++ b/cpp/src/join/mixed_join_common_utils.cuh
@@ -30,7 +30,8 @@ namespace cudf {
 namespace detail {
 
 using row_hash =
-  cudf::experimental::row::hash::device_row_hasher<default_hash, cudf::nullate::DYNAMIC>;
+  cudf::experimental::row::hash::device_row_hasher<cudf::hashing::detail::default_hash,
+                                                   cudf::nullate::DYNAMIC>;
 
 // // This alias is used by mixed_joins, which support only non-nested types
 using row_equality = cudf::experimental::row::equality::strong_index_comparator_adapter<
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 3e389fcfd42..2543d3a99cc 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -789,10 +789,10 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
         if (!is_numeric(input.column(column_id).type()))
           CUDF_FAIL("IdentityHash does not support this data type");
       }
-      return detail::hash_partition<detail::IdentityHash>(
+      return detail::hash_partition<cudf::hashing::detail::IdentityHash>(
         input, columns_to_hash, num_partitions, seed, stream, mr);
     case (hash_id::HASH_MURMUR3):
-      return detail::hash_partition<detail::MurmurHash3_32>(
+      return detail::hash_partition<cudf::hashing::detail::MurmurHash3_32>(
         input, columns_to_hash, num_partitions, seed, stream, mr);
     default: CUDF_FAIL("Unsupported hash function in hash_partition");
   }
diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu
index e73654fdf4c..6961e39096a 100644
--- a/cpp/src/text/minhash.cu
+++ b/cpp/src/text/minhash.cu
@@ -91,7 +91,8 @@ struct minhash_fn {
       // hashing with each seed on the same section of the string is 10x faster than
       // computing the substrings for each seed
       for (std::size_t seed_idx = 0; seed_idx < seeds.size(); ++seed_idx) {
-        auto const hasher = cudf::detail::MurmurHash3_32<cudf::string_view>{seeds[seed_idx]};
+        auto const hasher =
+          cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{seeds[seed_idx]};
         auto const hvalue = hasher(hash_str);
         cuda::atomic_ref<cudf::hash_value_type, cuda::thread_scope_block> ref{
           *(d_output + seed_idx)};
diff --git a/cpp/src/text/subword/bpe_tokenizer.cuh b/cpp/src/text/subword/bpe_tokenizer.cuh
index df367f49a18..72f861ab887 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cuh
+++ b/cpp/src/text/subword/bpe_tokenizer.cuh
@@ -41,7 +41,7 @@ using merge_pairs_map_type = cuco::static_map<cudf::hash_value_type,
                                               cuda::thread_scope_device,
                                               hash_table_allocator_type>;
 
-using string_hasher_type = cudf::detail::MurmurHash3_32<cudf::string_view>;
+using string_hasher_type = cudf::hashing::detail::MurmurHash3_32<cudf::string_view>;
 
 }  // namespace detail
 

From 6edaf2dbc77a045b7d3b2c4a4305c78b7bb8d215 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 11 Jul 2023 17:14:09 -0400
Subject: [PATCH 3/8] remove unneeded includes

---
 cpp/include/cudf/hashing/detail/hash_functions.cuh | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh
index 769d1ad552e..04f71cbc9cb 100644
--- a/cpp/include/cudf/hashing/detail/hash_functions.cuh
+++ b/cpp/include/cudf/hashing/detail/hash_functions.cuh
@@ -16,12 +16,9 @@
 
 #pragma once
 
-#include <cudf/strings/string_view.cuh>
 #include <cudf/utilities/traits.hpp>
 
-#include <thrust/pair.h>
-
-#include <cstring>
+#include <limits>
 
 namespace cudf::hashing::detail {
 

From 9b46475ff3204dc2bdaa3eb6193f2df058de9068 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 12 Jul 2023 09:47:12 -0400
Subject: [PATCH 4/8] remove unneeded includes

---
 cpp/src/hash/murmur_hash.cu              | 1 -
 cpp/src/text/subword/load_merges_file.cu | 1 -
 2 files changed, 2 deletions(-)

diff --git a/cpp/src/hash/murmur_hash.cu b/cpp/src/hash/murmur_hash.cu
index f0a355d79c1..5765b83311e 100644
--- a/cpp/src/hash/murmur_hash.cu
+++ b/cpp/src/hash/murmur_hash.cu
@@ -16,7 +16,6 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/hashing/detail/hashing.hpp>
 #include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/table/experimental/row_operators.cuh>
diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu
index 1798fb76a6a..dffe035ad35 100644
--- a/cpp/src/text/subword/load_merges_file.cu
+++ b/cpp/src/text/subword/load_merges_file.cu
@@ -22,7 +22,6 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 

From fb265ace513df42a07d8b42fd2665dddde6024e3 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 13 Jul 2023 15:29:58 -0400
Subject: [PATCH 5/8] create default_hash.cuh and rename functions

---
 cpp/CMakeLists.txt                            |  4 +-
 cpp/include/cudf/detail/join.hpp              |  1 -
 cpp/include/cudf/hashing.hpp                  |  6 +-
 .../cudf/hashing/detail/default_hash.cuh      | 29 +++++++++
 .../cudf/hashing/detail/hash_functions.cuh    | 34 ----------
 cpp/include/cudf/hashing/detail/hashing.hpp   | 16 ++---
 ...rmur_hash32.cuh => murmurhash3_x86_32.cuh} | 26 ++++----
 cpp/include/cudf/join.hpp                     |  4 +-
 .../cudf/table/experimental/row_operators.cuh |  2 +-
 cpp/include/nvtext/minhash.hpp                |  4 +-
 cpp/src/groupby/hash/groupby.cu               |  2 +-
 cpp/src/hash/concurrent_unordered_map.cuh     |  2 +-
 cpp/src/hash/hashing.cu                       |  4 +-
 .../{murmur_hash.cu => murmurhash3_x86_32.cu} | 22 +++----
 ...ur_hash.cu => spark_murmurhash3_x86_32.cu} | 58 +++++++++--------
 cpp/src/hash/unordered_multiset.cuh           |  2 +-
 cpp/src/io/json/json_gpu.cu                   |  6 +-
 cpp/src/io/json/json_tree.cu                  |  2 +-
 cpp/src/io/parquet/chunk_dict.cu              |  2 +-
 cpp/src/io/parquet/page_data.cu               |  5 +-
 cpp/src/join/join_common_utils.hpp            |  2 +-
 cpp/src/partitioning/partitioning.cu          | 35 +++++++++-
 cpp/src/search/contains_table.cu              |  1 -
 .../stream_compaction_common.hpp              |  1 -
 cpp/src/text/generate_ngrams.cu               |  4 +-
 cpp/src/text/minhash.cu                       |  4 +-
 cpp/src/text/subword/bpe_tokenizer.cu         |  1 -
 cpp/src/text/subword/bpe_tokenizer.cuh        |  4 +-
 cpp/tests/CMakeLists.txt                      |  3 +-
 ...3_test.cpp => murmurhash3_x86_32_test.cpp} | 39 +++++------
 ....cpp => spark_murmurhash3_x86_32_test.cpp} | 65 ++++++++++---------
 31 files changed, 210 insertions(+), 180 deletions(-)
 create mode 100644 cpp/include/cudf/hashing/detail/default_hash.cuh
 rename cpp/include/cudf/hashing/detail/{murmur_hash32.cuh => murmurhash3_x86_32.cuh} (84%)
 rename cpp/src/hash/{murmur_hash.cu => murmurhash3_x86_32.cu} (71%)
 rename cpp/src/hash/{spark_murmur_hash.cu => spark_murmurhash3_x86_32.cu} (86%)
 rename cpp/tests/hashing/{murmur3_test.cpp => murmurhash3_x86_32_test.cpp} (91%)
 rename cpp/tests/hashing/{spark_murmur3_test.cpp => spark_murmurhash3_x86_32_test.cpp} (90%)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0742d039092..27bde5dda73 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -344,8 +344,8 @@ add_library(
   src/groupby/sort/sort_helper.cu
   src/hash/hashing.cu
   src/hash/md5_hash.cu
-  src/hash/murmur_hash.cu
-  src/hash/spark_murmur_hash.cu
+  src/hash/murmurhash3_x86_32.cu
+  src/hash/spark_murmurhash3_x86_32.cu
   src/interop/dlpack.cpp
   src/interop/from_arrow.cu
   src/interop/to_arrow.cu
diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp
index 4cbb59c035c..6fcf10aef57 100644
--- a/cpp/include/cudf/detail/join.hpp
+++ b/cpp/include/cudf/detail/join.hpp
@@ -17,7 +17,6 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/hashing.hpp>
-#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp
index a8f5652c384..67f52d517cc 100644
--- a/cpp/include/cudf/hashing.hpp
+++ b/cpp/include/cudf/hashing.hpp
@@ -86,7 +86,7 @@ namespace hashing {
  *
  * @returns A column where each row is the hash of a row from the input
  */
-std::unique_ptr<column> murmur_hash3_32(
+std::unique_ptr<column> murmurhash3_x86_32(
   table_view const& input,
   uint32_t seed                       = DEFAULT_HASH_SEED,
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
@@ -95,7 +95,7 @@ std::unique_ptr<column> murmur_hash3_32(
 /**
  * @brief Computes the MurmurHash3 32-bit of each row in the given table
  *
- * This function computes the hash similar to MurmurHash3_32 with special processing
+ * This function computes the hash similar to MurmurHash3_x86_32 with special processing
  * to match Spark's implementation results.
  *
  * @param input The table of columns to hash
@@ -105,7 +105,7 @@ std::unique_ptr<column> murmur_hash3_32(
  *
  * @returns A column where each row is the hash of a row from the input
  */
-std::unique_ptr<column> spark_murmur_hash3_32(
+std::unique_ptr<column> spark_murmurhash3_x86_32(
   table_view const& input,
   uint32_t seed                       = DEFAULT_HASH_SEED,
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
diff --git a/cpp/include/cudf/hashing/detail/default_hash.cuh b/cpp/include/cudf/hashing/detail/default_hash.cuh
new file mode 100644
index 00000000000..21aad0ce91e
--- /dev/null
+++ b/cpp/include/cudf/hashing/detail/default_hash.cuh
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
+
+namespace cudf::hashing::detail {
+
+template <typename Key>
+struct MurmurHash3_x86_32;
+
+template <typename Key>
+using default_hash = MurmurHash3_x86_32<Key>;
+
+}  // namespace cudf::hashing::detail
diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh
index 04f71cbc9cb..f681bef6648 100644
--- a/cpp/include/cudf/hashing/detail/hash_functions.cuh
+++ b/cpp/include/cudf/hashing/detail/hash_functions.cuh
@@ -22,12 +22,6 @@
 
 namespace cudf::hashing::detail {
 
-template <typename K>
-struct MurmurHash3_32;
-
-template <typename Key>
-using default_hash = MurmurHash3_32<Key>;
-
 /**
  * Normalization of floating point NaNs, passthrough for all other values.
  */
@@ -69,32 +63,4 @@ __device__ inline uint64_t rotate_bits_right(uint64_t x, uint32_t r)
   return (x >> r) | (x << (64 - r));
 }
 
-/**
- * @brief  This hash function simply returns the value that is asked to be hash
- * reinterpreted as the result_type of the functor.
- */
-template <typename Key>
-struct IdentityHash {
-  using result_type = uint32_t;
-  IdentityHash()    = default;
-  constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}
-
-  template <typename return_type = result_type>
-  constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
-    Key const& key) const
-  {
-    CUDF_UNREACHABLE("IdentityHash does not support this data type");
-  }
-
-  template <typename return_type = result_type>
-  constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
-    Key const& key) const
-  {
-    return static_cast<result_type>(key);
-  }
-
- private:
-  uint32_t m_seed{0};
-};
-
 }  // namespace cudf::hashing::detail
diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp
index 08e1a1d03c0..94d6dfe2c39 100644
--- a/cpp/include/cudf/hashing/detail/hashing.hpp
+++ b/cpp/include/cudf/hashing/detail/hashing.hpp
@@ -27,15 +27,15 @@ namespace cudf {
 namespace hashing {
 namespace detail {
 
-std::unique_ptr<column> murmur_hash3_32(table_view const& input,
-                                        uint32_t seed,
-                                        rmm::cuda_stream_view,
-                                        rmm::mr::device_memory_resource* mr);
+std::unique_ptr<column> murmurhash3_x86_32(table_view const& input,
+                                           uint32_t seed,
+                                           rmm::cuda_stream_view,
+                                           rmm::mr::device_memory_resource* mr);
 
-std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
-                                              uint32_t seed,
-                                              rmm::cuda_stream_view,
-                                              rmm::mr::device_memory_resource* mr);
+std::unique_ptr<column> spark_murmurhash3_x86_32(table_view const& input,
+                                                 uint32_t seed,
+                                                 rmm::cuda_stream_view,
+                                                 rmm::mr::device_memory_resource* mr);
 
 std::unique_ptr<column> md5(table_view const& input,
                             rmm::cuda_stream_view stream,
diff --git a/cpp/include/cudf/hashing/detail/murmur_hash32.cuh b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
similarity index 84%
rename from cpp/include/cudf/hashing/detail/murmur_hash32.cuh
rename to cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
index 1a63f8c45ca..6cf0b0fe817 100644
--- a/cpp/include/cudf/hashing/detail/murmur_hash32.cuh
+++ b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
@@ -28,7 +28,7 @@
 
 namespace cudf::hashing::detail {
 
-// MurmurHash3_32 implementation from
+// MurmurHash3_x86_32 implementation from
 // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
 //-----------------------------------------------------------------------------
 // MurmurHash3 was written by Austin Appleby, and is placed in the public
@@ -38,11 +38,11 @@ namespace cudf::hashing::detail {
 // compile and run any of them on any platform, but your performance with the
 // non-native version will be less than optimal.
 template <typename Key>
-struct MurmurHash3_32 {
+struct MurmurHash3_x86_32 {
   using result_type = hash_value_type;
 
-  constexpr MurmurHash3_32() = default;
-  constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
+  constexpr MurmurHash3_x86_32() = default;
+  constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}
 
   [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
   {
@@ -130,25 +130,25 @@ struct MurmurHash3_32 {
 };
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
+hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
 {
   return compute(static_cast<uint8_t>(key));
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
+hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
 {
   return compute(normalize_nans_and_zeros(key));
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
+hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
 {
   return compute(normalize_nans_and_zeros(key));
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
+hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
@@ -157,35 +157,35 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
+hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
   return compute(key.value());
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
+hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
   return compute(key.value());
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
+hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
   numeric::decimal128 const& key) const
 {
   return compute(key.value());
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<cudf::list_view>::operator()(
+hash_value_type __device__ inline MurmurHash3_x86_32<cudf::list_view>::operator()(
   cudf::list_view const& key) const
 {
   CUDF_UNREACHABLE("List column hashing is not supported");
 }
 
 template <>
-hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
+hash_value_type __device__ inline MurmurHash3_x86_32<cudf::struct_view>::operator()(
   cudf::struct_view const& key) const
 {
   CUDF_UNREACHABLE("Direct hashing of struct_view is not supported");
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index e1455bd325c..6c50e1d5998 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -36,7 +36,7 @@ namespace cudf {
 // forward declaration
 namespace hashing::detail {
 template <typename T>
-class MurmurHash3_32;
+class MurmurHash3_x86_32;
 }  // namespace hashing::detail
 namespace detail {
 template <typename T>
@@ -273,7 +273,7 @@ enum class nullable_join : bool { YES, NO };
 class hash_join {
  public:
   using impl_type = typename cudf::detail::hash_join<
-    cudf::hashing::detail::MurmurHash3_32<cudf::hash_value_type>>;  ///< Implementation type
+    cudf::hashing::detail::MurmurHash3_x86_32<cudf::hash_value_type>>;  ///< Implementation type
 
   hash_join() = delete;
   ~hash_join();
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index 1aa0f21fca2..5fe9dcbdf1b 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -20,8 +20,8 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/utilities/algorithm.cuh>
 #include <cudf/detail/utilities/assert.cuh>
+#include <cudf/hashing/detail/default_hash.cuh>
 #include <cudf/hashing/detail/hashing.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/lists/detail/dremel.hpp>
 #include <cudf/lists/list_device_view.cuh>
 #include <cudf/lists/lists_column_device_view.cuh>
diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp
index 60116e389a3..dda23a2ba5b 100644
--- a/cpp/include/nvtext/minhash.hpp
+++ b/cpp/include/nvtext/minhash.hpp
@@ -40,7 +40,7 @@ namespace nvtext {
  * @throw std::invalid_argument if hash_function is not HASH_MURMUR3
  *
  * @param input Strings column to compute minhash
- * @param seed  Seed value used for the MurmurHash3_32 algorithm
+ * @param seed  Seed value used for the MurmurHash3_x86_32 algorithm
  * @param width The character width used for apply substrings;
  *              Default is 4 characters.
  * @param hash_function Hash algorithm to use;
@@ -72,7 +72,7 @@ std::unique_ptr<cudf::column> minhash(
  * @throw std::overflow_error if `seeds * input.size()` exceeds the column size limit
  *
  * @param input Strings column to compute minhash
- * @param seeds Seed values used for the MurmurHash3_32 algorithm
+ * @param seeds Seed values used for the MurmurHash3_x86_32 algorithm
  * @param width The character width used for apply substrings;
  *              Default is 4 characters.
  * @param hash_function Hash algorithm to use;
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 586d18150c0..506832881a9 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -36,7 +36,7 @@
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/groupby.hpp>
-#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/default_hash.cuh>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table.hpp>
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index 941a116bcc4..439b1c2d066 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -21,7 +21,7 @@
 #include <hash/managed.cuh>
 
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/default_hash.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 
diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu
index 875d17587c6..68e02ef3cf4 100644
--- a/cpp/src/hash/hashing.cu
+++ b/cpp/src/hash/hashing.cu
@@ -30,8 +30,8 @@ std::unique_ptr<column> hash(table_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   switch (hash_function) {
-    case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, seed, stream, mr);
-    case (hash_id::HASH_SPARK_MURMUR3): return spark_murmur_hash3_32(input, seed, stream, mr);
+    case (hash_id::HASH_MURMUR3): return murmurhash3_x86_32(input, seed, stream, mr);
+    case (hash_id::HASH_SPARK_MURMUR3): return spark_murmurhash3_x86_32(input, seed, stream, mr);
     case (hash_id::HASH_MD5): return md5(input, stream, mr);
     default: CUDF_FAIL("Unsupported hash function.");
   }
diff --git a/cpp/src/hash/murmur_hash.cu b/cpp/src/hash/murmurhash3_x86_32.cu
similarity index 71%
rename from cpp/src/hash/murmur_hash.cu
rename to cpp/src/hash/murmurhash3_x86_32.cu
index 5765b83311e..a6ab301a86e 100644
--- a/cpp/src/hash/murmur_hash.cu
+++ b/cpp/src/hash/murmurhash3_x86_32.cu
@@ -17,7 +17,7 @@
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/hashing/detail/hashing.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
@@ -30,10 +30,10 @@ namespace cudf {
 namespace hashing {
 namespace detail {
 
-std::unique_ptr<column> murmur_hash3_32(table_view const& input,
-                                        uint32_t seed,
-                                        rmm::cuda_stream_view stream,
-                                        rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> murmurhash3_x86_32(table_view const& input,
+                                           uint32_t seed,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
 {
   auto output = make_numeric_column(data_type(type_to_id<hash_value_type>()),
                                     input.num_rows(),
@@ -52,20 +52,20 @@ std::unique_ptr<column> murmur_hash3_32(table_view const& input,
   thrust::tabulate(rmm::exec_policy(stream),
                    output_view.begin<hash_value_type>(),
                    output_view.end<hash_value_type>(),
-                   row_hasher.device_hasher<MurmurHash3_32>(nullable, seed));
+                   row_hasher.device_hasher<MurmurHash3_x86_32>(nullable, seed));
 
   return output;
 }
 
 }  // namespace detail
 
-std::unique_ptr<column> murmur_hash3_32(table_view const& input,
-                                        uint32_t seed,
-                                        rmm::cuda_stream_view stream,
-                                        rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> murmurhash3_x86_32(table_view const& input,
+                                           uint32_t seed,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::murmur_hash3_32(input, seed, stream, mr);
+  return detail::murmurhash3_x86_32(input, seed, stream, mr);
 }
 
 }  // namespace hashing
diff --git a/cpp/src/hash/spark_murmur_hash.cu b/cpp/src/hash/spark_murmurhash3_x86_32.cu
similarity index 86%
rename from cpp/src/hash/spark_murmur_hash.cu
rename to cpp/src/hash/spark_murmurhash3_x86_32.cu
index b996cc5bec5..7568f2167a4 100644
--- a/cpp/src/hash/spark_murmur_hash.cu
+++ b/cpp/src/hash/spark_murmurhash3_x86_32.cu
@@ -36,11 +36,11 @@ namespace {
 using spark_hash_value_type = int32_t;
 
 template <typename Key, CUDF_ENABLE_IF(not cudf::is_nested<Key>())>
-struct SparkMurmurHash3_32 {
+struct SparkMurmurHash3_x86_32 {
   using result_type = spark_hash_value_type;
 
-  constexpr SparkMurmurHash3_32() = default;
-  constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}
+  constexpr SparkMurmurHash3_x86_32() = default;
+  constexpr SparkMurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}
 
   [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
   {
@@ -131,55 +131,56 @@ struct SparkMurmurHash3_32 {
 };
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<bool>::operator()(bool const& key) const
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<bool>::operator()(
+  bool const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<int8_t>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<int8_t>::operator()(
   int8_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<uint8_t>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<uint8_t>::operator()(
   uint8_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<int16_t>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<int16_t>::operator()(
   int16_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<uint16_t>::operator()(
   uint16_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<float>::operator()(
   float const& key) const
 {
   return compute<float>(normalize_nans(key));
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<double>::operator()(
   double const& key) const
 {
   return compute<double>(normalize_nans(key));
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
@@ -188,21 +189,21 @@ spark_hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
   return compute<uint64_t>(key.value());
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal64>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
   return compute<uint64_t>(key.value());
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::operator()(
+spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal128>::operator()(
   numeric::decimal128 const& key) const
 {
   // Generates the Spark MurmurHash3 hash value, mimicking the conversion:
@@ -266,9 +267,9 @@ spark_hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>
  * null.
  *
  * For additional differences such as special tail processing and decimal type
- * handling, refer to the SparkMurmurHash3_32 functor.
+ * handling, refer to the SparkMurmurHash3_x86_32 functor.
  *
- * @tparam hash_function Hash functor to use for hashing elements. Must be SparkMurmurHash3_32.
+ * @tparam hash_function Hash functor to use for hashing elements. Must be SparkMurmurHash3_x86_32.
  * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
  */
 template <template <typename> class hash_function, typename Nullate>
@@ -361,8 +362,8 @@ class spark_murmur_device_row_hasher {
   {
     // Error out if passed an unsupported hash_function
     static_assert(
-      std::is_base_of_v<SparkMurmurHash3_32<int>, hash_function<int>>,
-      "spark_murmur_device_row_hasher only supports the SparkMurmurHash3_32 hash function");
+      std::is_base_of_v<SparkMurmurHash3_x86_32<int>, hash_function<int>>,
+      "spark_murmur_device_row_hasher only supports the SparkMurmurHash3_x86_32 hash function");
   }
 
   Nullate const _check_nulls;
@@ -394,10 +395,10 @@ void check_hash_compatibility(table_view const& input)
 
 }  // namespace
 
-std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
-                                              uint32_t seed,
-                                              rmm::cuda_stream_view stream,
-                                              rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> spark_murmurhash3_x86_32(table_view const& input,
+                                                 uint32_t seed,
+                                                 rmm::cuda_stream_view stream,
+                                                 rmm::mr::device_memory_resource* mr)
 {
   auto output = make_numeric_column(data_type(type_to_id<spark_hash_value_type>()),
                                     input.num_rows(),
@@ -420,20 +421,21 @@ std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
     rmm::exec_policy(stream),
     output_view.begin<spark_hash_value_type>(),
     output_view.end<spark_hash_value_type>(),
-    row_hasher.device_hasher<SparkMurmurHash3_32, spark_murmur_device_row_hasher>(nullable, seed));
+    row_hasher.device_hasher<SparkMurmurHash3_x86_32, spark_murmur_device_row_hasher>(nullable,
+                                                                                      seed));
 
   return output;
 }
 
 }  // namespace detail
 
-std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
-                                              uint32_t seed,
-                                              rmm::cuda_stream_view stream,
-                                              rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> spark_murmurhash3_x86_32(table_view const& input,
+                                                 uint32_t seed,
+                                                 rmm::cuda_stream_view stream,
+                                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::spark_murmur_hash3_32(input, seed, stream, mr);
+  return detail::spark_murmurhash3_x86_32(input, seed, stream, mr);
 }
 
 }  // namespace hashing
diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh
index d61884f7514..87075a39ea3 100644
--- a/cpp/src/hash/unordered_multiset.cuh
+++ b/cpp/src/hash/unordered_multiset.cuh
@@ -20,7 +20,7 @@
 
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/default_hash.cuh>
 #include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu
index f34d1aab7eb..167ae332ac7 100644
--- a/cpp/src/io/json/json_gpu.cu
+++ b/cpp/src/io/json/json_gpu.cu
@@ -20,7 +20,7 @@
 #include <io/utilities/parsing_utils.cuh>
 
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 #include <cudf/types.hpp>
 #include <cudf/utilities/bit.hpp>
 #include <cudf/utilities/span.hpp>
@@ -185,7 +185,7 @@ __device__ field_descriptor next_field_descriptor(char const* begin,
                          false}
       : [&]() {
           auto const key_range = get_next_key(begin, end, opts.quotechar);
-          auto const key_hash  = cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{}(
+          auto const key_hash  = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{}(
             cudf::string_view(key_range.first, key_range.second - key_range.first));
           auto const hash_col = col_map.find(key_hash);
           // Fall back to field index if not found (parsing error)
@@ -506,7 +506,7 @@ __global__ void collect_keys_info_kernel(parse_options_view const options,
       keys_info->column(0).element<uint64_t>(idx) = field_range.key_begin - data.begin();
       keys_info->column(1).element<uint16_t>(idx) = len;
       keys_info->column(2).element<uint32_t>(idx) =
-        cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{}(
+        cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{}(
           cudf::string_view(field_range.key_begin, len));
     }
   }
diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu
index 3a089f0dfdd..81474ed9ef3 100644
--- a/cpp/src/io/json/json_tree.cu
+++ b/cpp/src/io/json/json_tree.cu
@@ -23,8 +23,8 @@
 #include <cudf/detail/scatter.cuh>
 #include <cudf/detail/utilities/algorithm.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/hashing/detail/default_hash.cuh>
 #include <cudf/hashing/detail/hashing.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/span.hpp>
 
diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu
index 97fd8047277..72e38fd2e1c 100644
--- a/cpp/src/io/parquet/chunk_dict.cu
+++ b/cpp/src/io/parquet/chunk_dict.cu
@@ -63,7 +63,7 @@ struct hash_functor {
   column_device_view const& col;
   __device__ auto operator()(size_type idx) const
   {
-    return cudf::hashing::detail::MurmurHash3_32<T>{}(col.element<T>(idx));
+    return cudf::hashing::detail::MurmurHash3_x86_32<T>{}(col.element<T>(idx));
   }
 };
 
diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index 18dd442a9e4..a870d973dc1 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -18,8 +18,7 @@
 
 #include <io/utilities/column_buffer.hpp>
 
-#include <cudf/hashing/detail/hash_functions.cuh>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 
 namespace cudf {
 namespace io {
@@ -48,7 +47,7 @@ inline __device__ void gpuOutputString(volatile page_state_s* s,
     uint32_t constexpr hash_seed = 33;
     cudf::string_view const sv{ptr, static_cast<size_type>(len)};
     *static_cast<uint32_t*>(dstv) =
-      cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{hash_seed}(sv);
+      cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{hash_seed}(sv);
   } else {
     // Output string descriptor
     auto* dst   = static_cast<string_index_pair*>(dstv);
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index e4d131219a3..4c1b1ed98b1 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -16,7 +16,7 @@
 #pragma once
 
 #include <cudf/detail/join.hpp>
-#include <cudf/hashing/detail/hash_functions.cuh>
+#include <cudf/hashing/detail/default_hash.cuh>
 #include <cudf/join.hpp>
 #include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_view.hpp>
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 2543d3a99cc..e1e8f65a674 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -22,7 +22,7 @@
 #include <cudf/detail/scatter.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 #include <cudf/partitioning.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
@@ -724,6 +724,35 @@ struct dispatch_map_type {
 
 namespace detail {
 namespace {
+
+/**
+ * @brief  This hash function simply returns the value that is asked to be hashed,
+ * converted with static_cast to the result_type of the functor.
+ */
+template <typename Key>
+struct IdentityHash {
+  using result_type = uint32_t;
+  IdentityHash()    = default;
+  constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}
+
+  template <typename return_type = result_type>
+  constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
+    Key const& key) const
+  {
+    CUDF_UNREACHABLE("IdentityHash does not support this data type");
+  }
+
+  template <typename return_type = result_type>
+  constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
+    Key const& key) const
+  {
+    return static_cast<result_type>(key);
+  }
+
+ private:
+  uint32_t m_seed{0};
+};
+
 template <template <typename> class hash_function>
 std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
   table_view const& input,
@@ -789,10 +818,10 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
         if (!is_numeric(input.column(column_id).type()))
           CUDF_FAIL("IdentityHash does not support this data type");
       }
-      return detail::hash_partition<cudf::hashing::detail::IdentityHash>(
+      return detail::hash_partition<cudf::detail::IdentityHash>(
         input, columns_to_hash, num_partitions, seed, stream, mr);
     case (hash_id::HASH_MURMUR3):
-      return detail::hash_partition<cudf::hashing::detail::MurmurHash3_32>(
+      return detail::hash_partition<cudf::hashing::detail::MurmurHash3_x86_32>(
         input, columns_to_hash, num_partitions, seed, stream, mr);
     default: CUDF_FAIL("Unsupported hash function in hash_partition");
   }
diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu
index ee8da4b6e06..e37f0686ac3 100644
--- a/cpp/src/search/contains_table.cu
+++ b/cpp/src/search/contains_table.cu
@@ -16,7 +16,6 @@
 
 #include <join/join_common_utils.cuh>
 
-#include <cudf/detail/join.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_view.hpp>
diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp
index 4e887b1199b..0cd2d8f4b14 100644
--- a/cpp/src/stream_compaction/stream_compaction_common.hpp
+++ b/cpp/src/stream_compaction/stream_compaction_common.hpp
@@ -15,7 +15,6 @@
  */
 #pragma once
 
-#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu
index 3276fcba9d5..1cf57dde475 100644
--- a/cpp/src/text/generate_ngrams.cu
+++ b/cpp/src/text/generate_ngrams.cu
@@ -24,7 +24,7 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/sizes_to_offsets_iterator.cuh>
-#include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 #include <cudf/strings/detail/strings_children.cuh>
 #include <cudf/strings/detail/utilities.cuh>
 #include <cudf/strings/string_view.cuh>
@@ -298,7 +298,7 @@ struct character_ngram_hash_fn : base_character_ngram_fn<character_ngram_hash_fn
 
   __device__ void process_ngram(cudf::string_view d_str, cudf::size_type offset) const
   {
-    auto const hasher = cudf::detail::MurmurHash3_32<cudf::string_view>{0};
+    auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{0};
     d_hashes[offset]  = hasher(d_str);
   }
 };
diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu
index 6961e39096a..05210b60154 100644
--- a/cpp/src/text/minhash.cu
+++ b/cpp/src/text/minhash.cu
@@ -25,7 +25,7 @@
 #include <cudf/detail/sequence.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/hashing/detail/hashing.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
@@ -92,7 +92,7 @@ struct minhash_fn {
       // computing the substrings for each seed
       for (std::size_t seed_idx = 0; seed_idx < seeds.size(); ++seed_idx) {
         auto const hasher =
-          cudf::hashing::detail::MurmurHash3_32<cudf::string_view>{seeds[seed_idx]};
+          cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{seeds[seed_idx]};
         auto const hvalue = hasher(hash_str);
         cuda::atomic_ref<cudf::hash_value_type, cuda::thread_scope_block> ref{
           *(d_output + seed_idx)};
diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu
index 700dbd54e0d..413fb2497c0 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cu
+++ b/cpp/src/text/subword/bpe_tokenizer.cu
@@ -22,7 +22,6 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/get_value.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/hashing/detail/hash_functions.cuh>
 #include <cudf/strings/detail/combine.hpp>
 #include <cudf/strings/detail/strings_children.cuh>
 #include <cudf/utilities/default_stream.hpp>
diff --git a/cpp/src/text/subword/bpe_tokenizer.cuh b/cpp/src/text/subword/bpe_tokenizer.cuh
index 72f861ab887..b34f2dc4468 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cuh
+++ b/cpp/src/text/subword/bpe_tokenizer.cuh
@@ -21,7 +21,7 @@
 #include <hash/hash_allocator.cuh>
 
 #include <cudf/column/column.hpp>
-#include <cudf/hashing/detail/murmur_hash32.cuh>
+#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
 
 #include <cuco/static_map.cuh>
 
@@ -41,7 +41,7 @@ using merge_pairs_map_type = cuco::static_map<cudf::hash_value_type,
                                               cuda::thread_scope_device,
                                               hash_table_allocator_type>;
 
-using string_hasher_type = cudf::hashing::detail::MurmurHash3_32<cudf::string_view>;
+using string_hasher_type = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>;
 
 }  // namespace detail
 
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e15ebc95824..68ea3df3f3c 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -164,7 +164,8 @@ ConfigureTest(DATETIME_OPS_TEST datetime/datetime_ops_test.cpp)
 # ##################################################################################################
 # * hashing tests ---------------------------------------------------------------------------------
 ConfigureTest(
-  HASHING_TEST hashing/murmur3_test.cpp hashing/spark_murmur3_test.cpp hashing/md5_test.cpp
+  HASHING_TEST hashing/murmurhash3_x86_32_test.cpp hashing/spark_murmurhash3_x86_32_test.cpp
+  hashing/md5_test.cpp
 )
 
 # ##################################################################################################
diff --git a/cpp/tests/hashing/murmur3_test.cpp b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp
similarity index 91%
rename from cpp/tests/hashing/murmur3_test.cpp
rename to cpp/tests/hashing/murmurhash3_x86_32_test.cpp
index 999f656dcc0..a4b16550398 100644
--- a/cpp/tests/hashing/murmur3_test.cpp
+++ b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp
@@ -55,8 +55,8 @@ TEST_F(MurmurHashTest, MultiValue)
   auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col});
   auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col});
 
-  auto const output1 = cudf::hashing::murmur_hash3_32(input1);
-  auto const output2 = cudf::hashing::murmur_hash3_32(input2);
+  auto const output1 = cudf::hashing::murmurhash3_x86_32(input1);
+  auto const output2 = cudf::hashing::murmurhash3_x86_32(input2);
 
   EXPECT_EQ(input1.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
@@ -112,8 +112,8 @@ TEST_F(MurmurHashTest, MultiValueNulls)
   auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1, secs_col1});
   auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2, secs_col2});
 
-  auto const output1 = cudf::hashing::murmur_hash3_32(input1);
-  auto const output2 = cudf::hashing::murmur_hash3_32(input2);
+  auto const output1 = cudf::hashing::murmurhash3_x86_32(input1);
+  auto const output2 = cudf::hashing::murmurhash3_x86_32(input2);
 
   EXPECT_EQ(input1.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
@@ -139,7 +139,7 @@ TEST_F(MurmurHashTest, BasicList)
                           -1023787369,
                           -1023787369};
 
-  auto const output = cudf::hashing::murmur_hash3_32(input);
+  auto const output = cudf::hashing::murmurhash3_x86_32(input);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 
   auto const expect_seeded = ICW{1607594268u,
@@ -155,7 +155,7 @@ TEST_F(MurmurHashTest, BasicList)
                                  1756855002u,
                                  1756855002u};
 
-  auto const seeded_output = cudf::hashing::murmur_hash3_32(input, 15);
+  auto const seeded_output = cudf::hashing::murmurhash3_x86_32(input, 15);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect_seeded, seeded_output->view(), verbosity);
 }
 
@@ -179,7 +179,7 @@ TEST_F(MurmurHashTest, NullableList)
                     -1205248335,
                     -2023148682};
 
-  auto const output = cudf::hashing::murmur_hash3_32(cudf::table_view({col}));
+  auto const output = cudf::hashing::murmurhash3_x86_32(cudf::table_view({col}));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 
   auto const expect_seeded = ICW{2271820643u,
@@ -194,7 +194,7 @@ TEST_F(MurmurHashTest, NullableList)
                                  595138041u,
                                  2271820578u};
 
-  auto const seeded_output = cudf::hashing::murmur_hash3_32(cudf::table_view({col}), 31);
+  auto const seeded_output = cudf::hashing::murmurhash3_x86_32(cudf::table_view({col}), 31);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect_seeded, seeded_output->view(), verbosity);
 }
 
@@ -236,7 +236,7 @@ TEST_F(MurmurHashTest, ListOfStruct)
                                                                  -319840811,
                                                                  -319840811};
 
-  auto const output = cudf::hashing::murmur_hash3_32(cudf::table_view({*list_column}));
+  auto const output = cudf::hashing::murmurhash3_x86_32(cudf::table_view({*list_column}));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 
   auto expect_seeded = cudf::test::fixed_width_column_wrapper<uint32_t>{81710442u,
@@ -257,7 +257,8 @@ TEST_F(MurmurHashTest, ListOfStruct)
                                                                         1696730835u,
                                                                         1696730835u};
 
-  auto const seeded_output = cudf::hashing::murmur_hash3_32(cudf::table_view({*list_column}), 619);
+  auto const seeded_output =
+    cudf::hashing::murmurhash3_x86_32(cudf::table_view({*list_column}), 619);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect_seeded, seeded_output->view(), verbosity);
 }
 
@@ -304,7 +305,7 @@ TEST_F(MurmurHashTest, ListOfEmptyStruct)
                                                                  3954409052u,
                                                                  3954409052u};
 
-  auto output = cudf::hashing::murmur_hash3_32(cudf::table_view({*list_column}));
+  auto output = cudf::hashing::murmurhash3_x86_32(cudf::table_view({*list_column}));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 }
 
@@ -329,7 +330,7 @@ TEST_F(MurmurHashTest, EmptyDeepList)
   auto expect = cudf::test::fixed_width_column_wrapper<uint32_t>{
     2271818677u, 2271818677u, 2271818614u, 2271818614u};
 
-  auto output = cudf::hashing::murmur_hash3_32(cudf::table_view({*list_column}));
+  auto output = cudf::hashing::murmurhash3_x86_32(cudf::table_view({*list_column}));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 }
 
@@ -344,8 +345,8 @@ TYPED_TEST(MurmurHashTestTyped, Equality)
   auto const input = cudf::table_view({col});
 
   // Hash of same input should be equal
-  auto const output1 = cudf::hashing::murmur_hash3_32(input);
-  auto const output2 = cudf::hashing::murmur_hash3_32(input);
+  auto const output1 = cudf::hashing::murmurhash3_x86_32(input);
+  auto const output2 = cudf::hashing::murmurhash3_x86_32(input);
 
   EXPECT_EQ(input.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
@@ -362,8 +363,8 @@ TYPED_TEST(MurmurHashTestTyped, EqualityNulls)
   auto const input1 = cudf::table_view({col1});
   auto const input2 = cudf::table_view({col2});
 
-  auto const output1 = cudf::hashing::murmur_hash3_32(input1);
-  auto const output2 = cudf::hashing::murmur_hash3_32(input2);
+  auto const output1 = cudf::hashing::murmurhash3_x86_32(input1);
+  auto const output2 = cudf::hashing::murmurhash3_x86_32(input2);
 
   EXPECT_EQ(input1.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
@@ -393,9 +394,9 @@ TYPED_TEST(MurmurHashTestFloatTyped, TestExtremes)
   auto const table_col_neg_zero = cudf::table_view({col_neg_zero});
   auto const table_col_neg_nan  = cudf::table_view({col_neg_nan});
 
-  auto const hash_col          = cudf::hashing::murmur_hash3_32(table_col);
-  auto const hash_col_neg_zero = cudf::hashing::murmur_hash3_32(table_col_neg_zero);
-  auto const hash_col_neg_nan  = cudf::hashing::murmur_hash3_32(table_col_neg_nan);
+  auto const hash_col          = cudf::hashing::murmurhash3_x86_32(table_col);
+  auto const hash_col_neg_zero = cudf::hashing::murmurhash3_x86_32(table_col_neg_zero);
+  auto const hash_col_neg_nan  = cudf::hashing::murmurhash3_x86_32(table_col_neg_nan);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_col, *hash_col_neg_zero, verbosity);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_col, *hash_col_neg_nan, verbosity);
diff --git a/cpp/tests/hashing/spark_murmur3_test.cpp b/cpp/tests/hashing/spark_murmurhash3_x86_32_test.cpp
similarity index 90%
rename from cpp/tests/hashing/spark_murmur3_test.cpp
rename to cpp/tests/hashing/spark_murmurhash3_x86_32_test.cpp
index af970d817ce..c228c1e6378 100644
--- a/cpp/tests/hashing/spark_murmur3_test.cpp
+++ b/cpp/tests/hashing/spark_murmurhash3_x86_32_test.cpp
@@ -37,8 +37,8 @@ TYPED_TEST(SparkMurmurHashTestTyped, Equality)
   auto const input = cudf::table_view({col});
 
   // Hash of same input should be equal
-  auto const spark_output1 = cudf::hashing::spark_murmur_hash3_32(input, 0);
-  auto const spark_output2 = cudf::hashing::spark_murmur_hash3_32(input);
+  auto const spark_output1 = cudf::hashing::spark_murmurhash3_x86_32(input, 0);
+  auto const spark_output2 = cudf::hashing::spark_murmurhash3_x86_32(input);
 
   EXPECT_EQ(input.num_rows(), spark_output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(spark_output1->view(), spark_output2->view());
@@ -55,8 +55,8 @@ TYPED_TEST(SparkMurmurHashTestTyped, EqualityNulls)
   auto const input1 = cudf::table_view({col1});
   auto const input2 = cudf::table_view({col2});
 
-  auto const spark_output1 = cudf::hashing::spark_murmur_hash3_32(input1, 0);
-  auto const spark_output2 = cudf::hashing::spark_murmur_hash3_32(input2);
+  auto const spark_output1 = cudf::hashing::spark_murmurhash3_x86_32(input1, 0);
+  auto const spark_output2 = cudf::hashing::spark_murmurhash3_x86_32(input2);
 
   EXPECT_EQ(input1.num_rows(), spark_output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(spark_output1->view(), spark_output2->view());
@@ -87,8 +87,8 @@ TYPED_TEST(SparkMurmurHashTestFloatTyped, TestExtremes)
   auto const table_col_neg_nan  = cudf::table_view({col_neg_nan});
 
   // Spark hash is sensitive to 0 and -0
-  auto const spark_col         = cudf::hashing::spark_murmur_hash3_32(table_col, 0);
-  auto const spark_col_neg_nan = cudf::hashing::spark_murmur_hash3_32(table_col_neg_nan);
+  auto const spark_col         = cudf::hashing::spark_murmurhash3_x86_32(table_col, 0);
+  auto const spark_col_neg_nan = cudf::hashing::spark_murmurhash3_x86_32(table_col_neg_nan);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*spark_col, *spark_col_neg_nan);
 }
@@ -144,8 +144,8 @@ TEST_F(SparkMurmurHashTest, MultiValueNulls)
 
   auto const input1        = cudf::table_view({strings_col1, ints_col1, bools_col1, secs_col1});
   auto const input2        = cudf::table_view({strings_col2, ints_col2, bools_col2, secs_col2});
-  auto const spark_output1 = cudf::hashing::spark_murmur_hash3_32(input1, 0);
-  auto const spark_output2 = cudf::hashing::spark_murmur_hash3_32(input2);
+  auto const spark_output1 = cudf::hashing::spark_murmurhash3_x86_32(input1, 0);
+  auto const spark_output2 = cudf::hashing::spark_murmurhash3_x86_32(input2);
 
   EXPECT_EQ(input1.num_rows(), spark_output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(spark_output1->view(), spark_output2->view());
@@ -297,27 +297,34 @@ TEST_F(SparkMurmurHashTest, MultiValueWithSeeds)
     numeric::scale_type{-11});
 
   auto const hash_structs =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({structs_col}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({structs_col}), 42);
   auto const hash_strings =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({strings_col}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({strings_col}), 42);
   auto const hash_doubles =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({doubles_col}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({doubles_col}), 42);
   auto const hash_timestamps =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({timestamps_col}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({timestamps_col}), 42);
   auto const hash_decimal64 =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({decimal64_col}), 42);
-  auto const hash_longs  = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({longs_col}), 42);
-  auto const hash_floats = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({floats_col}), 42);
-  auto const hash_dates  = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({dates_col}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({decimal64_col}), 42);
+  auto const hash_longs =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({longs_col}), 42);
+  auto const hash_floats =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({floats_col}), 42);
+  auto const hash_dates =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({dates_col}), 42);
   auto const hash_decimal32 =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({decimal32_col}), 42);
-  auto const hash_ints   = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({ints_col}), 42);
-  auto const hash_shorts = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({shorts_col}), 42);
-  auto const hash_bytes  = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({bytes_col}), 42);
-  auto const hash_bools1 = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({bools_col1}), 42);
-  auto const hash_bools2 = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({bools_col2}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({decimal32_col}), 42);
+  auto const hash_ints = cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({ints_col}), 42);
+  auto const hash_shorts =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({shorts_col}), 42);
+  auto const hash_bytes =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({bytes_col}), 42);
+  auto const hash_bools1 =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({bools_col1}), 42);
+  auto const hash_bools2 =
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({bools_col2}), 42);
   auto const hash_decimal128 =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({decimal128_col}), 42);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({decimal128_col}), 42);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_structs, hash_structs_expected, verbosity);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_strings, hash_strings_expected, verbosity);
@@ -349,7 +356,7 @@ TEST_F(SparkMurmurHashTest, MultiValueWithSeeds)
                                                 bytes_col,
                                                 bools_col2,
                                                 decimal128_col});
-  auto const hash_combined  = cudf::hashing::spark_murmur_hash3_32(combined_table, 42);
+  auto const hash_combined  = cudf::hashing::spark_murmurhash3_x86_32(combined_table, 42);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_combined, hash_combined_expected, verbosity);
 }
 
@@ -375,7 +382,7 @@ TEST_F(SparkMurmurHashTest, StringsWithSeed)
      "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"});
 
   auto const hash_strings =
-    cudf::hashing::spark_murmur_hash3_32(cudf::table_view({strings_col}), 314);
+    cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({strings_col}), 314);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_strings, hash_strings_expected_seed_314, verbosity);
 }
@@ -451,7 +458,7 @@ TEST_F(SparkMurmurHashTest, ListValues)
                                                                 -912918097,
                                                                 -912918097};
 
-  auto output = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({*list_column}), 42);
+  auto output = cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({*list_column}), 42);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 }
 
@@ -503,7 +510,7 @@ TEST_F(SparkMurmurHashTest, StructOfListValues)
   auto expect = cudf::test::fixed_width_column_wrapper<int32_t>{
     42, 59727262, -559580957, -559580957, -559580957, -559580957, 170038658};
 
-  auto output = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({struct_column}), 42);
+  auto output = cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({struct_column}), 42);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
 }
 
@@ -556,14 +563,14 @@ TEST_F(SparkMurmurHashTest, ListOfStructValues)
 
   // TODO: Lists of structs are not yet supported. Once support is added,
   // remove this EXPECT_THROW and uncomment the rest of this test.
-  EXPECT_THROW(cudf::hashing::spark_murmur_hash3_32(cudf::table_view({*list_column}), 42),
+  EXPECT_THROW(cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({*list_column}), 42),
                cudf::logic_error);
 
   /*
   auto expect = cudf::test::fixed_width_column_wrapper<int32_t>{
     59727262, 42, 42, -559580957, -559580957, -912918097, 1092624418, 170038658};
 
-  auto output = cudf::hashing::spark_murmur_hash3_32(cudf::table_view({*list_column}), 42);
+  auto output = cudf::hashing::spark_murmurhash3_x86_32(cudf::table_view({*list_column}), 42);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
   */
 }

From 9d7598a9f93e424d9502f558d444e28ca68cb89b Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 13 Jul 2023 20:03:23 -0400
Subject: [PATCH 6/8] remove forward reference

---
 .../cudf/hashing/detail/default_hash.cuh      |  3 --
 cpp/src/hash/spark_murmurhash3_x86_32.cu      | 40 +++++++++----------
 cpp/src/partitioning/partitioning.cu          |  4 +-
 cpp/src/text/generate_ngrams.cu               |  1 -
 4 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/cpp/include/cudf/hashing/detail/default_hash.cuh b/cpp/include/cudf/hashing/detail/default_hash.cuh
index 21aad0ce91e..90d467a31cb 100644
--- a/cpp/include/cudf/hashing/detail/default_hash.cuh
+++ b/cpp/include/cudf/hashing/detail/default_hash.cuh
@@ -20,9 +20,6 @@
 
 namespace cudf::hashing::detail {
 
-template <typename Key>
-struct MurmurHash3_x86_32;
-
 template <typename Key>
 using default_hash = MurmurHash3_x86_32<Key>;
 
diff --git a/cpp/src/hash/spark_murmurhash3_x86_32.cu b/cpp/src/hash/spark_murmurhash3_x86_32.cu
index 7568f2167a4..15435723c54 100644
--- a/cpp/src/hash/spark_murmurhash3_x86_32.cu
+++ b/cpp/src/hash/spark_murmurhash3_x86_32.cu
@@ -36,11 +36,11 @@ namespace {
 using spark_hash_value_type = int32_t;
 
 template <typename Key, CUDF_ENABLE_IF(not cudf::is_nested<Key>())>
-struct SparkMurmurHash3_x86_32 {
+struct Spark_MurmurHash3_x86_32 {
   using result_type = spark_hash_value_type;
 
-  constexpr SparkMurmurHash3_x86_32() = default;
-  constexpr SparkMurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}
+  constexpr Spark_MurmurHash3_x86_32() = default;
+  constexpr Spark_MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}
 
   [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
   {
@@ -131,56 +131,56 @@ struct SparkMurmurHash3_x86_32 {
 };
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<bool>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<bool>::operator()(
   bool const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<int8_t>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<int8_t>::operator()(
   int8_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<uint8_t>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<uint8_t>::operator()(
   uint8_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<int16_t>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<int16_t>::operator()(
   int16_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<uint16_t>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<uint16_t>::operator()(
   uint16_t const& key) const
 {
   return compute<uint32_t>(key);
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<float>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<float>::operator()(
   float const& key) const
 {
   return compute<float>(normalize_nans(key));
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<double>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<double>::operator()(
   double const& key) const
 {
   return compute<double>(normalize_nans(key));
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<cudf::string_view>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
   auto const data = reinterpret_cast<std::byte const*>(key.data());
@@ -189,21 +189,21 @@ spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<cudf::string_vie
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal32>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<numeric::decimal32>::operator()(
   numeric::decimal32 const& key) const
 {
   return compute<uint64_t>(key.value());
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal64>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<numeric::decimal64>::operator()(
   numeric::decimal64 const& key) const
 {
   return compute<uint64_t>(key.value());
 }
 
 template <>
-spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal128>::operator()(
+spark_hash_value_type __device__ inline Spark_MurmurHash3_x86_32<numeric::decimal128>::operator()(
   numeric::decimal128 const& key) const
 {
   // Generates the Spark MurmurHash3 hash value, mimicking the conversion:
@@ -267,9 +267,9 @@ spark_hash_value_type __device__ inline SparkMurmurHash3_x86_32<numeric::decimal
  * null.
  *
  * For additional differences such as special tail processing and decimal type
- * handling, refer to the SparkMurmurHash3_x86_32 functor.
+ * handling, refer to the Spark_MurmurHash3_x86_32 functor.
  *
- * @tparam hash_function Hash functor to use for hashing elements. Must be SparkMurmurHash3_x86_32.
+ * @tparam hash_function Hash functor to use for hashing elements. Must be Spark_MurmurHash3_x86_32.
  * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
  */
 template <template <typename> class hash_function, typename Nullate>
@@ -362,8 +362,8 @@ class spark_murmur_device_row_hasher {
   {
     // Error out if passed an unsupported hash_function
     static_assert(
-      std::is_base_of_v<SparkMurmurHash3_x86_32<int>, hash_function<int>>,
-      "spark_murmur_device_row_hasher only supports the SparkMurmurHash3_x86_32 hash function");
+      std::is_base_of_v<Spark_MurmurHash3_x86_32<int>, hash_function<int>>,
+      "spark_murmur_device_row_hasher only supports the Spark_MurmurHash3_x86_32 hash function");
   }
 
   Nullate const _check_nulls;
@@ -421,8 +421,8 @@ std::unique_ptr<column> spark_murmurhash3_x86_32(table_view const& input,
     rmm::exec_policy(stream),
     output_view.begin<spark_hash_value_type>(),
     output_view.end<spark_hash_value_type>(),
-    row_hasher.device_hasher<SparkMurmurHash3_x86_32, spark_murmur_device_row_hasher>(nullable,
-                                                                                      seed));
+    row_hasher.device_hasher<Spark_MurmurHash3_x86_32, spark_murmur_device_row_hasher>(nullable,
+                                                                                       seed));
 
   return output;
 }
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index e1e8f65a674..cadd85284b8 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -726,8 +726,8 @@ namespace detail {
 namespace {
 
 /**
- * @brief  This hash function simply returns the value that is asked to be hash
- * reinterpreted as the result_type of the functor.
+ * @brief This hash function simply returns the input value cast to the
+ * result_type of the functor.
  */
 template <typename Key>
 struct IdentityHash {
diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu
index 1cf57dde475..78354c75c25 100644
--- a/cpp/src/text/generate_ngrams.cu
+++ b/cpp/src/text/generate_ngrams.cu
@@ -20,7 +20,6 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/copy_if.cuh>
-#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/sizes_to_offsets_iterator.cuh>

From c71df3ab54de4ec7ab0fe68d637f3b62737bbb04 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Fri, 14 Jul 2023 08:27:59 -0400
Subject: [PATCH 7/8] fix from merge changes

---
 cpp/src/io/orc/dict_enc.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu
index c57962700f6..e06b28bc52c 100644
--- a/cpp/src/io/orc/dict_enc.cu
+++ b/cpp/src/io/orc/dict_enc.cu
@@ -98,7 +98,7 @@ struct hash_functor {
   column_device_view const& col;
   __device__ auto operator()(size_type idx) const
   {
-    return cudf::detail::MurmurHash3_32<string_view>{}(col.element<string_view>(idx));
+    return cudf::hashing::detail::MurmurHash3_x86_32<string_view>{}(col.element<string_view>(idx));
   }
 };
 

From e907c0d28d7aaee38fdaac34e9ca593622bf97f5 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Fri, 14 Jul 2023 14:51:05 -0400
Subject: [PATCH 8/8] add doxygen and remove m_seed

---
 cpp/include/cudf/hashing/detail/default_hash.cuh | 9 +++++++++
 cpp/src/partitioning/partitioning.cu             | 9 +++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/cpp/include/cudf/hashing/detail/default_hash.cuh b/cpp/include/cudf/hashing/detail/default_hash.cuh
index 90d467a31cb..37e13d8842f 100644
--- a/cpp/include/cudf/hashing/detail/default_hash.cuh
+++ b/cpp/include/cudf/hashing/detail/default_hash.cuh
@@ -20,6 +20,15 @@
 
 namespace cudf::hashing::detail {
 
+/**
+ * @brief The default hash algorithm for use within libcudf internal functions
+ *
+ * This is declared here so it may be changed to another algorithm without modifying
+ * all those places that use it. Internal function implementations are encouraged to
+ * use the `cudf::hashing::detail::default_hash` where possible.
+ *
+ * @tparam Key The key type for use by the hash class
+ */
 template <typename Key>
 using default_hash = MurmurHash3_x86_32<Key>;
 
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index cadd85284b8..0d94db110b4 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -731,9 +731,9 @@ namespace {
  */
 template <typename Key>
 struct IdentityHash {
-  using result_type = uint32_t;
-  IdentityHash()    = default;
-  constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}
+  using result_type        = uint32_t;
+  constexpr IdentityHash() = default;
+  constexpr IdentityHash(uint32_t) {}
 
   template <typename return_type = result_type>
   constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
@@ -748,9 +748,6 @@ struct IdentityHash {
   {
     return static_cast<result_type>(key);
   }
-
- private:
-  uint32_t m_seed{0};
 };
 
 template <template <typename> class hash_function>