From 1376774180c4cc62db2f9a51dc859144550dbd03 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanlee@nvidia.com>
Date: Wed, 26 Jan 2022 16:31:20 -0800
Subject: [PATCH 1/4] Merge kernels into compute_bytes and cleanup kernel code

---
 .../cudf/detail/utilities/hash_functions.cuh  | 163 ++++++------------
 1 file changed, 50 insertions(+), 113 deletions(-)
diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index b5ca5a3590e..f9023fcf32a 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -150,36 +150,47 @@ struct MurmurHash3_32 {
   template <typename TKey>
   result_type __device__ inline compute(TKey const& key) const
   {
-    constexpr int len         = sizeof(argument_type);
-    uint8_t const* const data = reinterpret_cast<uint8_t const*>(&key);
-    constexpr int nblocks     = len / 4;
+    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(TKey));
+  }
+
+  result_type __device__ compute_bytes(std::byte const* const data, cudf::size_type const len) const
+  {
+    cudf::size_type const nblocks     = len / 4;
+    cudf::size_type const tail_offset = nblocks * 4;
+    result_type h1                    = m_seed;
+    constexpr uint32_t c1             = 0xcc9e2d51;
+    constexpr uint32_t c2             = 0x1b873593;
+    constexpr uint32_t c3             = 0xe6546b64;
+    constexpr uint32_t rot_c1         = 15;
+    constexpr uint32_t rot_c2         = 13;
+    auto getblock32                   = [] __device__(uint32_t const* p, int i) -> uint32_t {
+      // Individual byte reads for unaligned accesses (very likely for strings)
+      auto q = (uint8_t const*)(p + i);
+      return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
+    };
 
-    uint32_t h1           = m_seed;
-    constexpr uint32_t c1 = 0xcc9e2d51;
-    constexpr uint32_t c2 = 0x1b873593;
     //----------
     // body
-    uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data + nblocks * 4);
-    for (int i = -nblocks; i; i++) {
-      uint32_t k1 = blocks[i];  // getblock32(blocks,i);
+    uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data);
+    for (int i = 0; i < nblocks; i++) {
+      uint32_t k1 = getblock32(blocks, i);
       k1 *= c1;
-      k1 = rotl32(k1, 15);
+      k1 = rotl32(k1, rot_c1);
       k1 *= c2;
       h1 ^= k1;
-      h1 = rotl32(h1, 13);
-      h1 = h1 * 5 + 0xe6546b64;
+      h1 = rotl32(h1, rot_c2);
+      h1 = h1 * 5 + c3;
     }
     //----------
     // tail
-    uint8_t const* tail = reinterpret_cast<uint8_t const*>(data + nblocks * 4);
-    uint32_t k1         = 0;
+    uint32_t k1 = 0;
     switch (len & 3) {
-      case 3: k1 ^= tail[2] << 16;
-      case 2: k1 ^= tail[1] << 8;
+      case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16;
+      case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8;
       case 1:
-        k1 ^= tail[0];
+        k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
         k1 *= c1;
-        k1 = rotl32(k1, 15);
+        k1 = rotl32(k1, rot_c1);
         k1 *= c2;
         h1 ^= k1;
     };
@@ -207,49 +218,7 @@ template <>
 hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
-  auto const len        = key.size_bytes();
-  uint8_t const* data   = reinterpret_cast<uint8_t const*>(key.data());
-  int const nblocks     = len / 4;
-  result_type h1        = m_seed;
-  constexpr uint32_t c1 = 0xcc9e2d51;
-  constexpr uint32_t c2 = 0x1b873593;
-  auto getblock32       = [] __device__(uint32_t const* p, int i) -> uint32_t {
-    // Individual byte reads for unaligned accesses (very likely)
-    auto q = (uint8_t const*)(p + i);
-    return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
-  };
-
-  //----------
-  // body
-  uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data + nblocks * 4);
-  for (int i = -nblocks; i; i++) {
-    uint32_t k1 = getblock32(blocks, i);
-    k1 *= c1;
-    k1 = rotl32(k1, 15);
-    k1 *= c2;
-    h1 ^= k1;
-    h1 = rotl32(h1, 13);
-    h1 = h1 * 5 + 0xe6546b64;
-  }
-  //----------
-  // tail
-  uint8_t const* tail = reinterpret_cast<uint8_t const*>(data + nblocks * 4);
-  uint32_t k1         = 0;
-  switch (len & 3) {
-    case 3: k1 ^= tail[2] << 16;
-    case 2: k1 ^= tail[1] << 8;
-    case 1:
-      k1 ^= tail[0];
-      k1 *= c1;
-      k1 = rotl32(k1, 15);
-      k1 *= c2;
-      h1 ^= k1;
-  };
-  //----------
-  // finalization
-  h1 ^= len;
-  h1 = fmix32(h1);
-  return h1;
+  return this->compute_bytes(reinterpret_cast<std::byte const*>(key.data()), key.size_bytes());
 }
 
 template <>
@@ -346,23 +315,30 @@ struct SparkMurmurHash3_32 {
 
   result_type __device__ compute_bytes(std::byte const* const data, cudf::size_type const len) const
   {
-    constexpr cudf::size_type block_size = sizeof(uint32_t) / sizeof(std::byte);
-    cudf::size_type const nblocks        = len / block_size;
-    uint32_t h1                          = m_seed;
-    constexpr uint32_t c1                = 0xcc9e2d51;
-    constexpr uint32_t c2                = 0x1b873593;
+    cudf::size_type const nblocks = len / 4;
+    uint32_t h1                   = m_seed;
+    constexpr uint32_t c1         = 0xcc9e2d51;
+    constexpr uint32_t c2         = 0x1b873593;
+    constexpr uint32_t c3         = 0xe6546b64;
+    constexpr uint32_t rot_c1     = 15;
+    constexpr uint32_t rot_c2     = 13;
+    auto getblock32               = [] __device__(uint32_t const* p, int i) -> uint32_t {
+      // Individual byte reads for unaligned accesses (very likely for strings)
+      auto q = (uint8_t const*)(p + i);
+      return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
+    };
 
     //----------
     // Process all four-byte chunks
     uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data);
     for (cudf::size_type i = 0; i < nblocks; i++) {
-      uint32_t k1 = blocks[i];
+      uint32_t k1 = getblock32(blocks, i);
       k1 *= c1;
-      k1 = rotl32(k1, 15);
+      k1 = rotl32(k1, rot_c1);
       k1 *= c2;
       h1 ^= k1;
-      h1 = rotl32(h1, 13);
-      h1 = h1 * 5 + 0xe6546b64;
+      h1 = rotl32(h1, rot_c2);
+      h1 = h1 * 5 + c3;
     }
     //----------
     // Process remaining bytes that do not fill a four-byte chunk using Spark's approach
@@ -374,11 +350,11 @@ struct SparkMurmurHash3_32 {
       // signedness when casting byte-to-int, but C++ does not.
       uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
       k1 *= c1;
-      k1 = rotl32(k1, 15);
+      k1 = rotl32(k1, rot_c1);
       k1 *= c2;
       h1 ^= k1;
-      h1 = rotl32(h1, 13);
-      h1 = h1 * 5 + 0xe6546b64;
+      h1 = rotl32(h1, rot_c2);
+      h1 = h1 * 5 + c3;
     }
     //----------
     // finalization
@@ -501,46 +477,7 @@ template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
-  auto const len        = key.size_bytes();
-  int8_t const* data    = reinterpret_cast<int8_t const*>(key.data());
-  int const nblocks     = len / 4;
-  result_type h1        = m_seed;
-  constexpr uint32_t c1 = 0xcc9e2d51;
-  constexpr uint32_t c2 = 0x1b873593;
-  auto getblock32       = [] __device__(uint32_t const* p, int i) -> uint32_t {
-    // Individual byte reads for unaligned accesses (very likely)
-    auto q = (const uint8_t*)(p + i);
-    return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
-  };
-
-  //----------
-  // body
-  uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data + nblocks * 4);
-  for (int i = -nblocks; i; i++) {
-    uint32_t k1 = getblock32(blocks, i);
-    k1 *= c1;
-    k1 = rotl32(k1, 15);
-    k1 *= c2;
-    h1 ^= k1;
-    h1 = rotl32(h1, 13);
-    h1 = h1 * 5 + 0xe6546b64;
-  }
-  //----------
-  // Spark's byte by byte tail processing
-  for (int i = nblocks * 4; i < len; i++) {
-    uint32_t k1 = data[i];
-    k1 *= c1;
-    k1 = rotl32(k1, 15);
-    k1 *= c2;
-    h1 ^= k1;
-    h1 = rotl32(h1, 13);
-    h1 = h1 * 5 + 0xe6546b64;
-  }
-  //----------
-  // finalization
-  h1 ^= len;
-  h1 = fmix32(h1);
-  return h1;
+  return this->compute_bytes(reinterpret_cast<std::byte const*>(key.data()), key.size_bytes());
 }
 
 template <>

From db648d779c1c3243886afa31db0e9358b86bf38c Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanlee@nvidia.com>
Date: Fri, 28 Jan 2022 14:52:08 -0800
Subject: [PATCH 2/4] formatting and code cleanup

---
 .../cudf/detail/utilities/hash_functions.cuh  | 123 ++++++++++--------
 1 file changed, 67 insertions(+), 56 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index f9023fcf32a..de1c444989a 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -105,6 +105,14 @@ struct MurmurHash3_32 {
     return h;
   }
 
+  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
+                                                      cudf::size_type offset) const
+  {
+    // Individual byte reads for unaligned accesses (very likely for strings)
+    auto const q = reinterpret_cast<uint8_t const*>(data + offset);
+    return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
+  }
+
   /* Copyright 2005-2014 Daniel James.
    *
    * Use, modification and distribution is subject to the Boost Software
@@ -122,7 +130,7 @@ struct MurmurHash3_32 {
    *
    * @returns A hash value that intelligently combines the lhs and rhs hash values
    */
-  __device__ inline result_type hash_combine(result_type lhs, result_type rhs)
+  [[nodiscard]] __device__ inline result_type hash_combine(result_type lhs, result_type rhs)
   {
     result_type combined{lhs};
 
@@ -131,7 +139,10 @@ struct MurmurHash3_32 {
     return combined;
   }
 
-  result_type __device__ inline operator()(Key const& key) const { return compute(key); }
+  [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
+  {
+    return compute(key);
+  }
 
   // compute wrapper for floating point types
   template <typename T, std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
@@ -147,33 +158,27 @@ struct MurmurHash3_32 {
     }
   }
 
-  template <typename TKey>
-  result_type __device__ inline compute(TKey const& key) const
+  template <typename T>
+  result_type __device__ inline compute(T const& key) const
   {
-    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(TKey));
+    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
   }
 
-  result_type __device__ compute_bytes(std::byte const* const data, cudf::size_type const len) const
+  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
   {
-    cudf::size_type const nblocks     = len / 4;
-    cudf::size_type const tail_offset = nblocks * 4;
-    result_type h1                    = m_seed;
-    constexpr uint32_t c1             = 0xcc9e2d51;
-    constexpr uint32_t c2             = 0x1b873593;
-    constexpr uint32_t c3             = 0xe6546b64;
-    constexpr uint32_t rot_c1         = 15;
-    constexpr uint32_t rot_c2         = 13;
-    auto getblock32                   = [] __device__(uint32_t const* p, int i) -> uint32_t {
-      // Individual byte reads for unaligned accesses (very likely for strings)
-      auto q = (uint8_t const*)(p + i);
-      return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
-    };
-
-    //----------
-    // body
-    uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data);
-    for (int i = 0; i < nblocks; i++) {
-      uint32_t k1 = getblock32(blocks, i);
+    constexpr cudf::size_type BLOCK_SIZE = 4;
+    cudf::size_type const nblocks        = len / BLOCK_SIZE;
+    cudf::size_type const tail_offset    = nblocks * BLOCK_SIZE;
+    result_type h1                       = m_seed;
+    constexpr uint32_t c1                = 0xcc9e2d51;
+    constexpr uint32_t c2                = 0x1b873593;
+    constexpr uint32_t c3                = 0xe6546b64;
+    constexpr uint32_t rot_c1            = 15;
+    constexpr uint32_t rot_c2            = 13;
+
+    // Process all four-byte chunks.
+    for (cudf::size_type i = 0; i < nblocks; i++) {
+      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
       k1 *= c1;
       k1 = rotl32(k1, rot_c1);
       k1 *= c2;
@@ -181,10 +186,10 @@ struct MurmurHash3_32 {
       h1 = rotl32(h1, rot_c2);
       h1 = h1 * 5 + c3;
     }
-    //----------
-    // tail
+
+    // Process remaining bytes that do not fill a four-byte chunk.
     uint32_t k1 = 0;
-    switch (len & 3) {
+    switch (len % 4) {
       case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16;
       case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8;
       case 1:
@@ -194,8 +199,8 @@ struct MurmurHash3_32 {
         k1 *= c2;
         h1 ^= k1;
     };
-    //----------
-    // finalization
+
+    // Finalize hash.
     h1 ^= len;
     h1 = fmix32(h1);
     return h1;
@@ -218,7 +223,9 @@ template <>
 hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
-  return this->compute_bytes(reinterpret_cast<std::byte const*>(key.data()), key.size_bytes());
+  auto const data = reinterpret_cast<std::byte const*>(key.data());
+  auto const len  = key.size_bytes();
+  return this->compute_bytes(data, len);
 }
 
 template <>
@@ -307,32 +314,34 @@ struct SparkMurmurHash3_32 {
     }
   }
 
-  template <typename TKey>
-  result_type __device__ inline compute(TKey const& key) const
+  template <typename T>
+  result_type __device__ inline compute(T const& key) const
   {
-    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(TKey));
+    return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
   }
 
-  result_type __device__ compute_bytes(std::byte const* const data, cudf::size_type const len) const
+  [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
+                                                      cudf::size_type offset) const
   {
-    cudf::size_type const nblocks = len / 4;
-    uint32_t h1                   = m_seed;
-    constexpr uint32_t c1         = 0xcc9e2d51;
-    constexpr uint32_t c2         = 0x1b873593;
-    constexpr uint32_t c3         = 0xe6546b64;
-    constexpr uint32_t rot_c1     = 15;
-    constexpr uint32_t rot_c2     = 13;
-    auto getblock32               = [] __device__(uint32_t const* p, int i) -> uint32_t {
-      // Individual byte reads for unaligned accesses (very likely for strings)
-      auto q = (uint8_t const*)(p + i);
-      return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
-    };
+    // Individual byte reads for unaligned accesses (very likely for strings)
+    auto q = reinterpret_cast<uint8_t const*>(data + offset);
+    return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
+  }
 
-    //----------
-    // Process all four-byte chunks
-    uint32_t const* const blocks = reinterpret_cast<uint32_t const*>(data);
+  result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
+  {
+    constexpr cudf::size_type BLOCK_SIZE = 4;
+    cudf::size_type const nblocks        = len / BLOCK_SIZE;
+    result_type h1                       = m_seed;
+    constexpr uint32_t c1                = 0xcc9e2d51;
+    constexpr uint32_t c2                = 0x1b873593;
+    constexpr uint32_t c3                = 0xe6546b64;
+    constexpr uint32_t rot_c1            = 15;
+    constexpr uint32_t rot_c2            = 13;
+
+    // Process all four-byte chunks.
     for (cudf::size_type i = 0; i < nblocks; i++) {
-      uint32_t k1 = getblock32(blocks, i);
+      uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
       k1 *= c1;
       k1 = rotl32(k1, rot_c1);
       k1 *= c2;
@@ -340,9 +349,9 @@ struct SparkMurmurHash3_32 {
       h1 = rotl32(h1, rot_c2);
       h1 = h1 * 5 + c3;
     }
-    //----------
+
     // Process remaining bytes that do not fill a four-byte chunk using Spark's approach
-    // (does not conform to normal MurmurHash3)
+    // (does not conform to normal MurmurHash3).
     for (cudf::size_type i = nblocks * 4; i < len; i++) {
       // We require a two-step cast to get the k1 value from the byte. First,
       // we must cast to a signed int8_t. Then, the sign bit is preserved when
@@ -356,8 +365,8 @@ struct SparkMurmurHash3_32 {
       h1 = rotl32(h1, rot_c2);
       h1 = h1 * 5 + c3;
     }
-    //----------
-    // finalization
+
+    // Finalize hash.
     h1 ^= len;
     h1 = fmix32(h1);
     return h1;
@@ -477,7 +486,9 @@ template <>
 hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
   cudf::string_view const& key) const
 {
-  return this->compute_bytes(reinterpret_cast<std::byte const*>(key.data()), key.size_bytes());
+  auto const data = reinterpret_cast<std::byte const*>(key.data());
+  auto const len  = key.size_bytes();
+  return this->compute_bytes(data, len);
 }
 
 template <>

From 291ccb97d8f93bdeb47e443b8460ce1d74200d9e Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanlee@nvidia.com>
Date: Fri, 28 Jan 2022 15:33:29 -0800
Subject: [PATCH 3/4] more formatting changes

---
 .../cudf/detail/utilities/hash_functions.cuh   | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index de1c444989a..69a9179aa96 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -37,8 +37,8 @@ namespace detail {
 template <typename T>
 T __device__ inline normalize_nans_and_zeros(T const& key)
 {
-  if constexpr (is_floating_point<T>()) {
-    if (isnan(key)) {
+  if constexpr (cudf::is_floating_point<T>()) {
+    if (std::isnan(key)) {
       return std::numeric_limits<T>::quiet_NaN();
     } else if (key == T{0.0}) {
       return T{0.0};
@@ -84,8 +84,7 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
 // non-native version will be less than optimal.
 template <typename Key>
 struct MurmurHash3_32 {
-  using argument_type = Key;
-  using result_type   = hash_value_type;
+  using result_type = hash_value_type;
 
   MurmurHash3_32() = default;
   constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
@@ -108,7 +107,8 @@ struct MurmurHash3_32 {
   [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
                                                       cudf::size_type offset) const
   {
-    // Individual byte reads for unaligned accesses (very likely for strings)
+    // Read a 4-byte value from the data pointer as individual bytes for safe
+    // unaligned access (very likely for string types).
     auto const q = reinterpret_cast<uint8_t const*>(data + offset);
     return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
   }
@@ -139,6 +139,7 @@ struct MurmurHash3_32 {
     return combined;
   }
 
+  // TODO Do we need this operator() and/or compute? Probably not both.
   [[nodiscard]] result_type __device__ inline operator()(Key const& key) const
   {
     return compute(key);
@@ -150,7 +151,7 @@ struct MurmurHash3_32 {
   {
     if (key == T{0.0}) {
       return compute(T{0.0});
-    } else if (isnan(key)) {
+    } else if (std::isnan(key)) {
       T nan = std::numeric_limits<T>::quiet_NaN();
       return compute(nan);
     } else {
@@ -279,8 +280,7 @@ hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
 
 template <typename Key>
 struct SparkMurmurHash3_32 {
-  using argument_type = Key;
-  using result_type   = hash_value_type;
+  using result_type = hash_value_type;
 
   SparkMurmurHash3_32() = default;
   constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}
@@ -306,7 +306,7 @@ struct SparkMurmurHash3_32 {
   template <typename T, std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
   hash_value_type __device__ inline compute_floating_point(T const& key) const
   {
-    if (isnan(key)) {
+    if (std::isnan(key)) {
       T nan = std::numeric_limits<T>::quiet_NaN();
       return compute(nan);
     } else {

From 5eaa8e6a32520f0ac43104ff7fe795375cb04efb Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 2 Feb 2022 17:25:11 -0600
Subject: [PATCH 4/4] Apply suggestions from code review

---
 cpp/include/cudf/detail/utilities/hash_functions.cuh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 69a9179aa96..51d58383de4 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -109,8 +109,8 @@ struct MurmurHash3_32 {
   {
     // Read a 4-byte value from the data pointer as individual bytes for safe
     // unaligned access (very likely for string types).
-    auto const q = reinterpret_cast<uint8_t const*>(data + offset);
-    return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
+    auto const block = reinterpret_cast<uint8_t const*>(data + offset);
+    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
   }
 
   /* Copyright 2005-2014 Daniel James.
@@ -324,8 +324,8 @@ struct SparkMurmurHash3_32 {
                                                       cudf::size_type offset) const
   {
     // Individual byte reads for unaligned accesses (very likely for strings)
-    auto q = reinterpret_cast<uint8_t const*>(data + offset);
-    return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
+    auto block = reinterpret_cast<uint8_t const*>(data + offset);
+    return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
   }
 
   result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const