Initial MD5 implementation

rapidsai · Jul 21, 2020 · 0d3845c · 0d3845c
1 parent b626432
commit 0d3845c
Show file tree

Hide file tree

Showing 7 changed files with 669 additions and 7 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -423,6 +423,7 @@ add_library(cudf
             src/stream_compaction/drop_duplicates.cu
             src/datetime/datetime_ops.cu
             src/hash/hashing.cu
+            src/hash/hash_constants.cu
             src/partitioning/partitioning.cu
             src/quantiles/quantile.cu
             src/quantiles/quantiles.cu

diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,10 +16,221 @@
 
 #pragma once
 
+#include <cstdint>
 #include <cudf/strings/string_view.cuh>
+#include <hash/hash_constants.hpp>
+
+#include "cuda_runtime_api.h"
+#include "cudf/types.hpp"
+#include "driver_types.h"
+#include "vector_types.h"
 
 using hash_value_type = uint32_t;
 
+namespace cudf {
+namespace detail {
+
+  /**
+   * @brief Helper function, left rotate bit value the value n bits
+   */
+  CUDA_HOST_DEVICE_CALLABLE uint32_t left_rotate(uint32_t value, uint32_t shift)
+  {
+    return (value << shift) | (value >> (32-shift));
+  }
+
+  /**
+   * @brief Core MD5 algorith implementation. Processes a single 512-bit chunk,
+   * updating the hash value so far. Does not zero out the buffer contents.
+   */
+  void CUDA_HOST_DEVICE_CALLABLE
+  md5_hash_step(md5_intermediate_data* hash_state,
+                const md5_hash_constants_type* hash_constants,
+                const md5_shift_constants_type* shift_constants)
+  {
+    uint32_t A = hash_state->hash_value[0];
+    uint32_t B = hash_state->hash_value[1];
+    uint32_t C = hash_state->hash_value[2];
+    uint32_t D = hash_state->hash_value[3];
+
+    uint32_t* buffer_ints = (uint32_t*)hash_state->buffer;
+
+    for(unsigned int j = 0; j < 64; j++) {
+      uint32_t F, g;
+      switch(j / 16) {
+        case 0 :
+          F = (B & C) | ((~B)&D); // D ^ (B & (C ^ D))
+          g = j;
+        break;
+        case 1 :
+          F = (D & B) | ((~D)&C);
+          g = (5 * j + 1) % 16;
+        break;
+        case 2 :
+          F = B ^ C ^ D;
+          g = (3 * j + 5) % 16;
+        break;
+        case 3 :
+          F = C ^ (B | (~D));
+          g = (7 * j) % 16;
+        break;
+      }
+
+      F = F + A + hash_constants[j] + buffer_ints[g];
+
+      A = D;
+      D = C;
+      C = B;
+      B = B + left_rotate(F, shift_constants[((j/16) * 4) + (j%4)]);
+    }
+
+    hash_state->hash_value[0] += A;
+    hash_state->hash_value[1] += B;
+    hash_state->hash_value[2] += C;
+    hash_state->hash_value[3] += D;
+
+    hash_state->buffer_length = 0;
+  }
+
+  template <typename Key>
+  struct MD5Hash {
+    using argument_type = Key;
+
+    /**
+     * @brief Core MD5 element processing function
+     */
+    template <typename TKey>
+    void CUDA_HOST_DEVICE_CALLABLE
+    process(TKey const& key,
+            const uint32_t len,
+            md5_intermediate_data* hash_state,
+            const md5_hash_constants_type* hash_constants,
+            const md5_shift_constants_type* shift_constants) const
+    {
+      uint8_t* data = (uint8_t*)&key;
+      hash_state->message_length += len;
+
+      if(hash_state->buffer_length + len < 64) {
+        thrust::copy_n(thrust::seq, data, len, hash_state->buffer+hash_state->buffer_length);
+        hash_state->buffer_length += len;
+      } else {
+        uint32_t copylen = 64 - hash_state->buffer_length;
+
+        thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer+hash_state->buffer_length);
+        md5_hash_step(hash_state, hash_constants, shift_constants);
+
+        while(len > 64 + copylen) {
+          thrust::copy_n(thrust::seq, data+copylen, 64, hash_state->buffer);
+          md5_hash_step(hash_state, hash_constants, shift_constants);
+          copylen += 64;
+        }
+
+        thrust::copy_n(thrust::seq, data+copylen, len-copylen, hash_state->buffer);
+        hash_state->buffer_length = len-copylen;
+      }
+    }
+
+    template <typename T, typename std::enable_if_t<is_fixed_width<T>()>* = nullptr>
+    void CUDA_HOST_DEVICE_CALLABLE
+    operator()(T const& key,
+               md5_intermediate_data* hash_state,
+               const md5_hash_constants_type* hash_constants,
+               const md5_shift_constants_type* shift_constants) const
+    {
+      process(key, size_of(key), hash_state, hash_constants, shift_constants);
+    }
+
+    template <typename T, typename std::enable_if_t<!is_fixed_width<T>()>* = nullptr>
+    void CUDA_HOST_DEVICE_CALLABLE
+    operator()(T const& key,
+               md5_intermediate_data* hash_state,
+               const md5_hash_constants_type* hash_constants,
+               const md5_shift_constants_type* shift_constants) const
+    {
+      CUDF_FAIL("Unsupported hash type");
+    }
+
+    void CUDA_HOST_DEVICE_CALLABLE
+    operator()(Key const& key,
+               md5_intermediate_data* hash_state,
+               const md5_hash_constants_type* hash_constants,
+               const md5_shift_constants_type* shift_constants) const {}
+  };
+
+  /**
+   * @brief Specialization of MD5Hash operator for strings.
+   */
+  template <>
+  void CUDA_HOST_DEVICE_CALLABLE
+  MD5Hash<cudf::string_view>::operator()(cudf::string_view const& key,
+                                         md5_intermediate_data* hash_state,
+                                         const md5_hash_constants_type* hash_constants,
+                                         const md5_shift_constants_type* shift_constants) const
+  {
+    const uint32_t len = (uint32_t)key.size_bytes();
+    const uint8_t* data = (const uint8_t*)key.data();
+
+    hash_state->message_length += len;
+
+    if(hash_state->buffer_length + len < 64) {
+      thrust::copy_n(thrust::seq, data, len, hash_state->buffer+hash_state->buffer_length);
+      hash_state->buffer_length += len;
+    } else {
+      uint32_t copylen = 64 - hash_state->buffer_length;
+      thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer+hash_state->buffer_length);
+      md5_hash_step(hash_state, hash_constants, shift_constants);
+
+      while(len > 64 + copylen) {
+        thrust::copy_n(thrust::seq, data+copylen, 64, hash_state->buffer);
+        md5_hash_step(hash_state, hash_constants, shift_constants);
+        copylen += 64;
+      }
+
+      thrust::copy_n(thrust::seq, data+copylen, len-copylen, hash_state->buffer);
+      hash_state->buffer_length = len-copylen;
+    }
+  }
+
+  /**
+   * @brief Finalize MD5 hash including converstion to hex string.
+   */
+  void CUDA_HOST_DEVICE_CALLABLE
+  finalize_md5_hash(md5_intermediate_data* hash_state,
+                    char* result_location,
+                    const md5_hash_constants_type* hash_constants,
+                    const md5_shift_constants_type* shift_constants,
+                    const hex_to_char_mapping_type* hex_char_map)
+  {
+    uint64_t full_length =  (uint64_t)hash_state->message_length;
+    full_length = full_length << 3;
+    thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length, 1, 0x80);
+
+    if(hash_state->buffer_length <= 55) {
+      thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length+1,
+                     (55 - hash_state->buffer_length), 0x00);
+    } else {
+      thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length+1,
+                     (64 - hash_state->buffer_length), 0x00);
+      md5_hash_step(hash_state, hash_constants, shift_constants);
+
+      thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00);
+    }
+
+    thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer+56);
+    md5_hash_step(hash_state, hash_constants, shift_constants);
+
+    u_char final_hash[32];
+    uint8_t* hash_result = (uint8_t*)hash_state->hash_value;
+    for(int i = 0; i < 16; i++) {
+      final_hash[i*2] = hex_char_map[(hash_result[i] >> 4) & 0xf];
+      final_hash[i*2+1] = hex_char_map[hash_result[i] & 0xf];
+    }
+
+    thrust::copy_n(thrust::seq, final_hash, 32, result_location);
+  }
+
+}  // namespace detail
+}  // namespace cudf
+
 // MurmurHash3_32 implementation from
 // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
 //-----------------------------------------------------------------------------
@@ -250,4 +461,4 @@ struct IdentityHash {
 };
 
 template <typename Key>
-using default_hash = MurmurHash3_32<Key>;
+using default_hash = MurmurHash3_32<Key>;
diff --git a/cpp/src/hash/hash_constants.cu b/cpp/src/hash/hash_constants.cu
@@ -0,0 +1,96 @@
+/* Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cudf/column/column_device_view.cuh>
+#include "./hash_constants.hpp"
+
+#include <mutex>
+
+namespace cudf {
+namespace detail {
+
+const hex_to_char_mapping_type g_hex_to_char_mapping[] = {
+  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+};
+const md5_shift_constants_type g_md5_shift_constants[] = {
+  7, 12, 17, 22, 5,  9, 14, 20, 4, 11, 16, 23 , 6, 10, 15, 21,
+};
+
+const md5_hash_constants_type g_md5_hash_constants[] = {
+  0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+  0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+  0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+  0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+  0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+  0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+  0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+  0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+std::mutex g_hex_to_char_mapping_mutex;
+std::mutex g_md5_hash_constants_mutex;
+std::mutex g_md5_shift_constants_mutex;
+
+hex_to_char_mapping_type* d_hex_to_char_mapping = nullptr;
+md5_hash_constants_type* d_md5_hash_constants = nullptr;
+md5_shift_constants_type* d_md5_shift_constants = nullptr;
+
+__device__ hex_to_char_mapping_type hex_to_char_mapping[sizeof(g_hex_to_char_mapping)];
+__device__ md5_hash_constants_type md5_hash_constants[sizeof(g_md5_hash_constants)];
+__device__ md5_shift_constants_type md5_shift_constants[sizeof(g_md5_shift_constants)];
+
+/**
+ * @copydoc cudf::detail::get_hex_to_char_mapping
+ */
+const hex_to_char_mapping_type* get_hex_to_char_mapping()
+{
+  std::lock_guard<std::mutex> guard(g_hex_to_char_mapping_mutex);
+  if (!d_hex_to_char_mapping) {
+    CUDA_TRY(cudaMemcpyToSymbol(
+    hex_to_char_mapping, g_hex_to_char_mapping, sizeof(g_hex_to_char_mapping)));
+    CUDA_TRY(cudaGetSymbolAddress((void**)&d_hex_to_char_mapping, hex_to_char_mapping));
+  }
+  return d_hex_to_char_mapping;
+}
+
+/**
+ * @copydoc cudf::detail::get_md5_hash_constants
+ */
+const md5_hash_constants_type* get_md5_hash_constants()
+{
+  std::lock_guard<std::mutex> guard(g_md5_hash_constants_mutex);
+  if (!d_md5_hash_constants) {
+    CUDA_TRY(cudaMemcpyToSymbol(
+      md5_hash_constants, g_md5_hash_constants, sizeof(g_md5_hash_constants)));
+    CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_hash_constants, md5_hash_constants));
+  }
+  return d_md5_hash_constants;
+}
+
+/**
+ * @copydoc cudf::detail::get_md5_shift_constants
+ */
+const md5_shift_constants_type* get_md5_shift_constants()
+{
+  std::lock_guard<std::mutex> guard(g_md5_shift_constants_mutex);
+  if (!d_md5_shift_constants) {
+    CUDA_TRY(cudaMemcpyToSymbol(
+      md5_shift_constants, g_md5_shift_constants, sizeof(g_md5_shift_constants)));
+    CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_shift_constants, md5_shift_constants));
+  }
+  return d_md5_shift_constants;
+}
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/src/hash/hash_constants.cuh b/cpp/src/hash/hash_constants.cuh
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+namespace cudf {
+namespace detail {
+
+}  // namespace detail
+}  // namespace cudf