Skip to content

Commit

Permalink
Merge pull request #5438 from rwlee/rwlee/md5
Browse files Browse the repository at this point in the history
[REVIEW] Add MD5 to existing hashing functionality
  • Loading branch information
Keith Kraus authored Aug 13, 2020
2 parents d24d0ab + 7379b23 commit 8aae2e4
Show file tree
Hide file tree
Showing 11 changed files with 569 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
- PR #5666 Add `filter_characters_of_type` strings API
- PR #5778 Add support for `cudf::table` to `arrow::Table` and `arrow::Table` to `cudf::table`
- PR #5673 Always build and test with per-thread default stream enabled in the GPU CI build
- PR #5438 Add MD5 hash support
- PR #5704 Initial `fixed_point` Column Support
- PR #5716 Add `double_type_dispatcher` to libcudf
- PR #5739 Add `nvtext::detokenize` API
Expand Down
14 changes: 13 additions & 1 deletion cpp/include/cudf/detail/hashing.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,9 +37,21 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> hash(table_view const& input,
hash_id hash_function = hash_id::HASH_MURMUR3,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

std::unique_ptr<column> murmur_hash3_32(
table_view const& input,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

std::unique_ptr<column> md5_hash(
table_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

} // namespace detail
} // namespace cudf
228 changes: 227 additions & 1 deletion cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION.
* Copyright (c) 2017-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,10 +16,236 @@

#pragma once

#include <cudf/column/column_device_view.cuh>
#include <cudf/strings/string_view.cuh>
#include <hash/hash_constants.hpp>

#include "cudf/types.hpp"

using hash_value_type = uint32_t;

namespace cudf {
namespace detail {
/**
* Modified GPU implementation of
* https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/
* Copyright (c) 2015 Barry Clark
* Licensed under the MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destination)
{
// Transform 0xABCD1234 => 0x0000ABCD00001234 => 0x0B0A0D0C02010403
uint64_t x = num;
x = ((x & 0xFFFF0000) << 16) | ((x & 0xFFFF));
x = ((x & 0xF0000000F) << 8) | ((x & 0xF0000000F0) >> 4) | ((x & 0xF0000000F00) << 16) |
((x & 0xF0000000F000) << 4);

// Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits
uint64_t offsets = (((x + 0x0606060606060606) >> 4) & 0x0101010101010101) * 0x27;

x |= 0x3030303030303030;
x += offsets;
thrust::copy_n(thrust::seq, reinterpret_cast<uint8_t*>(&x), 8, destination);
}

struct MD5Hash {
/**
* @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk,
* updating the hash value so far. Does not zero out the buffer contents.
*/
void __device__ hash_step(md5_intermediate_data* hash_state) const
{
uint32_t A = hash_state->hash_value[0];
uint32_t B = hash_state->hash_value[1];
uint32_t C = hash_state->hash_value[2];
uint32_t D = hash_state->hash_value[3];

for (unsigned int j = 0; j < 64; j++) {
uint32_t F;
uint32_t g;
switch (j / 16) {
case 0:
F = (B & C) | ((~B) & D);
g = j;
break;
case 1:
F = (D & B) | ((~D) & C);
g = (5 * j + 1) % 16;
break;
case 2:
F = B ^ C ^ D;
g = (3 * j + 5) % 16;
break;
case 3:
F = C ^ (B | (~D));
g = (7 * j) % 16;
break;
}

uint32_t buffer_element_as_int;
std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4);
F = F + A + md5_hash_constants[j] + buffer_element_as_int;
A = D;
D = C;
C = B;
B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]);
}

hash_state->hash_value[0] += A;
hash_state->hash_value[1] += B;
hash_state->hash_value[2] += C;
hash_state->hash_value[3] += D;

hash_state->buffer_length = 0;
}

/**
* @brief Core MD5 element processing function
*/
template <typename TKey>
void __device__ process(TKey const& key, md5_intermediate_data* hash_state) const
{
uint32_t const len = sizeof(TKey);
uint8_t const* data = reinterpret_cast<uint8_t const*>(&key);
hash_state->message_length += len;

// 64 bytes for the number of bytes processed in a given step
constexpr int md5_chunk_size = 64;
if (hash_state->buffer_length + len < md5_chunk_size) {
thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length);
hash_state->buffer_length += len;
} else {
uint32_t copylen = md5_chunk_size - hash_state->buffer_length;

thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length);
hash_step(hash_state);

while (len > md5_chunk_size + copylen) {
thrust::copy_n(thrust::seq, data + copylen, md5_chunk_size, hash_state->buffer);
hash_step(hash_state);
copylen += md5_chunk_size;
}

thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer);
hash_state->buffer_length = len - copylen;
}
}

void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const
{
auto const full_length = (static_cast<uint64_t>(hash_state->message_length)) << 3;
thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80);

// 64 bytes for the number of bytes processed in a given step
constexpr int md5_chunk_size = 64;
// 8 bytes for the total message length, appended to the end of the last chunk processed
constexpr int message_length_size = 8;
// 1 byte for the end of the message flag
constexpr int end_of_message_size = 1;
if (hash_state->buffer_length + message_length_size + end_of_message_size <= md5_chunk_size) {
thrust::fill_n(
thrust::seq,
hash_state->buffer + hash_state->buffer_length + 1,
(md5_chunk_size - message_length_size - end_of_message_size - hash_state->buffer_length),
0x00);
} else {
thrust::fill_n(thrust::seq,
hash_state->buffer + hash_state->buffer_length + 1,
(md5_chunk_size - hash_state->buffer_length),
0x00);
hash_step(hash_state);

thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00);
}

thrust::copy_n(thrust::seq,
reinterpret_cast<uint8_t const*>(&full_length),
message_length_size,
hash_state->buffer + md5_chunk_size - message_length_size);
hash_step(hash_state);

#pragma unroll
for (int i = 0; i < 4; ++i)
uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i));
}

template <typename T, typename std::enable_if_t<is_chrono<T>()>* = nullptr>
void __device__ operator()(column_device_view col,
size_type row_index,
md5_intermediate_data* hash_state) const
{
release_assert(false && "MD5 Unsupported chrono type column");
}

template <typename T, typename std::enable_if_t<!is_fixed_width<T>()>* = nullptr>
void __device__ operator()(column_device_view col,
size_type row_index,
md5_intermediate_data* hash_state) const
{
release_assert(false && "MD5 Unsupported non-fixed-width type column");
}

template <typename T, typename std::enable_if_t<is_floating_point<T>()>* = nullptr>
void __device__ operator()(column_device_view col,
size_type row_index,
md5_intermediate_data* hash_state) const
{
T const& key = col.element<T>(row_index);
if (isnan(key)) {
T nan = std::numeric_limits<T>::quiet_NaN();
process(nan, hash_state);
} else if (key == T{0.0}) {
process(T{0.0}, hash_state);
} else {
process(key, hash_state);
}
}

template <typename T,
typename std::enable_if_t<is_fixed_width<T>() && !is_floating_point<T>() &&
!is_chrono<T>()>* = nullptr>
void CUDA_DEVICE_CALLABLE operator()(column_device_view col,
size_type row_index,
md5_intermediate_data* hash_state) const
{
process(col.element<T>(row_index), hash_state);
}
};

template <>
void CUDA_DEVICE_CALLABLE MD5Hash::operator()<string_view>(column_device_view col,
size_type row_index,
md5_intermediate_data* hash_state) const
{
string_view key = col.element<string_view>(row_index);
uint32_t const len = static_cast<uint32_t>(key.size_bytes());
uint8_t const* data = reinterpret_cast<uint8_t const*>(key.data());

hash_state->message_length += len;

if (hash_state->buffer_length + len < 64) {
thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length);
hash_state->buffer_length += len;
} else {
uint32_t copylen = 64 - hash_state->buffer_length;
thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length);
hash_step(hash_state);

while (len > 64 + copylen) {
thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer);
hash_step(hash_state);
copylen += 64;
}

thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer);
hash_state->buffer_length = len - copylen;
}
}

} // namespace detail
} // namespace cudf

// MurmurHash3_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,6 +35,7 @@ namespace cudf {
* @returns A column where each row is the hash of a column from the input
*/
std::unique_ptr<column> hash(table_view const& input,
hash_id hash_function = hash_id::HASH_MURMUR3,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

Expand Down
9 changes: 9 additions & 0 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,5 +279,14 @@ inline bool operator==(data_type const& lhs, data_type const& rhs) { return lhs.
*/
std::size_t size_of(data_type t);

/**
* @brief Identifies the hash function to be used
*/
enum class hash_id {
HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed
HASH_MURMUR3, ///< Murmur3 hash function
HASH_MD5 ///< MD5 hash function
};

/** @} */
} // namespace cudf
64 changes: 64 additions & 0 deletions cpp/src/hash/hash_constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

namespace cudf {
namespace detail {

struct md5_intermediate_data {
uint64_t message_length = 0;
uint32_t buffer_length = 0;
uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476};
uint8_t buffer[64];
};

// Type for the shift constants table.
using md5_shift_constants_type = uint32_t;

__device__ __constant__ md5_shift_constants_type md5_shift_constants[16] = {
7,
12,
17,
22,
5,
9,
14,
20,
4,
11,
16,
23,
6,
10,
15,
21,
};

// Type for the hash constants table.
using md5_hash_constants_type = uint32_t;

__device__ __constant__ md5_hash_constants_type md5_hash_constants[64] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
};
} // namespace detail
} // namespace cudf
Loading

0 comments on commit 8aae2e4

Please sign in to comment.