From 9d158e808106a2f0aa1a01a48f20009b2bd28478 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Sep 2021 18:10:53 -0700 Subject: [PATCH 01/54] Improve comments and naming. --- .../cudf/detail/utilities/hash_functions.cuh | 16 +++++++++------- cpp/src/hash/md5_hash.cu | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 6eab13ae9af..24b8e0edb02 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -88,7 +88,7 @@ void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* ha uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; - // 64 bytes for the number of byt es processed in a given step + // 64 bytes are processed in each hash step constexpr int md5_chunk_size = 64; if (hash_state->buffer_length + len < md5_chunk_size) { std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); @@ -210,18 +210,20 @@ MD5ListHasher::operator()(column_device_view data_col, hash_state->message_length += len; - if (hash_state->buffer_length + len < 64) { + // 64 bytes are processed in each hash step + constexpr int md5_chunk_size = 64; + if (hash_state->buffer_length + len < md5_chunk_size) { std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; } else { - uint32_t copylen = 64 - hash_state->buffer_length; + uint32_t copylen = md5_chunk_size - hash_state->buffer_length; std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); md5_hash_step(hash_state); - while (len > 64 + copylen) { - std::memcpy(hash_state->buffer, data + copylen, 64); + while (len > md5_chunk_size + copylen) { + std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); md5_hash_step(hash_state); - copylen += 64; + copylen += md5_chunk_size; } std::memcpy(hash_state->buffer, data + copylen, len - copylen); @@ -240,7 +242,7 @@ struct MD5Hash { auto const full_length = (static_cast(hash_state->message_length)) << 3; thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); - // 64 bytes for the number of bytes processed in a given step + // 64 bytes are processed in each hash step constexpr int md5_chunk_size = 64; // 8 bytes for the total message length, appended to the end of the last chunk processed constexpr int message_length_size = 8; diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 973f3204c37..2b1ad9168da 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -44,6 +44,7 @@ std::unique_ptr md5_hash(table_view const& input, rmm::mr::device_memory_resource* mr) { if (input.num_columns() == 0 || input.num_rows() == 0) { + // Return the MD5 hash of a zero-length input. const string_scalar string_128bit("d41d8cd98f00b204e9orig98ecf8427e"); auto output = make_column_from_scalar(string_128bit, input.num_rows(), stream, mr); return output; From 5630eed191dfac220b27c84f10ace98850c3b835 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Sep 2021 18:11:24 -0700 Subject: [PATCH 02/54] Remove unused seed from MD5Hash constructor. --- cpp/include/cudf/detail/utilities/hash_functions.cuh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 24b8e0edb02..7a09280a388 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -235,7 +235,7 @@ MD5ListHasher::operator()(column_device_view data_col, struct MD5Hash { MD5Hash() = default; - constexpr MD5Hash(uint32_t seed) : m_seed(seed) {} + constexpr MD5Hash(uint32_t seed) {} void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const { @@ -307,9 +307,6 @@ struct MD5Hash { { md5_process(col.element(row_index), hash_state); } - - private: - uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; }; template <> From f23735d94df539561fc4c33c256438fba1595118 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Sep 2021 18:11:53 -0700 Subject: [PATCH 03/54] Expand detail namespace. --- cpp/src/hash/md5_hash.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 2b1ad9168da..c894cbaa60e 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -27,6 +27,9 @@ #include namespace cudf { + +namespace detail { + namespace { // MD5 supported leaf data type check @@ -37,8 +40,6 @@ bool md5_type_check(data_type dt) } // namespace -namespace detail { - std::unique_ptr md5_hash(table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) From ed56ef0205ba757c445b32810317c2645e17ed53 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Sep 2021 18:28:59 -0700 Subject: [PATCH 04/54] Remove seed and default constructors. --- cpp/include/cudf/detail/utilities/hash_functions.cuh | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 7a09280a388..cbfba9f77c8 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -234,9 +234,6 @@ MD5ListHasher::operator()(column_device_view data_col, } struct MD5Hash { - MD5Hash() = default; - constexpr MD5Hash(uint32_t seed) {} - void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const { auto const full_length = (static_cast(hash_state->message_length)) << 3; From 03bfe6d30aea7bb591436c1e30ae82c823d6cb3f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Sep 2021 18:41:59 -0700 Subject: [PATCH 05/54] Move MD5 implementations from hash_functions.cuh to md5_hash.cu because they are only used in that file. --- .../cudf/detail/utilities/hash_functions.cuh | 290 ------------------ cpp/src/hash/md5_hash.cu | 289 +++++++++++++++++ 2 files changed, 289 insertions(+), 290 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index cbfba9f77c8..b87c53cc913 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -27,88 +27,6 @@ using hash_value_type = uint32_t; namespace cudf { namespace detail { -namespace { -/** - * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, - * updating the hash value so far. Does not zero out the buffer contents. - */ -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) -{ - uint32_t A = hash_state->hash_value[0]; - uint32_t B = hash_state->hash_value[1]; - uint32_t C = hash_state->hash_value[2]; - uint32_t D = hash_state->hash_value[3]; - - for (unsigned int j = 0; j < 64; j++) { - uint32_t F; - uint32_t g; - switch (j / 16) { - case 0: - F = (B & C) | ((~B) & D); - g = j; - break; - case 1: - F = (D & B) | ((~D) & C); - g = (5 * j + 1) % 16; - break; - case 2: - F = B ^ C ^ D; - g = (3 * j + 5) % 16; - break; - case 3: - F = C ^ (B | (~D)); - g = (7 * j) % 16; - break; - } - - uint32_t buffer_element_as_int; - std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); - F = F + A + md5_hash_constants[j] + buffer_element_as_int; - A = D; - D = C; - C = B; - B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); - } - - hash_state->hash_value[0] += A; - hash_state->hash_value[1] += B; - hash_state->hash_value[2] += C; - hash_state->hash_value[3] += D; - - hash_state->buffer_length = 0; -} - -/** - * @brief Core MD5 element processing function - */ -template -void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* hash_state) -{ - uint32_t const len = sizeof(TKey); - uint8_t const* data = reinterpret_cast(&key); - hash_state->message_length += len; - - // 64 bytes are processed in each hash step - constexpr int md5_chunk_size = 64; - if (hash_state->buffer_length + len < md5_chunk_size) { - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); - hash_state->buffer_length += len; - } else { - uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); - md5_hash_step(hash_state); - - while (len > md5_chunk_size + copylen) { - std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); - md5_hash_step(hash_state); - copylen += md5_chunk_size; - } - - std::memcpy(hash_state->buffer, data + copylen, len - copylen); - hash_state->buffer_length = len - copylen; - } -} /** * Normalization of floating point NANs and zeros helper @@ -124,7 +42,6 @@ T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros_helper(T key) return key; } } -} // namespace /** * Modified GPU implementation of @@ -149,213 +66,6 @@ void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destina std::memcpy(destination, reinterpret_cast(&x), 8); } -struct MD5ListHasher { - template ()>* = nullptr> - void __device__ operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const - { - cudf_assert(false && "MD5 Unsupported chrono type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const - { - cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const - { - for (int i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) { - md5_process(normalize_nans_and_zeros_helper(data_col.element(i)), hash_state); - } - } - } - - template < - typename T, - std::enable_if_t() && !is_floating_point() && !is_chrono()>* = nullptr> - void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const - { - for (int i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) md5_process(data_col.element(i), hash_state); - } - } -}; - -template <> -void CUDA_DEVICE_CALLABLE -MD5ListHasher::operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const -{ - for (int i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) { - string_view key = data_col.element(i); - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - - hash_state->message_length += len; - - // 64 bytes are processed in each hash step - constexpr int md5_chunk_size = 64; - if (hash_state->buffer_length + len < md5_chunk_size) { - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); - hash_state->buffer_length += len; - } else { - uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); - md5_hash_step(hash_state); - - while (len > md5_chunk_size + copylen) { - std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); - md5_hash_step(hash_state); - copylen += md5_chunk_size; - } - - std::memcpy(hash_state->buffer, data + copylen, len - copylen); - hash_state->buffer_length = len - copylen; - } - } - } -} - -struct MD5Hash { - void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const - { - auto const full_length = (static_cast(hash_state->message_length)) << 3; - thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); - - // 64 bytes are processed in each hash step - constexpr int md5_chunk_size = 64; - // 8 bytes for the total message length, appended to the end of the last chunk processed - constexpr int message_length_size = 8; - // 1 byte for the end of the message flag - constexpr int end_of_message_size = 1; - if (hash_state->buffer_length + message_length_size + end_of_message_size <= md5_chunk_size) { - thrust::fill_n( - thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (md5_chunk_size - message_length_size - end_of_message_size - hash_state->buffer_length), - 0x00); - } else { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (md5_chunk_size - hash_state->buffer_length), - 0x00); - md5_hash_step(hash_state); - - thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); - } - - std::memcpy(hash_state->buffer + md5_chunk_size - message_length_size, - reinterpret_cast(&full_length), - message_length_size); - md5_hash_step(hash_state); - -#pragma unroll - for (int i = 0; i < 4; ++i) - uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const - { - cudf_assert(false && "MD5 Unsupported chrono type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const - { - cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const - { - md5_process(normalize_nans_and_zeros_helper(col.element(row_index)), hash_state); - } - - template < - typename T, - std::enable_if_t() && !is_floating_point() && !is_chrono()>* = nullptr> - void CUDA_DEVICE_CALLABLE operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const - { - md5_process(col.element(row_index), hash_state); - } -}; - -template <> -void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const -{ - string_view key = col.element(row_index); - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - - hash_state->message_length += len; - - if (hash_state->buffer_length + len < 64) { - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); - hash_state->buffer_length += len; - } else { - uint32_t copylen = 64 - hash_state->buffer_length; - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); - md5_hash_step(hash_state); - - while (len > 64 + copylen) { - std::memcpy(hash_state->buffer, data + copylen, 64); - md5_hash_step(hash_state); - copylen += 64; - } - - std::memcpy(hash_state->buffer, data + copylen, len - copylen); - hash_state->buffer_length = len - copylen; - } -} - -template <> -void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const -{ - static constexpr size_type offsets_column_index{0}; - static constexpr size_type data_column_index{1}; - - column_device_view offsets = col.child(offsets_column_index); - column_device_view data = col.child(data_column_index); - - if (data.type().id() == type_id::LIST) cudf_assert(false && "Nested list unsupported"); - - cudf::type_dispatcher(data.type(), - MD5ListHasher{}, - data, - offsets.element(row_index), - offsets.element(row_index + 1), - hash_state); -} } // namespace detail } // namespace cudf diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index c894cbaa60e..206d1dcefe0 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -31,6 +31,87 @@ namespace cudf { namespace detail { namespace { +/** + * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, + * updating the hash value so far. Does not zero out the buffer contents. + */ +void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) +{ + uint32_t A = hash_state->hash_value[0]; + uint32_t B = hash_state->hash_value[1]; + uint32_t C = hash_state->hash_value[2]; + uint32_t D = hash_state->hash_value[3]; + + for (unsigned int j = 0; j < 64; j++) { + uint32_t F; + uint32_t g; + switch (j / 16) { + case 0: + F = (B & C) | ((~B) & D); + g = j; + break; + case 1: + F = (D & B) | ((~D) & C); + g = (5 * j + 1) % 16; + break; + case 2: + F = B ^ C ^ D; + g = (3 * j + 5) % 16; + break; + case 3: + F = C ^ (B | (~D)); + g = (7 * j) % 16; + break; + } + + uint32_t buffer_element_as_int; + std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); + F = F + A + md5_hash_constants[j] + buffer_element_as_int; + A = D; + D = C; + C = B; + B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); + } + + hash_state->hash_value[0] += A; + hash_state->hash_value[1] += B; + hash_state->hash_value[2] += C; + hash_state->hash_value[3] += D; + + hash_state->buffer_length = 0; +} + +/** + * @brief Core MD5 element processing function + */ +template +void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* hash_state) +{ + uint32_t const len = sizeof(TKey); + uint8_t const* data = reinterpret_cast(&key); + hash_state->message_length += len; + + // 64 bytes are processed in each hash step + constexpr int md5_chunk_size = 64; + if (hash_state->buffer_length + len < md5_chunk_size) { + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); + hash_state->buffer_length += len; + } else { + uint32_t copylen = md5_chunk_size - hash_state->buffer_length; + + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); + md5_hash_step(hash_state); + + while (len > md5_chunk_size + copylen) { + std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); + md5_hash_step(hash_state); + copylen += md5_chunk_size; + } + + std::memcpy(hash_state->buffer, data + copylen, len - copylen); + hash_state->buffer_length = len - copylen; + } +} // MD5 supported leaf data type check bool md5_type_check(data_type dt) @@ -38,6 +119,214 @@ bool md5_type_check(data_type dt) return !is_chrono(dt) && (is_fixed_width(dt) || (dt.id() == type_id::STRING)); } +struct MD5ListHasher { + template ()>* = nullptr> + void __device__ operator()(column_device_view data_col, + size_type offset_begin, + size_type offset_end, + md5_intermediate_data* hash_state) const + { + cudf_assert(false && "MD5 Unsupported chrono type column"); + } + + template ()>* = nullptr> + void __device__ operator()(column_device_view data_col, + size_type offset_begin, + size_type offset_end, + md5_intermediate_data* hash_state) const + { + cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); + } + + template ()>* = nullptr> + void __device__ operator()(column_device_view data_col, + size_type offset_begin, + size_type offset_end, + md5_intermediate_data* hash_state) const + { + for (int i = offset_begin; i < offset_end; i++) { + if (!data_col.is_null(i)) { + md5_process(normalize_nans_and_zeros_helper(data_col.element(i)), hash_state); + } + } + } + + template < + typename T, + std::enable_if_t() && !is_floating_point() && !is_chrono()>* = nullptr> + void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, + size_type offset_begin, + size_type offset_end, + md5_intermediate_data* hash_state) const + { + for (int i = offset_begin; i < offset_end; i++) { + if (!data_col.is_null(i)) md5_process(data_col.element(i), hash_state); + } + } +}; + +template <> +void CUDA_DEVICE_CALLABLE +MD5ListHasher::operator()(column_device_view data_col, + size_type offset_begin, + size_type offset_end, + md5_intermediate_data* hash_state) const +{ + for (int i = offset_begin; i < offset_end; i++) { + if (!data_col.is_null(i)) { + string_view key = data_col.element(i); + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); + + hash_state->message_length += len; + + // 64 bytes are processed in each hash step + constexpr int md5_chunk_size = 64; + if (hash_state->buffer_length + len < md5_chunk_size) { + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); + hash_state->buffer_length += len; + } else { + uint32_t copylen = md5_chunk_size - hash_state->buffer_length; + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); + md5_hash_step(hash_state); + + while (len > md5_chunk_size + copylen) { + std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); + md5_hash_step(hash_state); + copylen += md5_chunk_size; + } + + std::memcpy(hash_state->buffer, data + copylen, len - copylen); + hash_state->buffer_length = len - copylen; + } + } + } +} + +struct MD5Hash { + void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const + { + auto const full_length = (static_cast(hash_state->message_length)) << 3; + thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); + + // 64 bytes are processed in each hash step + constexpr int md5_chunk_size = 64; + // 8 bytes for the total message length, appended to the end of the last chunk processed + constexpr int message_length_size = 8; + // 1 byte for the end of the message flag + constexpr int end_of_message_size = 1; + if (hash_state->buffer_length + message_length_size + end_of_message_size <= md5_chunk_size) { + thrust::fill_n( + thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (md5_chunk_size - message_length_size - end_of_message_size - hash_state->buffer_length), + 0x00); + } else { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (md5_chunk_size - hash_state->buffer_length), + 0x00); + md5_hash_step(hash_state); + + thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); + } + + std::memcpy(hash_state->buffer + md5_chunk_size - message_length_size, + reinterpret_cast(&full_length), + message_length_size); + md5_hash_step(hash_state); + +#pragma unroll + for (int i = 0; i < 4; ++i) + uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); + } + + template ()>* = nullptr> + void __device__ operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const + { + cudf_assert(false && "MD5 Unsupported chrono type column"); + } + + template ()>* = nullptr> + void __device__ operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const + { + cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); + } + + template ()>* = nullptr> + void __device__ operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const + { + md5_process(normalize_nans_and_zeros_helper(col.element(row_index)), hash_state); + } + + template < + typename T, + std::enable_if_t() && !is_floating_point() && !is_chrono()>* = nullptr> + void CUDA_DEVICE_CALLABLE operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const + { + md5_process(col.element(row_index), hash_state); + } +}; + +template <> +void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const +{ + string_view key = col.element(row_index); + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); + + hash_state->message_length += len; + + if (hash_state->buffer_length + len < 64) { + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); + hash_state->buffer_length += len; + } else { + uint32_t copylen = 64 - hash_state->buffer_length; + std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); + md5_hash_step(hash_state); + + while (len > 64 + copylen) { + std::memcpy(hash_state->buffer, data + copylen, 64); + md5_hash_step(hash_state); + copylen += 64; + } + + std::memcpy(hash_state->buffer, data + copylen, len - copylen); + hash_state->buffer_length = len - copylen; + } +} + +template <> +void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const +{ + static constexpr size_type offsets_column_index{0}; + static constexpr size_type data_column_index{1}; + + column_device_view offsets = col.child(offsets_column_index); + column_device_view data = col.child(data_column_index); + + if (data.type().id() == type_id::LIST) cudf_assert(false && "Nested list unsupported"); + + cudf::type_dispatcher(data.type(), + MD5ListHasher{}, + data, + offsets.element(row_index), + offsets.element(row_index + 1), + hash_state); +} + } // namespace std::unique_ptr md5_hash(table_view const& input, From 7945efe4c91577ed566e7c4ba295bec801ac1be0 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 20 Sep 2021 10:27:45 -0700 Subject: [PATCH 06/54] Change to unsigned types for md5_chunk_size. Co-authored-by: Vyas Ramasubramani --- cpp/src/hash/md5_hash.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 206d1dcefe0..19f154e4ef2 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -87,12 +87,12 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) template void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* hash_state) { - uint32_t const len = sizeof(TKey); + uint32_t constexpr len = sizeof(TKey); uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; // 64 bytes are processed in each hash step - constexpr int md5_chunk_size = 64; + uint32_t constexpr md5_chunk_size = 64; if (hash_state->buffer_length + len < md5_chunk_size) { std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; @@ -181,7 +181,7 @@ MD5ListHasher::operator()(column_device_view data_col, hash_state->message_length += len; // 64 bytes are processed in each hash step - constexpr int md5_chunk_size = 64; + uint32_t constexpr md5_chunk_size = 64; if (hash_state->buffer_length + len < md5_chunk_size) { std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; From 81bf12909da4f605cdedf95749530c2defb6a384 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 20 Sep 2021 10:30:17 -0700 Subject: [PATCH 07/54] Remove unused parameter names. Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/src/hash/md5_hash.cu | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 19f154e4ef2..7634b37ffb8 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -121,19 +121,19 @@ bool md5_type_check(data_type dt) struct MD5ListHasher { template ()>* = nullptr> - void __device__ operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const + void __device__ operator()(column_device_view, + size_type, + size_type, + md5_intermediate_data*) const { cudf_assert(false && "MD5 Unsupported chrono type column"); } template ()>* = nullptr> - void __device__ operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const + void __device__ operator()(column_device_view, + size_type, + size_type, + md5_intermediate_data*) const { cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); } @@ -242,17 +242,17 @@ struct MD5Hash { } template ()>* = nullptr> - void __device__ operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const + void __device__ operator()(column_device_view, + size_type, + md5_intermediate_data*) const { cudf_assert(false && "MD5 Unsupported chrono type column"); } template ()>* = nullptr> - void __device__ operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const + void __device__ operator()(column_device_view, + size_type, + md5_intermediate_data*) const { cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); } From ab3779b4f578497e91be68215ca86a84426c9e1c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 28 Sep 2021 10:17:26 -0700 Subject: [PATCH 08/54] clang-format. --- cpp/src/hash/md5_hash.cu | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 7634b37ffb8..2eb4a0b8a4e 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -87,8 +87,8 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) template void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* hash_state) { - uint32_t constexpr len = sizeof(TKey); - uint8_t const* data = reinterpret_cast(&key); + uint32_t constexpr len = sizeof(TKey); + uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; // 64 bytes are processed in each hash step @@ -242,17 +242,13 @@ struct MD5Hash { } template ()>* = nullptr> - void __device__ operator()(column_device_view, - size_type, - md5_intermediate_data*) const + void __device__ operator()(column_device_view, size_type, md5_intermediate_data*) const { cudf_assert(false && "MD5 Unsupported chrono type column"); } template ()>* = nullptr> - void __device__ operator()(column_device_view, - size_type, - md5_intermediate_data*) const + void __device__ operator()(column_device_view, size_type, md5_intermediate_data*) const { cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); } From d2c4a0c6f04ffe0c2939e9753bf8713bde5ce321 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 28 Sep 2021 10:17:46 -0700 Subject: [PATCH 09/54] Replace magic number with named constant. --- cpp/src/hash/md5_hash.cu | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 2eb4a0b8a4e..1f901fbdca4 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -283,18 +283,19 @@ void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view co hash_state->message_length += len; - if (hash_state->buffer_length + len < 64) { + uint32_t constexpr md5_chunk_size = 64; + if (hash_state->buffer_length + len < md5_chunk_size) { std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; } else { - uint32_t copylen = 64 - hash_state->buffer_length; + uint32_t copylen = md5_chunk_size - hash_state->buffer_length; std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); md5_hash_step(hash_state); - while (len > 64 + copylen) { - std::memcpy(hash_state->buffer, data + copylen, 64); + while (len > md5_chunk_size + copylen) { + std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); md5_hash_step(hash_state); - copylen += 64; + copylen += md5_chunk_size; } std::memcpy(hash_state->buffer, data + copylen, len - copylen); From bd766bfb0e3ab6c64ed796714ab77dbc526461b4 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 19:59:04 -0700 Subject: [PATCH 10/54] Use memcpy instead of std::memcpy, add comments. --- cpp/src/hash/md5_hash.cu | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 1f901fbdca4..fff016022d7 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -65,7 +65,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) } uint32_t buffer_element_as_int; - std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); + memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); F = F + A + md5_hash_constants[j] + buffer_element_as_int; A = D; D = C; @@ -83,6 +83,9 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) /** * @brief Core MD5 element processing function + * + * This accepts arbitrary data, handles it as bytes, and calls the hash step + * when the buffer is filled up to message_chunk_size bytes. */ template void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* hash_state) @@ -93,22 +96,29 @@ void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* ha // 64 bytes are processed in each hash step uint32_t constexpr md5_chunk_size = 64; + if (hash_state->buffer_length + len < md5_chunk_size) { - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); + // The buffer will not be filled by this data. We copy the new data into + // the buffer but do not trigger a hash step yet. + memcpy(hash_state->buffer + hash_state->buffer_length, data, len); hash_state->buffer_length += len; } else { + // The buffer will be filled by this data. Copy a chunk of the data to fill + // the buffer and trigger a hash step. uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); + memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); md5_hash_step(hash_state); + // Take buffer-sized chunks of the data and do a hash step on each chunk. while (len > md5_chunk_size + copylen) { - std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); + memcpy(hash_state->buffer, data + copylen, md5_chunk_size); md5_hash_step(hash_state); copylen += md5_chunk_size; } - std::memcpy(hash_state->buffer, data + copylen, len - copylen); + // The remaining data chunk does not fill the buffer. We copy the data into + // the buffer but do not trigger a hash step yet. + memcpy(hash_state->buffer, data + copylen, len - copylen); hash_state->buffer_length = len - copylen; } } From e961f7c0e75574b52339ac757bc6f0738cb09ab4 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 21:42:58 -0700 Subject: [PATCH 11/54] Intermediate stage with optional iterator experiments on device. --- .../cudf/column/column_device_view.cuh | 20 +- cpp/src/hash/md5_hash.cu | 287 ++++++++---------- 2 files changed, 134 insertions(+), 173 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 5950edabbfc..42594e70c35 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -565,7 +565,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @throws cudf::logic_error if column datatype and Element type mismatch. */ template ())> - auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const + CUDA_HOST_DEVICE_CALLABLE auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this, has_nulls}}; @@ -605,7 +605,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @throws cudf::logic_error if column datatype and Element type mismatch. */ template ())> - auto optional_begin(contains_nulls::YES) const + CUDA_HOST_DEVICE_CALLABLE auto optional_begin(contains_nulls::YES) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this}}; @@ -644,7 +644,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @throws cudf::logic_error if column datatype and Element type mismatch. */ template ())> - auto optional_begin(contains_nulls::NO) const + CUDA_HOST_DEVICE_CALLABLE auto optional_begin(contains_nulls::NO) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this}}; @@ -1250,9 +1250,12 @@ struct optional_accessor { * @brief constructor * @param[in] _col column device view of cudf column */ - optional_accessor(column_device_view const& _col) : col{_col} + CUDA_HOST_DEVICE_CALLABLE optional_accessor(column_device_view const& _col) : col{_col} { - CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); +#ifndef __CUDA_ARCH__ + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), + "Data types do not match."); +#endif } CUDA_DEVICE_CALLABLE @@ -1275,11 +1278,14 @@ struct optional_accessor { * @brief constructor * @param[in] _col column device view of cudf column */ - optional_accessor(column_device_view const& _col, bool with_nulls) + CUDA_HOST_DEVICE_CALLABLE optional_accessor(column_device_view const& _col, bool with_nulls) : col{_col}, has_nulls{with_nulls} { - CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); +#ifndef __CUDA_ARCH__ + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), + "Data types do not match."); if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } +#endif } CUDA_DEVICE_CALLABLE diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index fff016022d7..4b8d47713d7 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -15,10 +15,14 @@ */ #include #include +#include #include +#include +#include #include #include #include +#include #include #include @@ -87,11 +91,10 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) * This accepts arbitrary data, handles it as bytes, and calls the hash step * when the buffer is filled up to message_chunk_size bytes. */ -template -void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* hash_state) +void CUDA_DEVICE_CALLABLE md5_process(uint8_t const* data, + uint32_t len, + md5_intermediate_data* hash_state) { - uint32_t constexpr len = sizeof(TKey); - uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; // 64 bytes are processed in each hash step @@ -123,6 +126,14 @@ void CUDA_DEVICE_CALLABLE md5_process(TKey const& key, md5_intermediate_data* ha } } +template +void CUDA_DEVICE_CALLABLE md5_process_fixed_width(T const& key, md5_intermediate_data* hash_state) +{ + uint8_t const* data = reinterpret_cast(&key); + uint32_t constexpr len = sizeof(T); + md5_process(data, len, hash_state); +} + // MD5 supported leaf data type check bool md5_type_check(data_type dt) { @@ -130,91 +141,66 @@ bool md5_type_check(data_type dt) } struct MD5ListHasher { - template ()>* = nullptr> - void __device__ operator()(column_device_view, - size_type, - size_type, - md5_intermediate_data*) const - { - cudf_assert(false && "MD5 Unsupported chrono type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view, - size_type, - size_type, - md5_intermediate_data*) const - { - cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const - { - for (int i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) { - md5_process(normalize_nans_and_zeros_helper(data_col.element(i)), hash_state); - } - } - } - - template < - typename T, - std::enable_if_t() && !is_floating_point() && !is_chrono()>* = nullptr> + template () && !is_chrono()) || + std::is_same_v)> void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, size_type offset_begin, size_type offset_end, md5_intermediate_data* hash_state) const { - for (int i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) md5_process(data_col.element(i), hash_state); + for (size_type i = offset_begin; i < offset_end; i++) { + if (!data_col.is_null(i)) { + auto const key = data_col.element(i); + if constexpr (is_floating_point()) { + md5_process_fixed_width(normalize_nans_and_zeros_helper(key), hash_state); + } else if constexpr (is_fixed_width() && !is_chrono()) { + md5_process_fixed_width(key, hash_state); + } else if constexpr (std::is_same_v) { + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); + md5_process(data, len, hash_state); + } else { + cudf_assert(false && "Unsupported type for hash function."); + } + } } - } -}; - -template <> -void CUDA_DEVICE_CALLABLE -MD5ListHasher::operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - md5_intermediate_data* hash_state) const -{ - for (int i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) { - string_view key = data_col.element(i); - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - - hash_state->message_length += len; - - // 64 bytes are processed in each hash step - uint32_t constexpr md5_chunk_size = 64; - if (hash_state->buffer_length + len < md5_chunk_size) { - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); - hash_state->buffer_length += len; - } else { - uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); - md5_hash_step(hash_state); - - while (len > md5_chunk_size + copylen) { - std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); - md5_hash_step(hash_state); - copylen += md5_chunk_size; + /* + auto const begin = data_col.optional_begin(cudf::contains_nulls::YES{}); + auto const row_begin = begin + offset_begin; + auto const row_end = begin + offset_end; + for (auto it = row_begin; it != row_end; it++) { + auto const element = *it; + if (element) { + if constexpr (is_floating_point()) { + md5_process_fixed_width(normalize_nans_and_zeros_helper(*element), hash_state); + } else if constexpr (is_fixed_width() && !is_chrono()) { + md5_process_fixed_width(*element, hash_state); + } else if constexpr (std::is_same_v) { + string_view const key = *element; + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); + md5_process(data, len, hash_state); + } else { + cudf_assert(false && "Unsupported type for hash function."); } - - std::memcpy(hash_state->buffer, data + copylen, len - copylen); - hash_state->buffer_length = len - copylen; } } + */ } -} + + template () || is_chrono()) && + !std::is_same_v)> + void CUDA_DEVICE_CALLABLE + operator()(column_device_view, size_type, size_type, md5_intermediate_data*) const + { + cudf_assert(false && "Unsupported type for hash function."); + } +}; struct MD5Hash { - void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const + void CUDA_DEVICE_CALLABLE finalize(md5_intermediate_data* hash_state, char* result_location) const { auto const full_length = (static_cast(hash_state->message_length)) << 3; thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); @@ -246,92 +232,62 @@ struct MD5Hash { message_length_size); md5_hash_step(hash_state); -#pragma unroll for (int i = 0; i < 4; ++i) uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); } - template ()>* = nullptr> - void __device__ operator()(column_device_view, size_type, md5_intermediate_data*) const - { - cudf_assert(false && "MD5 Unsupported chrono type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view, size_type, md5_intermediate_data*) const - { - cudf_assert(false && "MD5 Unsupported non-fixed-width type column"); - } - - template ()>* = nullptr> - void __device__ operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const - { - md5_process(normalize_nans_and_zeros_helper(col.element(row_index)), hash_state); - } - - template < - typename T, - std::enable_if_t() && !is_floating_point() && !is_chrono()>* = nullptr> + template () && !is_chrono()) || + std::is_same_v)> void CUDA_DEVICE_CALLABLE operator()(column_device_view col, size_type row_index, md5_intermediate_data* hash_state) const { - md5_process(col.element(row_index), hash_state); - } -}; - -template <> -void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state) const -{ - string_view key = col.element(row_index); - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - - hash_state->message_length += len; - - uint32_t constexpr md5_chunk_size = 64; - if (hash_state->buffer_length + len < md5_chunk_size) { - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, len); - hash_state->buffer_length += len; - } else { - uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - std::memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); - md5_hash_step(hash_state); - - while (len > md5_chunk_size + copylen) { - std::memcpy(hash_state->buffer, data + copylen, md5_chunk_size); - md5_hash_step(hash_state); - copylen += md5_chunk_size; + auto const key = col.element(row_index); + if constexpr (is_floating_point()) { + md5_process_fixed_width(normalize_nans_and_zeros_helper(key), hash_state); + } else if constexpr (is_fixed_width() && !is_chrono()) { + md5_process_fixed_width(key, hash_state); + } else if constexpr (std::is_same_v) { + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); + md5_process(data, len, hash_state); + } else { + cudf_assert(false && "Unsupported type for hash function."); } + } - std::memcpy(hash_state->buffer, data + copylen, len - copylen); - hash_state->buffer_length = len - copylen; + template () || is_chrono()) && + !std::is_same_v)> + void CUDA_DEVICE_CALLABLE operator()(column_device_view, size_type, md5_intermediate_data*) const + { + cudf_assert(false && "Unsupported type for hash function."); } -} +}; template <> void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, size_type row_index, md5_intermediate_data* hash_state) const { - static constexpr size_type offsets_column_index{0}; - static constexpr size_type data_column_index{1}; + /* + // I want to get a lists_column_device_view but that is not constructible on device. + auto const lists_col = lists_column_device_view(col); + auto const data = lists_col.child(); + auto const offsets = lists_col.offsets(); + */ - column_device_view offsets = col.child(offsets_column_index); - column_device_view data = col.child(data_column_index); + // Alternative that works, but reimplements getters from lists_column_device_view: + auto const data = col.child(lists_column_view::child_column_index); + auto const offsets = col.child(lists_column_view::offsets_column_index); if (data.type().id() == type_id::LIST) cudf_assert(false && "Nested list unsupported"); - cudf::type_dispatcher(data.type(), - MD5ListHasher{}, - data, - offsets.element(row_index), - offsets.element(row_index + 1), - hash_state); + auto const offset_begin = offsets.element(row_index); + auto const offset_end = offsets.element(row_index + 1); + + cudf::type_dispatcher(data.type(), MD5ListHasher{}, data, offset_begin, offset_end, hash_state); } } // namespace @@ -342,29 +298,32 @@ std::unique_ptr md5_hash(table_view const& input, { if (input.num_columns() == 0 || input.num_rows() == 0) { // Return the MD5 hash of a zero-length input. - const string_scalar string_128bit("d41d8cd98f00b204e9orig98ecf8427e"); - auto output = make_column_from_scalar(string_128bit, input.num_rows(), stream, mr); - return output; + string_scalar const string_128bit("d41d8cd98f00b204e9orig98ecf8427e"); + return make_column_from_scalar(string_128bit, input.num_rows(), stream, mr); } // Accepts string and fixed width columns, or single layer list columns holding those types - CUDF_EXPECTS( - std::all_of(input.begin(), - input.end(), - [](auto col) { - return md5_type_check(col.type()) || - (col.type().id() == type_id::LIST && md5_type_check(col.child(1).type())); - }), - "MD5 unsupported column type"); - + CUDF_EXPECTS(std::all_of(input.begin(), + input.end(), + [](auto const& col) { + if (col.type().id() == type_id::LIST) { + return md5_type_check(lists_column_view(col).child().type()); + } + return md5_type_check(col.type()); + }), + "Unsupported column type for hash function."); + + // Digest size in bytes + auto constexpr digest_size = 32; // Result column allocation and creation - auto begin = thrust::make_constant_iterator(32); + auto begin = thrust::make_constant_iterator(digest_size); auto offsets_column = cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr); - auto chars_column = strings::detail::create_chars_child_column(input.num_rows() * 32, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.data(); + auto chars_column = + strings::detail::create_chars_child_column(input.num_rows() * digest_size, stream, mr); + auto chars_view = chars_column->mutable_view(); + auto d_chars = chars_view.data(); rmm::device_buffer null_mask{0, stream, mr}; @@ -377,17 +336,13 @@ std::unique_ptr md5_hash(table_view const& input, [d_chars, device_input = *device_input] __device__(auto row_index) { md5_intermediate_data hash_state; MD5Hash hasher = MD5Hash{}; - for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { - if (device_input.column(col_index).is_valid(row_index)) { + for (auto const& col : device_input) { + if (col.is_valid(row_index)) { cudf::type_dispatcher( - device_input.column(col_index).type(), - hasher, - device_input.column(col_index), - row_index, - &hash_state); + col.type(), hasher, col, row_index, &hash_state); } } - hasher.finalize(&hash_state, d_chars + (row_index * 32)); + hasher.finalize(&hash_state, d_chars + (row_index * digest_size)); }); return make_strings_column( From 65446ed10b33c56b97678459226f22fd958fd3d3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 21:52:38 -0700 Subject: [PATCH 12/54] Revert changes to column_device_view.cuh. --- .../cudf/column/column_device_view.cuh | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 42594e70c35..5950edabbfc 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -565,7 +565,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @throws cudf::logic_error if column datatype and Element type mismatch. */ template ())> - CUDA_HOST_DEVICE_CALLABLE auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const + auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this, has_nulls}}; @@ -605,7 +605,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @throws cudf::logic_error if column datatype and Element type mismatch. */ template ())> - CUDA_HOST_DEVICE_CALLABLE auto optional_begin(contains_nulls::YES) const + auto optional_begin(contains_nulls::YES) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this}}; @@ -644,7 +644,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @throws cudf::logic_error if column datatype and Element type mismatch. */ template ())> - CUDA_HOST_DEVICE_CALLABLE auto optional_begin(contains_nulls::NO) const + auto optional_begin(contains_nulls::NO) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this}}; @@ -1250,12 +1250,9 @@ struct optional_accessor { * @brief constructor * @param[in] _col column device view of cudf column */ - CUDA_HOST_DEVICE_CALLABLE optional_accessor(column_device_view const& _col) : col{_col} + optional_accessor(column_device_view const& _col) : col{_col} { -#ifndef __CUDA_ARCH__ - CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), - "Data types do not match."); -#endif + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); } CUDA_DEVICE_CALLABLE @@ -1278,14 +1275,11 @@ struct optional_accessor { * @brief constructor * @param[in] _col column device view of cudf column */ - CUDA_HOST_DEVICE_CALLABLE optional_accessor(column_device_view const& _col, bool with_nulls) + optional_accessor(column_device_view const& _col, bool with_nulls) : col{_col}, has_nulls{with_nulls} { -#ifndef __CUDA_ARCH__ - CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), - "Data types do not match."); + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } -#endif } CUDA_DEVICE_CALLABLE From b6d166baea4af76e74af290bdfaa7f3fd38198ee Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 22:28:01 -0700 Subject: [PATCH 13/54] Clean up duplication in typed element processing. --- cpp/src/hash/md5_hash.cu | 95 +++++++++++++--------------------------- 1 file changed, 30 insertions(+), 65 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 4b8d47713d7..5b9513ac30d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -17,12 +17,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include @@ -91,9 +91,9 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) * This accepts arbitrary data, handles it as bytes, and calls the hash step * when the buffer is filled up to message_chunk_size bytes. */ -void CUDA_DEVICE_CALLABLE md5_process(uint8_t const* data, - uint32_t len, - md5_intermediate_data* hash_state) +void CUDA_DEVICE_CALLABLE md5_process_bytes(uint8_t const* data, + uint32_t len, + md5_intermediate_data* hash_state) { hash_state->message_length += len; @@ -126,14 +126,33 @@ void CUDA_DEVICE_CALLABLE md5_process(uint8_t const* data, } } +/** + * @brief MD5 typed element processor. + * + * This accepts typed data, normalizes it, and performs processing on raw bytes. + */ template -void CUDA_DEVICE_CALLABLE md5_process_fixed_width(T const& key, md5_intermediate_data* hash_state) +void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_state) { - uint8_t const* data = reinterpret_cast(&key); - uint32_t constexpr len = sizeof(T); - md5_process(data, len, hash_state); + if constexpr (is_fixed_width() && !is_chrono()) { + if constexpr (is_floating_point()) { + auto const normalized_key = normalize_nans_and_zeros_helper(key); + uint8_t const* data = reinterpret_cast(&normalized_key); + uint32_t constexpr len = sizeof(T); + md5_process_bytes(data, len, hash_state); + } else { + uint8_t const* data = reinterpret_cast(&key); + uint32_t constexpr len = sizeof(T); + md5_process_bytes(data, len, hash_state); + } + } else if constexpr (std::is_same_v) { + uint8_t const* data = reinterpret_cast(key.data()); + uint32_t len = static_cast(key.size_bytes()); + md5_process_bytes(data, len, hash_state); + } else { + cudf_assert(false && "Unsupported type for hash function."); + } } - // MD5 supported leaf data type check bool md5_type_check(data_type dt) { @@ -150,43 +169,8 @@ struct MD5ListHasher { md5_intermediate_data* hash_state) const { for (size_type i = offset_begin; i < offset_end; i++) { - if (!data_col.is_null(i)) { - auto const key = data_col.element(i); - if constexpr (is_floating_point()) { - md5_process_fixed_width(normalize_nans_and_zeros_helper(key), hash_state); - } else if constexpr (is_fixed_width() && !is_chrono()) { - md5_process_fixed_width(key, hash_state); - } else if constexpr (std::is_same_v) { - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - md5_process(data, len, hash_state); - } else { - cudf_assert(false && "Unsupported type for hash function."); - } - } - } - /* - auto const begin = data_col.optional_begin(cudf::contains_nulls::YES{}); - auto const row_begin = begin + offset_begin; - auto const row_end = begin + offset_end; - for (auto it = row_begin; it != row_end; it++) { - auto const element = *it; - if (element) { - if constexpr (is_floating_point()) { - md5_process_fixed_width(normalize_nans_and_zeros_helper(*element), hash_state); - } else if constexpr (is_fixed_width() && !is_chrono()) { - md5_process_fixed_width(*element, hash_state); - } else if constexpr (std::is_same_v) { - string_view const key = *element; - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - md5_process(data, len, hash_state); - } else { - cudf_assert(false && "Unsupported type for hash function."); - } - } + if (data_col.is_valid(i)) { md5_process(data_col.element(i), hash_state); } } - */ } template (row_index); - if constexpr (is_floating_point()) { - md5_process_fixed_width(normalize_nans_and_zeros_helper(key), hash_state); - } else if constexpr (is_fixed_width() && !is_chrono()) { - md5_process_fixed_width(key, hash_state); - } else if constexpr (std::is_same_v) { - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); - md5_process(data, len, hash_state); - } else { - cudf_assert(false && "Unsupported type for hash function."); - } + md5_process(col.element(row_index), hash_state); } template (column_device_view col, size_type row_index, md5_intermediate_data* hash_state) const { - /* - // I want to get a lists_column_device_view but that is not constructible on device. - auto const lists_col = lists_column_device_view(col); - auto const data = lists_col.child(); - auto const offsets = lists_col.offsets(); - */ - - // Alternative that works, but reimplements getters from lists_column_device_view: auto const data = col.child(lists_column_view::child_column_index); auto const offsets = col.child(lists_column_view::offsets_column_index); From 60da68411c240ac3228bec0f7a34a60a17752a00 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 22:32:58 -0700 Subject: [PATCH 14/54] More cleanup. --- cpp/src/hash/md5_hash.cu | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 5b9513ac30d..c6b9aea8656 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -136,9 +136,9 @@ void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_ { if constexpr (is_fixed_width() && !is_chrono()) { if constexpr (is_floating_point()) { - auto const normalized_key = normalize_nans_and_zeros_helper(key); - uint8_t const* data = reinterpret_cast(&normalized_key); - uint32_t constexpr len = sizeof(T); + T const normalized_key = normalize_nans_and_zeros_helper(key); + uint8_t const* data = reinterpret_cast(&normalized_key); + uint32_t constexpr len = sizeof(T); md5_process_bytes(data, len, hash_state); } else { uint8_t const* data = reinterpret_cast(&key); @@ -153,6 +153,7 @@ void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_ cudf_assert(false && "Unsupported type for hash function."); } } + // MD5 supported leaf data type check bool md5_type_check(data_type dt) { @@ -211,9 +212,9 @@ struct MD5Hash { thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); } - std::memcpy(hash_state->buffer + md5_chunk_size - message_length_size, - reinterpret_cast(&full_length), - message_length_size); + memcpy(hash_state->buffer + md5_chunk_size - message_length_size, + reinterpret_cast(&full_length), + message_length_size); md5_hash_step(hash_state); for (int i = 0; i < 4; ++i) From 3940daadb2b0755d447fb210e963234dc4793652 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 07:25:19 -0700 Subject: [PATCH 15/54] Simplify message length. --- cpp/src/hash/md5_hash.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index c6b9aea8656..e27955904a2 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -187,7 +187,6 @@ struct MD5ListHasher { struct MD5Hash { void CUDA_DEVICE_CALLABLE finalize(md5_intermediate_data* hash_state, char* result_location) const { - auto const full_length = (static_cast(hash_state->message_length)) << 3; thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); // 64 bytes are processed in each hash step @@ -212,6 +211,7 @@ struct MD5Hash { thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); } + uint64_t const full_length = hash_state->message_length * 8; memcpy(hash_state->buffer + md5_chunk_size - message_length_size, reinterpret_cast(&full_length), message_length_size); From 6af54511fb46c5969e329243470b45a6b195fb2e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 07:41:14 -0700 Subject: [PATCH 16/54] Make normalization pass-through for non-floating fixed-width types. --- .../cudf/detail/utilities/hash_functions.cuh | 17 +++++++++-------- cpp/src/hash/md5_hash.cu | 14 ++++---------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index b87c53cc913..3dc46326e26 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -31,16 +31,17 @@ namespace detail { /** * Normalization of floating point NANs and zeros helper */ -template ::value>* = nullptr> -T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros_helper(T key) +template +T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros_helper(T const& key) { - if (isnan(key)) { - return std::numeric_limits::quiet_NaN(); - } else if (key == T{0.0}) { - return T{0.0}; - } else { - return key; + if constexpr (is_floating_point()) { + if (isnan(key)) { + return std::numeric_limits::quiet_NaN(); + } else if (key == T{0.0}) { + return T{0.0}; + } } + return key; } /** diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index e27955904a2..b82f25512e7 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -135,16 +135,10 @@ template void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_state) { if constexpr (is_fixed_width() && !is_chrono()) { - if constexpr (is_floating_point()) { - T const normalized_key = normalize_nans_and_zeros_helper(key); - uint8_t const* data = reinterpret_cast(&normalized_key); - uint32_t constexpr len = sizeof(T); - md5_process_bytes(data, len, hash_state); - } else { - uint8_t const* data = reinterpret_cast(&key); - uint32_t constexpr len = sizeof(T); - md5_process_bytes(data, len, hash_state); - } + T const normalized_key = normalize_nans_and_zeros_helper(key); + uint8_t const* data = reinterpret_cast(&normalized_key); + uint32_t constexpr len = sizeof(T); + md5_process_bytes(data, len, hash_state); } else if constexpr (std::is_same_v) { uint8_t const* data = reinterpret_cast(key.data()); uint32_t len = static_cast(key.size_bytes()); From b33ea933580307b5cdb5ef03185ee332704dad4c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 07:41:25 -0700 Subject: [PATCH 17/54] Prefer char over uint8_t. --- cpp/src/hash/md5_hash.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index b82f25512e7..172dfff173e 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -91,7 +91,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) * This accepts arbitrary data, handles it as bytes, and calls the hash step * when the buffer is filled up to message_chunk_size bytes. */ -void CUDA_DEVICE_CALLABLE md5_process_bytes(uint8_t const* data, +void CUDA_DEVICE_CALLABLE md5_process_bytes(char const* data, uint32_t len, md5_intermediate_data* hash_state) { @@ -136,12 +136,12 @@ void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_ { if constexpr (is_fixed_width() && !is_chrono()) { T const normalized_key = normalize_nans_and_zeros_helper(key); - uint8_t const* data = reinterpret_cast(&normalized_key); + char const* data = reinterpret_cast(&normalized_key); uint32_t constexpr len = sizeof(T); md5_process_bytes(data, len, hash_state); } else if constexpr (std::is_same_v) { - uint8_t const* data = reinterpret_cast(key.data()); - uint32_t len = static_cast(key.size_bytes()); + char const* data = reinterpret_cast(key.data()); + uint32_t len = static_cast(key.size_bytes()); md5_process_bytes(data, len, hash_state); } else { cudf_assert(false && "Unsupported type for hash function."); @@ -207,7 +207,7 @@ struct MD5Hash { uint64_t const full_length = hash_state->message_length * 8; memcpy(hash_state->buffer + md5_chunk_size - message_length_size, - reinterpret_cast(&full_length), + reinterpret_cast(&full_length), message_length_size); md5_hash_step(hash_state); From af42f7eb43a48c51d86fabc4c30a959876e5decc Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 08:22:01 -0700 Subject: [PATCH 18/54] Improve helper functions. --- .../cudf/detail/utilities/hash_functions.cuh | 4 +- cpp/src/hash/md5_hash.cu | 46 +++++++++++-------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 3dc46326e26..277a06908c6 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -29,10 +29,10 @@ namespace cudf { namespace detail { /** - * Normalization of floating point NANs and zeros helper + * Normalization of floating point NaNs and zeros, passthrough for all other values. */ template -T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros_helper(T const& key) +T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros(T const& key) { if constexpr (is_floating_point()) { if (isnan(key)) { diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 172dfff173e..258be2a1a8d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -126,6 +126,21 @@ void CUDA_DEVICE_CALLABLE md5_process_bytes(char const* data, } } +template +auto CUDA_DEVICE_CALLABLE get_data(Key const& k) +{ + if constexpr (is_fixed_width() && !is_chrono()) { + return thrust::make_pair(reinterpret_cast(&k), sizeof(Key)); + } else { + cudf_assert(false && "Unsupported type."); + } +} + +auto CUDA_DEVICE_CALLABLE get_data(string_view const& s) +{ + return thrust::make_pair(s.data(), s.size_bytes()); +} + /** * @brief MD5 typed element processor. * @@ -134,24 +149,9 @@ void CUDA_DEVICE_CALLABLE md5_process_bytes(char const* data, template void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_state) { - if constexpr (is_fixed_width() && !is_chrono()) { - T const normalized_key = normalize_nans_and_zeros_helper(key); - char const* data = reinterpret_cast(&normalized_key); - uint32_t constexpr len = sizeof(T); - md5_process_bytes(data, len, hash_state); - } else if constexpr (std::is_same_v) { - char const* data = reinterpret_cast(key.data()); - uint32_t len = static_cast(key.size_bytes()); - md5_process_bytes(data, len, hash_state); - } else { - cudf_assert(false && "Unsupported type for hash function."); - } -} - -// MD5 supported leaf data type check -bool md5_type_check(data_type dt) -{ - return !is_chrono(dt) && (is_fixed_width(dt) || (dt.id() == type_id::STRING)); + auto const normalized_key = normalize_nans_and_zeros(key); + auto const [data, size] = get_data(normalized_key); + md5_process_bytes(data, size, hash_state); } struct MD5ListHasher { @@ -250,6 +250,12 @@ void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, cudf::type_dispatcher(data.type(), MD5ListHasher{}, data, offset_begin, offset_end, hash_state); } +// MD5 supported leaf data type check +constexpr inline bool md5_leaf_type_check(data_type dt) +{ + return (is_fixed_width(dt) && !is_chrono(dt)) || dt.id() == type_id::STRING; +} + } // namespace std::unique_ptr md5_hash(table_view const& input, @@ -267,9 +273,9 @@ std::unique_ptr md5_hash(table_view const& input, input.end(), [](auto const& col) { if (col.type().id() == type_id::LIST) { - return md5_type_check(lists_column_view(col).child().type()); + return md5_leaf_type_check(lists_column_view(col).child().type()); } - return md5_type_check(col.type()); + return md5_leaf_type_check(col.type()); }), "Unsupported column type for hash function."); From f3038e1d8ad54034f86683e89aa515c773496e60 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 08:22:07 -0700 Subject: [PATCH 19/54] Refactor finalize. --- cpp/src/hash/md5_hash.cu | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 258be2a1a8d..c22aaaa673d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -181,34 +181,33 @@ struct MD5ListHasher { struct MD5Hash { void CUDA_DEVICE_CALLABLE finalize(md5_intermediate_data* hash_state, char* result_location) const { - thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); - // 64 bytes are processed in each hash step constexpr int md5_chunk_size = 64; // 8 bytes for the total message length, appended to the end of the last chunk processed constexpr int message_length_size = 8; // 1 byte for the end of the message flag constexpr int end_of_message_size = 1; - if (hash_state->buffer_length + message_length_size + end_of_message_size <= md5_chunk_size) { - thrust::fill_n( - thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (md5_chunk_size - message_length_size - end_of_message_size - hash_state->buffer_length), - 0x00); + + auto const padding_begin = thrust::fill_n( + thrust::seq, hash_state->buffer + hash_state->buffer_length, end_of_message_size, 0x80); + auto const buffer_end = hash_state->buffer + md5_chunk_size; + auto const message_end = buffer_end - message_length_size; + + if (padding_begin <= message_end) { + // The message size fits in this hash step. Pad up to the point where the message size + // goes with zeros. + thrust::fill(thrust::seq, padding_begin, message_end, 0x00); } else { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (md5_chunk_size - hash_state->buffer_length), - 0x00); + // The message size will be processed in a separate hash step. Pad the remainder of the buffer + // with zeros for this hash step. + thrust::fill(thrust::seq, padding_begin, buffer_end, 0x00); md5_hash_step(hash_state); - - thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); + // Pad up to the point where the message size goes with zeros. + thrust::fill(thrust::seq, hash_state->buffer, message_end, 0x00); } uint64_t const full_length = hash_state->message_length * 8; - memcpy(hash_state->buffer + md5_chunk_size - message_length_size, - reinterpret_cast(&full_length), - message_length_size); + memcpy(message_end, reinterpret_cast(&full_length), message_length_size); md5_hash_step(hash_state); for (int i = 0; i < 4; ++i) From e8c6e3efb637ec1e67abf450f81dd9648840d9d8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 08:23:32 -0700 Subject: [PATCH 20/54] Rename message length variable. --- cpp/src/hash/md5_hash.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index c22aaaa673d..95ad5e254d9 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -206,8 +206,9 @@ struct MD5Hash { thrust::fill(thrust::seq, hash_state->buffer, message_end, 0x00); } - uint64_t const full_length = hash_state->message_length * 8; - memcpy(message_end, reinterpret_cast(&full_length), message_length_size); + uint64_t const message_length_in_bits = hash_state->message_length * 8; + memcpy( + message_end, reinterpret_cast(&message_length_in_bits), message_length_size); md5_hash_step(hash_state); for (int i = 0; i < 4; ++i) From c6867e6a21526eda6f825d97d33d60844b4b47a3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 08:34:38 -0700 Subject: [PATCH 21/54] Additional simplifications to finalize. --- cpp/src/hash/md5_hash.cu | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 95ad5e254d9..dd889995852 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -183,15 +183,17 @@ struct MD5Hash { { // 64 bytes are processed in each hash step constexpr int md5_chunk_size = 64; - // 8 bytes for the total message length, appended to the end of the last chunk processed - constexpr int message_length_size = 8; - // 1 byte for the end of the message flag - constexpr int end_of_message_size = 1; + // Add a one bit flag (10000000) to signal the end of the message + uint8_t constexpr end_of_message = 0x80; + // The message length is appended to the end of the last chunk processed + uint64_t const message_length_in_bits = hash_state->message_length * 8; - auto const padding_begin = thrust::fill_n( - thrust::seq, hash_state->buffer + hash_state->buffer_length, end_of_message_size, 0x80); - auto const buffer_end = hash_state->buffer + md5_chunk_size; - auto const message_end = buffer_end - message_length_size; + auto const padding_begin = thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length, + sizeof(end_of_message), + end_of_message); + auto const buffer_end = hash_state->buffer + md5_chunk_size; + auto const message_end = buffer_end - sizeof(message_length_in_bits); if (padding_begin <= message_end) { // The message size fits in this hash step. Pad up to the point where the message size @@ -206,9 +208,7 @@ struct MD5Hash { thrust::fill(thrust::seq, hash_state->buffer, message_end, 0x00); } - uint64_t const message_length_in_bits = hash_state->message_length * 8; - memcpy( - message_end, reinterpret_cast(&message_length_in_bits), message_length_size); + memcpy(message_end, &message_length_in_bits, sizeof(message_length_in_bits)); md5_hash_step(hash_state); for (int i = 0; i < 4; ++i) From 4e8817e6569df97a9a87d1163c1db75f2918783d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 1 Oct 2021 08:47:28 -0700 Subject: [PATCH 22/54] Simplify padding. --- cpp/src/hash/md5_hash.cu | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index dd889995852..d3d22e024d9 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -188,31 +188,29 @@ struct MD5Hash { // The message length is appended to the end of the last chunk processed uint64_t const message_length_in_bits = hash_state->message_length * 8; - auto const padding_begin = thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length, - sizeof(end_of_message), - end_of_message); - auto const buffer_end = hash_state->buffer + md5_chunk_size; - auto const message_end = buffer_end - sizeof(message_length_in_bits); - - if (padding_begin <= message_end) { - // The message size fits in this hash step. Pad up to the point where the message size - // goes with zeros. - thrust::fill(thrust::seq, padding_begin, message_end, 0x00); - } else { + auto padding_begin = thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length, + sizeof(end_of_message), + end_of_message); + auto const buffer_end = hash_state->buffer + md5_chunk_size; + auto const message_end = buffer_end - sizeof(message_length_in_bits); + + if (padding_begin > message_end) { // The message size will be processed in a separate hash step. Pad the remainder of the buffer // with zeros for this hash step. thrust::fill(thrust::seq, padding_begin, buffer_end, 0x00); md5_hash_step(hash_state); - // Pad up to the point where the message size goes with zeros. - thrust::fill(thrust::seq, hash_state->buffer, message_end, 0x00); + padding_begin = hash_state->buffer; } + // Pad up to the point where the message size goes with zeros. + thrust::fill(thrust::seq, padding_begin, message_end, 0x00); memcpy(message_end, &message_length_in_bits, sizeof(message_length_in_bits)); md5_hash_step(hash_state); - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; ++i) { uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); + } } template Date: Mon, 11 Oct 2021 11:49:39 -0700 Subject: [PATCH 23/54] Consolidate MD5 hash constants into .cu file, remove includes. --- .../cudf/detail/utilities/hash_functions.cuh | 1 - cpp/src/hash/hash_constants.hpp | 64 ------------------- cpp/src/hash/md5_hash.cu | 39 ++++++++++- 3 files changed, 38 insertions(+), 66 deletions(-) delete mode 100644 cpp/src/hash/hash_constants.hpp diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 277a06908c6..07d9233dae8 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -21,7 +21,6 @@ #include #include #include -#include using hash_value_type = uint32_t; diff --git a/cpp/src/hash/hash_constants.hpp b/cpp/src/hash/hash_constants.hpp deleted file mode 100644 index 0a5a9e0be93..00000000000 --- a/cpp/src/hash/hash_constants.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -namespace cudf { -namespace detail { - -struct md5_intermediate_data { - uint64_t message_length = 0; - uint32_t buffer_length = 0; - uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; - uint8_t buffer[64]; -}; - -// Type for the shift constants table. -using md5_shift_constants_type = uint32_t; - -__device__ __constant__ md5_shift_constants_type md5_shift_constants[16] = { - 7, - 12, - 17, - 22, - 5, - 9, - 14, - 20, - 4, - 11, - 16, - 23, - 6, - 10, - 15, - 21, -}; - -// Type for the hash constants table. -using md5_hash_constants_type = uint32_t; - -__device__ __constant__ md5_hash_constants_type md5_hash_constants[64] = { - 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, - 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, - 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, - 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, - 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, - 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, - 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, - 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, -}; -} // namespace detail -} // namespace cudf diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index d3d22e024d9..4febfeac03c 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -35,6 +34,44 @@ namespace cudf { namespace detail { namespace { + +static const __device__ __constant__ uint32_t md5_shift_constants[16] = { + 7, + 12, + 17, + 22, + 5, + 9, + 14, + 20, + 4, + 11, + 16, + 23, + 6, + 10, + 15, + 21, +}; + +static const __device__ __constant__ uint32_t md5_hash_constants[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; + +struct md5_intermediate_data { + uint64_t message_length = 0; + uint32_t buffer_length = 0; + uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; + uint8_t buffer[64]; +}; + /** * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, * updating the hash value so far. Does not zero out the buffer contents. From 37cb283da0d6d245b01ea0b1899d4fb40b33c9e9 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 12 Oct 2021 14:25:53 -0700 Subject: [PATCH 24/54] Implement hash_circular_buffer. --- cpp/src/hash/md5_hash.cu | 176 +++++++++++++++++++++------------------ 1 file changed, 94 insertions(+), 82 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 4febfeac03c..8f21653d8ae 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -29,6 +29,8 @@ #include #include +#include + namespace cudf { namespace detail { @@ -65,18 +67,77 @@ static const __device__ __constant__ uint32_t md5_hash_constants[64] = { 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, }; -struct md5_intermediate_data { +template +struct hash_circular_buffer { + T storage[capacity]; + T* cur; + + CUDA_DEVICE_CALLABLE hash_circular_buffer() : cur(storage) {} + + CUDA_DEVICE_CALLABLE T* begin() { return storage; } + CUDA_DEVICE_CALLABLE const T* begin() const { return storage; } + + CUDA_DEVICE_CALLABLE T* end() { return &storage[capacity]; } + CUDA_DEVICE_CALLABLE const T* end() const { return &storage[capacity]; } + + CUDA_DEVICE_CALLABLE int size() const + { + return std::distance(begin(), static_cast(cur)); + } + + CUDA_DEVICE_CALLABLE int available_space() const { return capacity - size(); } + + template + CUDA_DEVICE_CALLABLE void put(T const* in, int size, hash_step_callable hash_step) + { + int space = available_space(); + int copy_start = 0; + while (size >= space) { + // The buffer will be filled by this chunk of data. Copy a chunk of the + // data to fill the buffer and trigger a hash step. + memcpy(cur, in + copy_start, space); + hash_step(); + size -= space; + copy_start += space; + cur = begin(); + space = available_space(); + } + // The buffer will not be filled by the remaining data. That is, `size >= 0 + // && size < capacity`. We copy the remaining data into the buffer but do + // not trigger a hash step. + memcpy(cur, in + copy_start, size); + cur += size; + } + + template + CUDA_DEVICE_CALLABLE void pad(int space_to_leave, hash_step_callable hash_step) + { + int space = available_space(); + if (space_to_leave > space) { + memset(cur, 0x00, space); + hash_step(); + cur = begin(); + space = available_space(); + } + memset(cur, 0x00, space - space_to_leave); + cur += space - space_to_leave; + } + + CUDA_DEVICE_CALLABLE T& operator[](size_t idx) { return storage[idx]; } + CUDA_DEVICE_CALLABLE const T& operator[](size_t idx) const { return storage[idx]; } +}; + +struct md5_hash_state { uint64_t message_length = 0; - uint32_t buffer_length = 0; uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; - uint8_t buffer[64]; + hash_circular_buffer buffer; }; /** * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, * updating the hash value so far. Does not zero out the buffer contents. */ -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) +void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state) { uint32_t A = hash_state->hash_value[0]; uint32_t B = hash_state->hash_value[1]; @@ -106,7 +167,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) } uint32_t buffer_element_as_int; - memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); + memcpy(&buffer_element_as_int, &hash_state->buffer[g * 4], 4); F = F + A + md5_hash_constants[j] + buffer_element_as_int; A = D; D = C; @@ -118,8 +179,6 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) hash_state->hash_value[1] += B; hash_state->hash_value[2] += C; hash_state->hash_value[3] += D; - - hash_state->buffer_length = 0; } /** @@ -128,46 +187,20 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state) * This accepts arbitrary data, handles it as bytes, and calls the hash step * when the buffer is filled up to message_chunk_size bytes. */ -void CUDA_DEVICE_CALLABLE md5_process_bytes(char const* data, +void CUDA_DEVICE_CALLABLE md5_process_bytes(uint8_t const* data, uint32_t len, - md5_intermediate_data* hash_state) + md5_hash_state* hash_state) { hash_state->message_length += len; - - // 64 bytes are processed in each hash step - uint32_t constexpr md5_chunk_size = 64; - - if (hash_state->buffer_length + len < md5_chunk_size) { - // The buffer will not be filled by this data. We copy the new data into - // the buffer but do not trigger a hash step yet. - memcpy(hash_state->buffer + hash_state->buffer_length, data, len); - hash_state->buffer_length += len; - } else { - // The buffer will be filled by this data. Copy a chunk of the data to fill - // the buffer and trigger a hash step. - uint32_t copylen = md5_chunk_size - hash_state->buffer_length; - memcpy(hash_state->buffer + hash_state->buffer_length, data, copylen); - md5_hash_step(hash_state); - - // Take buffer-sized chunks of the data and do a hash step on each chunk. - while (len > md5_chunk_size + copylen) { - memcpy(hash_state->buffer, data + copylen, md5_chunk_size); - md5_hash_step(hash_state); - copylen += md5_chunk_size; - } - - // The remaining data chunk does not fill the buffer. We copy the data into - // the buffer but do not trigger a hash step yet. - memcpy(hash_state->buffer, data + copylen, len - copylen); - hash_state->buffer_length = len - copylen; - } + auto hash_step = [hash_state]() { md5_hash_step(hash_state); }; + hash_state->buffer.put(data, len, hash_step); } template auto CUDA_DEVICE_CALLABLE get_data(Key const& k) { if constexpr (is_fixed_width() && !is_chrono()) { - return thrust::make_pair(reinterpret_cast(&k), sizeof(Key)); + return thrust::make_pair(reinterpret_cast(&k), sizeof(Key)); } else { cudf_assert(false && "Unsupported type."); } @@ -175,7 +208,7 @@ auto CUDA_DEVICE_CALLABLE get_data(Key const& k) auto CUDA_DEVICE_CALLABLE get_data(string_view const& s) { - return thrust::make_pair(s.data(), s.size_bytes()); + return thrust::make_pair(reinterpret_cast(s.data()), s.size_bytes()); } /** @@ -183,8 +216,8 @@ auto CUDA_DEVICE_CALLABLE get_data(string_view const& s) * * This accepts typed data, normalizes it, and performs processing on raw bytes. */ -template -void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_state) +template +void CUDA_DEVICE_CALLABLE md5_process(Key const& key, md5_hash_state* hash_state) { auto const normalized_key = normalize_nans_and_zeros(key); auto const [data, size] = get_data(normalized_key); @@ -192,58 +225,37 @@ void CUDA_DEVICE_CALLABLE md5_process(T const& key, md5_intermediate_data* hash_ } struct MD5ListHasher { - template () && !is_chrono()) || - std::is_same_v)> + template void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, size_type offset_begin, size_type offset_end, - md5_intermediate_data* hash_state) const + md5_hash_state* hash_state) const { - for (size_type i = offset_begin; i < offset_end; i++) { - if (data_col.is_valid(i)) { md5_process(data_col.element(i), hash_state); } + if constexpr ((is_fixed_width() && !is_chrono()) || + std::is_same_v) { + for (size_type i = offset_begin; i < offset_end; i++) { + if (data_col.is_valid(i)) { md5_process(data_col.element(i), hash_state); } + } + } else { + cudf_assert(false && "Unsupported type."); } } - - template () || is_chrono()) && - !std::is_same_v)> - void CUDA_DEVICE_CALLABLE - operator()(column_device_view, size_type, size_type, md5_intermediate_data*) const - { - cudf_assert(false && "Unsupported type for hash function."); - } }; struct MD5Hash { - void CUDA_DEVICE_CALLABLE finalize(md5_intermediate_data* hash_state, char* result_location) const + void CUDA_DEVICE_CALLABLE finalize(md5_hash_state* hash_state, char* result_location) const { - // 64 bytes are processed in each hash step - constexpr int md5_chunk_size = 64; // Add a one bit flag (10000000) to signal the end of the message uint8_t constexpr end_of_message = 0x80; // The message length is appended to the end of the last chunk processed uint64_t const message_length_in_bits = hash_state->message_length * 8; - auto padding_begin = thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length, - sizeof(end_of_message), - end_of_message); - auto const buffer_end = hash_state->buffer + md5_chunk_size; - auto const message_end = buffer_end - sizeof(message_length_in_bits); - - if (padding_begin > message_end) { - // The message size will be processed in a separate hash step. Pad the remainder of the buffer - // with zeros for this hash step. - thrust::fill(thrust::seq, padding_begin, buffer_end, 0x00); - md5_hash_step(hash_state); - padding_begin = hash_state->buffer; - } - // Pad up to the point where the message size goes with zeros. - thrust::fill(thrust::seq, padding_begin, message_end, 0x00); - - memcpy(message_end, &message_length_in_bits, sizeof(message_length_in_bits)); - md5_hash_step(hash_state); + auto hash_step = [hash_state]() { md5_hash_step(hash_state); }; + hash_state->buffer.put(&end_of_message, sizeof(end_of_message), hash_step); + hash_state->buffer.pad(sizeof(message_length_in_bits), hash_step); + hash_state->buffer.put(reinterpret_cast(&message_length_in_bits), + sizeof(message_length_in_bits), + hash_step); for (int i = 0; i < 4; ++i) { uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); @@ -255,7 +267,7 @@ struct MD5Hash { std::is_same_v)> void CUDA_DEVICE_CALLABLE operator()(column_device_view col, size_type row_index, - md5_intermediate_data* hash_state) const + md5_hash_state* hash_state) const { md5_process(col.element(row_index), hash_state); } @@ -263,7 +275,7 @@ struct MD5Hash { template () || is_chrono()) && !std::is_same_v)> - void CUDA_DEVICE_CALLABLE operator()(column_device_view, size_type, md5_intermediate_data*) const + void CUDA_DEVICE_CALLABLE operator()(column_device_view, size_type, md5_hash_state*) const { cudf_assert(false && "Unsupported type for hash function."); } @@ -272,7 +284,7 @@ struct MD5Hash { template <> void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, size_type row_index, - md5_intermediate_data* hash_state) const + md5_hash_state* hash_state) const { auto const data = col.child(lists_column_view::child_column_index); auto const offsets = col.child(lists_column_view::offsets_column_index); @@ -335,7 +347,7 @@ std::unique_ptr md5_hash(table_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), [d_chars, device_input = *device_input] __device__(auto row_index) { - md5_intermediate_data hash_state; + md5_hash_state hash_state; MD5Hash hasher = MD5Hash{}; for (auto const& col : device_input) { if (col.is_valid(row_index)) { From a3be6583f2ac702887b662fc7299565e741620c2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 10:56:42 -0700 Subject: [PATCH 25/54] Remove non-const accessor. --- cpp/src/hash/md5_hash.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 8f21653d8ae..25435894cb7 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -123,8 +123,7 @@ struct hash_circular_buffer { cur += space - space_to_leave; } - CUDA_DEVICE_CALLABLE T& operator[](size_t idx) { return storage[idx]; } - CUDA_DEVICE_CALLABLE const T& operator[](size_t idx) const { return storage[idx]; } + CUDA_DEVICE_CALLABLE const T& operator[](int idx) const { return storage[idx]; } }; struct md5_hash_state { From 088fa899ace43076229cc38f65e5b8d850741e0c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 11:58:48 -0700 Subject: [PATCH 26/54] Replace SFINAE with constexpr if. --- cpp/src/hash/md5_hash.cu | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 25435894cb7..ea82e0226e1 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -261,22 +261,17 @@ struct MD5Hash { } } - template () && !is_chrono()) || - std::is_same_v)> + template void CUDA_DEVICE_CALLABLE operator()(column_device_view col, size_type row_index, md5_hash_state* hash_state) const { - md5_process(col.element(row_index), hash_state); - } - - template () || is_chrono()) && - !std::is_same_v)> - void CUDA_DEVICE_CALLABLE operator()(column_device_view, size_type, md5_hash_state*) const - { - cudf_assert(false && "Unsupported type for hash function."); + if constexpr ((is_fixed_width() && !is_chrono()) || + std::is_same_v) { + md5_process(col.element(row_index), hash_state); + } else { + cudf_assert(false && "Unsupported type for hash function."); + } } }; From 331107b626cfcf8d681474678d442f4055a00a41 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 12:55:44 -0700 Subject: [PATCH 27/54] Refactor buffer and hash_step callbacks. --- cpp/src/hash/md5_hash.cu | 54 +++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index ea82e0226e1..97a2b10a49d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -67,28 +67,28 @@ static const __device__ __constant__ uint32_t md5_hash_constants[64] = { 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, }; -template +template struct hash_circular_buffer { - T storage[capacity]; - T* cur; + uint8_t storage[capacity]; + uint8_t* cur; CUDA_DEVICE_CALLABLE hash_circular_buffer() : cur(storage) {} - CUDA_DEVICE_CALLABLE T* begin() { return storage; } - CUDA_DEVICE_CALLABLE const T* begin() const { return storage; } + CUDA_DEVICE_CALLABLE uint8_t* begin() { return storage; } + CUDA_DEVICE_CALLABLE const uint8_t* begin() const { return storage; } - CUDA_DEVICE_CALLABLE T* end() { return &storage[capacity]; } - CUDA_DEVICE_CALLABLE const T* end() const { return &storage[capacity]; } + CUDA_DEVICE_CALLABLE uint8_t* end() { return &storage[capacity]; } + CUDA_DEVICE_CALLABLE const uint8_t* end() const { return &storage[capacity]; } CUDA_DEVICE_CALLABLE int size() const { - return std::distance(begin(), static_cast(cur)); + return std::distance(begin(), static_cast(cur)); } CUDA_DEVICE_CALLABLE int available_space() const { return capacity - size(); } template - CUDA_DEVICE_CALLABLE void put(T const* in, int size, hash_step_callable hash_step) + CUDA_DEVICE_CALLABLE void put(uint8_t const* in, int size, hash_step_callable hash_step) { int space = available_space(); int copy_start = 0; @@ -123,13 +123,30 @@ struct hash_circular_buffer { cur += space - space_to_leave; } - CUDA_DEVICE_CALLABLE const T& operator[](int idx) const { return storage[idx]; } + CUDA_DEVICE_CALLABLE const uint8_t& operator[](int idx) const { return storage[idx]; } }; +// Forward declarations +struct md5_hash_state; +void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state); + struct md5_hash_state { uint64_t message_length = 0; uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; - hash_circular_buffer buffer; + hash_circular_buffer<64> buffer; + + CUDA_DEVICE_CALLABLE void put(uint8_t const* in, int size, bool extend_message_length) + { + auto hash_step = [this]() { md5_hash_step(this); }; + this->buffer.put(in, size, hash_step); + if (extend_message_length) { message_length += size; } + } + + CUDA_DEVICE_CALLABLE void pad(int space_to_leave) + { + auto hash_step = [this]() { md5_hash_step(this); }; + this->buffer.pad(space_to_leave, hash_step); + } }; /** @@ -190,9 +207,7 @@ void CUDA_DEVICE_CALLABLE md5_process_bytes(uint8_t const* data, uint32_t len, md5_hash_state* hash_state) { - hash_state->message_length += len; - auto hash_step = [hash_state]() { md5_hash_step(hash_state); }; - hash_state->buffer.put(data, len, hash_step); + hash_state->put(data, len, true); } template @@ -249,12 +264,11 @@ struct MD5Hash { // The message length is appended to the end of the last chunk processed uint64_t const message_length_in_bits = hash_state->message_length * 8; - auto hash_step = [hash_state]() { md5_hash_step(hash_state); }; - hash_state->buffer.put(&end_of_message, sizeof(end_of_message), hash_step); - hash_state->buffer.pad(sizeof(message_length_in_bits), hash_step); - hash_state->buffer.put(reinterpret_cast(&message_length_in_bits), - sizeof(message_length_in_bits), - hash_step); + hash_state->put(&end_of_message, sizeof(end_of_message), false); + hash_state->pad(sizeof(message_length_in_bits)); + hash_state->put(reinterpret_cast(&message_length_in_bits), + sizeof(message_length_in_bits), + false); for (int i = 0; i < 4; ++i) { uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); From af98bab26c8eaa3cb06ea2d3f9602afa68f1cc61 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 13:09:19 -0700 Subject: [PATCH 28/54] Move processing functions around. --- cpp/src/hash/md5_hash.cu | 72 +++++++++++++++------------------------- 1 file changed, 27 insertions(+), 45 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 97a2b10a49d..fd2ebbe4fbd 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -126,6 +126,21 @@ struct hash_circular_buffer { CUDA_DEVICE_CALLABLE const uint8_t& operator[](int idx) const { return storage[idx]; } }; +template +auto CUDA_DEVICE_CALLABLE get_data(Key const& k) +{ + if constexpr (is_fixed_width() && !is_chrono()) { + return thrust::make_pair(reinterpret_cast(&k), sizeof(Key)); + } else { + cudf_assert(false && "Unsupported type."); + } +} + +auto CUDA_DEVICE_CALLABLE get_data(string_view const& s) +{ + return thrust::make_pair(reinterpret_cast(s.data()), s.size_bytes()); +} + // Forward declarations struct md5_hash_state; void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state); @@ -135,18 +150,26 @@ struct md5_hash_state { uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; hash_circular_buffer<64> buffer; - CUDA_DEVICE_CALLABLE void put(uint8_t const* in, int size, bool extend_message_length) + void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) { auto hash_step = [this]() { md5_hash_step(this); }; this->buffer.put(in, size, hash_step); if (extend_message_length) { message_length += size; } } - CUDA_DEVICE_CALLABLE void pad(int space_to_leave) + void CUDA_DEVICE_CALLABLE pad(int space_to_leave) { auto hash_step = [this]() { md5_hash_step(this); }; this->buffer.pad(space_to_leave, hash_step); } + + template + void CUDA_DEVICE_CALLABLE process(Key const& key) + { + auto const normalized_key = normalize_nans_and_zeros(key); + auto const [data, size] = get_data(normalized_key); + put(data, size, true); + } }; /** @@ -197,47 +220,6 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state) hash_state->hash_value[3] += D; } -/** - * @brief Core MD5 element processing function - * - * This accepts arbitrary data, handles it as bytes, and calls the hash step - * when the buffer is filled up to message_chunk_size bytes. - */ -void CUDA_DEVICE_CALLABLE md5_process_bytes(uint8_t const* data, - uint32_t len, - md5_hash_state* hash_state) -{ - hash_state->put(data, len, true); -} - -template -auto CUDA_DEVICE_CALLABLE get_data(Key const& k) -{ - if constexpr (is_fixed_width() && !is_chrono()) { - return thrust::make_pair(reinterpret_cast(&k), sizeof(Key)); - } else { - cudf_assert(false && "Unsupported type."); - } -} - -auto CUDA_DEVICE_CALLABLE get_data(string_view const& s) -{ - return thrust::make_pair(reinterpret_cast(s.data()), s.size_bytes()); -} - -/** - * @brief MD5 typed element processor. - * - * This accepts typed data, normalizes it, and performs processing on raw bytes. - */ -template -void CUDA_DEVICE_CALLABLE md5_process(Key const& key, md5_hash_state* hash_state) -{ - auto const normalized_key = normalize_nans_and_zeros(key); - auto const [data, size] = get_data(normalized_key); - md5_process_bytes(data, size, hash_state); -} - struct MD5ListHasher { template void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, @@ -248,7 +230,7 @@ struct MD5ListHasher { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { for (size_type i = offset_begin; i < offset_end; i++) { - if (data_col.is_valid(i)) { md5_process(data_col.element(i), hash_state); } + if (data_col.is_valid(i)) { hash_state->process(data_col.element(i)); } } } else { cudf_assert(false && "Unsupported type."); @@ -282,7 +264,7 @@ struct MD5Hash { { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { - md5_process(col.element(row_index), hash_state); + hash_state->process(col.element(row_index)); } else { cudf_assert(false && "Unsupported type for hash function."); } From e604f14f0608670e2ddaba15ab9b4f815a1db827 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 14:51:17 -0700 Subject: [PATCH 29/54] MD5Hasher now owns its hash state. --- cpp/src/hash/md5_hash.cu | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index fd2ebbe4fbd..13fcf6bd97d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -239,21 +239,23 @@ struct MD5ListHasher { }; struct MD5Hash { - void CUDA_DEVICE_CALLABLE finalize(md5_hash_state* hash_state, char* result_location) const + md5_hash_state hash_state; + + void CUDA_DEVICE_CALLABLE finalize(char* result_location) { // Add a one bit flag (10000000) to signal the end of the message uint8_t constexpr end_of_message = 0x80; // The message length is appended to the end of the last chunk processed - uint64_t const message_length_in_bits = hash_state->message_length * 8; + uint64_t const message_length_in_bits = this->hash_state.message_length * 8; - hash_state->put(&end_of_message, sizeof(end_of_message), false); - hash_state->pad(sizeof(message_length_in_bits)); - hash_state->put(reinterpret_cast(&message_length_in_bits), - sizeof(message_length_in_bits), - false); + this->hash_state.put(&end_of_message, sizeof(end_of_message), false); + this->hash_state.pad(sizeof(message_length_in_bits)); + this->hash_state.put(reinterpret_cast(&message_length_in_bits), + sizeof(message_length_in_bits), + false); for (int i = 0; i < 4; ++i) { - uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); + uint32ToLowercaseHexString(this->hash_state.hash_value[i], result_location + (8 * i)); } } @@ -337,15 +339,14 @@ std::unique_ptr md5_hash(table_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), [d_chars, device_input = *device_input] __device__(auto row_index) { - md5_hash_state hash_state; MD5Hash hasher = MD5Hash{}; for (auto const& col : device_input) { if (col.is_valid(row_index)) { cudf::type_dispatcher( - col.type(), hasher, col, row_index, &hash_state); + col.type(), hasher, col, row_index, &hasher.hash_state); } } - hasher.finalize(&hash_state, d_chars + (row_index * digest_size)); + hasher.finalize(d_chars + (row_index * digest_size)); }); return make_strings_column( From 7ff0e7284e10ef4a63a0a2a6f615ef50560b2744 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 21:09:33 -0700 Subject: [PATCH 30/54] Use HasherDispatcher to avoid unexpected memory access error with type dispatch. --- cpp/src/hash/md5_hash.cu | 64 ++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 13fcf6bd97d..37f5638efe1 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -238,7 +238,35 @@ struct MD5ListHasher { } }; -struct MD5Hash { +template +struct HasherDispatcher { + Hasher* hasher; + column_device_view col; + + template + void CUDA_DEVICE_CALLABLE operator()(size_type row_index) const + { + if constexpr ((is_fixed_width() && !is_chrono()) || + std::is_same_v) { + hasher->hash_state.process(col.element(row_index)); + } else if constexpr (std::is_same_v) { + auto const data = col.child(lists_column_view::child_column_index); + auto const offsets = col.child(lists_column_view::offsets_column_index); + + if (data.type().id() == type_id::LIST) cudf_assert(false && "Nested list unsupported"); + + auto const offset_begin = offsets.element(row_index); + auto const offset_end = offsets.element(row_index + 1); + + cudf::type_dispatcher( + data.type(), MD5ListHasher{}, data, offset_begin, offset_end, &hasher->hash_state); + } else { + cudf_assert(false && "Unsupported type for hash function."); + } + } +}; + +struct MD5Hasher { md5_hash_state hash_state; void CUDA_DEVICE_CALLABLE finalize(char* result_location) @@ -258,37 +286,8 @@ struct MD5Hash { uint32ToLowercaseHexString(this->hash_state.hash_value[i], result_location + (8 * i)); } } - - template - void CUDA_DEVICE_CALLABLE operator()(column_device_view col, - size_type row_index, - md5_hash_state* hash_state) const - { - if constexpr ((is_fixed_width() && !is_chrono()) || - std::is_same_v) { - hash_state->process(col.element(row_index)); - } else { - cudf_assert(false && "Unsupported type for hash function."); - } - } }; -template <> -void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, - size_type row_index, - md5_hash_state* hash_state) const -{ - auto const data = col.child(lists_column_view::child_column_index); - auto const offsets = col.child(lists_column_view::offsets_column_index); - - if (data.type().id() == type_id::LIST) cudf_assert(false && "Nested list unsupported"); - - auto const offset_begin = offsets.element(row_index); - auto const offset_end = offsets.element(row_index + 1); - - cudf::type_dispatcher(data.type(), MD5ListHasher{}, data, offset_begin, offset_end, hash_state); -} - // MD5 supported leaf data type check constexpr inline bool md5_leaf_type_check(data_type dt) { @@ -339,11 +338,12 @@ std::unique_ptr md5_hash(table_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), [d_chars, device_input = *device_input] __device__(auto row_index) { - MD5Hash hasher = MD5Hash{}; + MD5Hasher hasher{}; for (auto const& col : device_input) { if (col.is_valid(row_index)) { + HasherDispatcher hasher_dispatcher{&hasher, col}; cudf::type_dispatcher( - col.type(), hasher, col, row_index, &hasher.hash_state); + col.type(), hasher_dispatcher, row_index); } } hasher.finalize(d_chars + (row_index * digest_size)); From 167dab3ec035a77672832a162d96f1ed4c475c6a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Oct 2021 21:35:33 -0700 Subject: [PATCH 31/54] Move methods from md5_hash_state to MD5Hasher class. --- cpp/src/hash/md5_hash.cu | 126 ++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 37f5638efe1..67ae19e4172 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -136,52 +137,31 @@ auto CUDA_DEVICE_CALLABLE get_data(Key const& k) } } -auto CUDA_DEVICE_CALLABLE get_data(string_view const& s) +auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) { - return thrust::make_pair(reinterpret_cast(s.data()), s.size_bytes()); + return thrust::make_pair(reinterpret_cast(k.data()), k.size_bytes()); } // Forward declarations struct md5_hash_state; -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state); +void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state); struct md5_hash_state { uint64_t message_length = 0; uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; hash_circular_buffer<64> buffer; - - void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) - { - auto hash_step = [this]() { md5_hash_step(this); }; - this->buffer.put(in, size, hash_step); - if (extend_message_length) { message_length += size; } - } - - void CUDA_DEVICE_CALLABLE pad(int space_to_leave) - { - auto hash_step = [this]() { md5_hash_step(this); }; - this->buffer.pad(space_to_leave, hash_step); - } - - template - void CUDA_DEVICE_CALLABLE process(Key const& key) - { - auto const normalized_key = normalize_nans_and_zeros(key); - auto const [data, size] = get_data(normalized_key); - put(data, size, true); - } }; /** * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, * updating the hash value so far. Does not zero out the buffer contents. */ -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state) +void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state) { - uint32_t A = hash_state->hash_value[0]; - uint32_t B = hash_state->hash_value[1]; - uint32_t C = hash_state->hash_value[2]; - uint32_t D = hash_state->hash_value[3]; + uint32_t A = hash_state.hash_value[0]; + uint32_t B = hash_state.hash_value[1]; + uint32_t C = hash_state.hash_value[2]; + uint32_t D = hash_state.hash_value[3]; for (unsigned int j = 0; j < 64; j++) { uint32_t F; @@ -206,7 +186,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state) } uint32_t buffer_element_as_int; - memcpy(&buffer_element_as_int, &hash_state->buffer[g * 4], 4); + memcpy(&buffer_element_as_int, &hash_state.buffer[g * 4], 4); F = F + A + md5_hash_constants[j] + buffer_element_as_int; A = D; D = C; @@ -214,26 +194,71 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state* hash_state) B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); } - hash_state->hash_value[0] += A; - hash_state->hash_value[1] += B; - hash_state->hash_value[2] += C; - hash_state->hash_value[3] += D; + hash_state.hash_value[0] += A; + hash_state.hash_value[1] += B; + hash_state.hash_value[2] += C; + hash_state.hash_value[3] += D; } +struct MD5Hasher { + md5_hash_state hash_state; + + public: + template + void CUDA_DEVICE_CALLABLE process(Key const& key) + { + auto const normalized_key = normalize_nans_and_zeros(key); + auto const [data, size] = get_data(normalized_key); + put(data, size, true); + } + + void CUDA_DEVICE_CALLABLE finalize(char* result_location) + { + // Add a one bit flag (10000000) to signal the end of the message + uint8_t constexpr end_of_message = 0x80; + // The message length is appended to the end of the last chunk processed + uint64_t const message_length_in_bits = this->hash_state.message_length * 8; + + put(&end_of_message, sizeof(end_of_message), false); + pad(sizeof(message_length_in_bits)); + put(reinterpret_cast(&message_length_in_bits), + sizeof(message_length_in_bits), + false); + + for (int i = 0; i < 4; ++i) { + uint32ToLowercaseHexString(this->hash_state.hash_value[i], result_location + (8 * i)); + } + } + + private: + void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) + { + auto hash_step = [this]() { md5_hash_step(this->hash_state); }; + this->hash_state.buffer.put(in, size, hash_step); + if (extend_message_length) { this->hash_state.message_length += size; } + } + + void CUDA_DEVICE_CALLABLE pad(int space_to_leave) + { + auto hash_step = [this]() { md5_hash_step(this->hash_state); }; + this->hash_state.buffer.pad(space_to_leave, hash_step); + } +}; + struct MD5ListHasher { template void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, size_type offset_begin, size_type offset_end, - md5_hash_state* hash_state) const + MD5Hasher& hasher) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { for (size_type i = offset_begin; i < offset_end; i++) { - if (data_col.is_valid(i)) { hash_state->process(data_col.element(i)); } + if (data_col.is_valid(i)) { hasher.process(data_col.element(i)); } } } else { - cudf_assert(false && "Unsupported type."); + cudf_assert(false && "Unsupported type for hash function."); } } }; @@ -248,7 +273,7 @@ struct HasherDispatcher { { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { - hasher->hash_state.process(col.element(row_index)); + hasher->process(col.element(row_index)); } else if constexpr (std::is_same_v) { auto const data = col.child(lists_column_view::child_column_index); auto const offsets = col.child(lists_column_view::offsets_column_index); @@ -258,36 +283,13 @@ struct HasherDispatcher { auto const offset_begin = offsets.element(row_index); auto const offset_end = offsets.element(row_index + 1); - cudf::type_dispatcher( - data.type(), MD5ListHasher{}, data, offset_begin, offset_end, &hasher->hash_state); + cudf::type_dispatcher(data.type(), MD5ListHasher{}, data, offset_begin, offset_end, *hasher); } else { cudf_assert(false && "Unsupported type for hash function."); } } }; -struct MD5Hasher { - md5_hash_state hash_state; - - void CUDA_DEVICE_CALLABLE finalize(char* result_location) - { - // Add a one bit flag (10000000) to signal the end of the message - uint8_t constexpr end_of_message = 0x80; - // The message length is appended to the end of the last chunk processed - uint64_t const message_length_in_bits = this->hash_state.message_length * 8; - - this->hash_state.put(&end_of_message, sizeof(end_of_message), false); - this->hash_state.pad(sizeof(message_length_in_bits)); - this->hash_state.put(reinterpret_cast(&message_length_in_bits), - sizeof(message_length_in_bits), - false); - - for (int i = 0; i < 4; ++i) { - uint32ToLowercaseHexString(this->hash_state.hash_value[i], result_location + (8 * i)); - } - } -}; - // MD5 supported leaf data type check constexpr inline bool md5_leaf_type_check(data_type dt) { From a8ac635065f67615b08704f668e45f31dc48b77a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 14 Oct 2021 15:39:35 -0700 Subject: [PATCH 32/54] Improve naming. --- cpp/src/hash/md5_hash.cu | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 67ae19e4172..f099e3d7026 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -78,9 +78,6 @@ struct hash_circular_buffer { CUDA_DEVICE_CALLABLE uint8_t* begin() { return storage; } CUDA_DEVICE_CALLABLE const uint8_t* begin() const { return storage; } - CUDA_DEVICE_CALLABLE uint8_t* end() { return &storage[capacity]; } - CUDA_DEVICE_CALLABLE const uint8_t* end() const { return &storage[capacity]; } - CUDA_DEVICE_CALLABLE int size() const { return std::distance(begin(), static_cast(cur)); @@ -153,7 +150,7 @@ struct md5_hash_state { }; /** - * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, + * @brief Core MD5 algorithm implementation. Processes a single 64-byte chunk, * updating the hash value so far. Does not zero out the buffer contents. */ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state) @@ -245,12 +242,13 @@ struct MD5Hasher { } }; -struct MD5ListHasher { +template +struct ListHasherDispatcher { template void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, size_type offset_begin, size_type offset_end, - MD5Hasher& hasher) const + Hasher& hasher) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { @@ -283,7 +281,8 @@ struct HasherDispatcher { auto const offset_begin = offsets.element(row_index); auto const offset_end = offsets.element(row_index + 1); - cudf::type_dispatcher(data.type(), MD5ListHasher{}, data, offset_begin, offset_end, *hasher); + cudf::type_dispatcher( + data.type(), ListHasherDispatcher{}, data, offset_begin, offset_end, *hasher); } else { cudf_assert(false && "Unsupported type for hash function."); } From e8b61dd6349f8bbc12231cf72d0935753a1fb1c1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Oct 2021 13:32:41 -0700 Subject: [PATCH 33/54] Use destructor instead of finalize method. --- cpp/src/hash/md5_hash.cu | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index f099e3d7026..39b1fc7e485 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -199,17 +199,15 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state) struct MD5Hasher { md5_hash_state hash_state; + char* result_location; public: - template - void CUDA_DEVICE_CALLABLE process(Key const& key) + CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) + : hash_state(), result_location(result_location) { - auto const normalized_key = normalize_nans_and_zeros(key); - auto const [data, size] = get_data(normalized_key); - put(data, size, true); } - void CUDA_DEVICE_CALLABLE finalize(char* result_location) + CUDA_DEVICE_CALLABLE ~MD5Hasher() { // Add a one bit flag (10000000) to signal the end of the message uint8_t constexpr end_of_message = 0x80; @@ -227,6 +225,19 @@ struct MD5Hasher { } } + MD5Hasher(const MD5Hasher&) = delete; + MD5Hasher& operator=(const MD5Hasher&) = delete; + MD5Hasher(MD5Hasher&&) = delete; + MD5Hasher& operator=(MD5Hasher&&) = delete; + + template + void CUDA_DEVICE_CALLABLE process(Key const& key) + { + auto const normalized_key = normalize_nans_and_zeros(key); + auto const [data, size] = get_data(normalized_key); + put(data, size, true); + } + private: void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) { @@ -339,7 +350,7 @@ std::unique_ptr md5_hash(table_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), [d_chars, device_input = *device_input] __device__(auto row_index) { - MD5Hasher hasher{}; + MD5Hasher hasher(d_chars + (row_index * digest_size)); for (auto const& col : device_input) { if (col.is_valid(row_index)) { HasherDispatcher hasher_dispatcher{&hasher, col}; @@ -347,7 +358,6 @@ std::unique_ptr md5_hash(table_view const& input, col.type(), hasher_dispatcher, row_index); } } - hasher.finalize(d_chars + (row_index * digest_size)); }); return make_strings_column( From 5150c5e51baaa7db5b1ab47d351d4b960f7a6df2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Oct 2021 13:42:08 -0700 Subject: [PATCH 34/54] Avoid double type dispatch for list columns. --- cpp/src/hash/md5_hash.cu | 60 +++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 39b1fc7e485..7467bbc58dc 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -255,16 +255,16 @@ struct MD5Hasher { template struct ListHasherDispatcher { + Hasher* hasher; + column_device_view col; + template - void CUDA_DEVICE_CALLABLE operator()(column_device_view data_col, - size_type offset_begin, - size_type offset_end, - Hasher& hasher) const + void CUDA_DEVICE_CALLABLE operator()(size_type offset_begin, size_type offset_end) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { for (size_type i = offset_begin; i < offset_end; i++) { - if (data_col.is_valid(i)) { hasher.process(data_col.element(i)); } + if (col.is_valid(i)) { hasher->process(col.element(i)); } } } else { cudf_assert(false && "Unsupported type for hash function."); @@ -283,17 +283,6 @@ struct HasherDispatcher { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { hasher->process(col.element(row_index)); - } else if constexpr (std::is_same_v) { - auto const data = col.child(lists_column_view::child_column_index); - auto const offsets = col.child(lists_column_view::offsets_column_index); - - if (data.type().id() == type_id::LIST) cudf_assert(false && "Nested list unsupported"); - - auto const offset_begin = offsets.element(row_index); - auto const offset_end = offsets.element(row_index + 1); - - cudf::type_dispatcher( - data.type(), ListHasherDispatcher{}, data, offset_begin, offset_end, *hasher); } else { cudf_assert(false && "Unsupported type for hash function."); } @@ -346,19 +335,32 @@ std::unique_ptr md5_hash(table_view const& input, auto const device_input = table_device_view::create(input, stream); // Hash each row, hashing each element sequentially left to right - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, device_input = *device_input] __device__(auto row_index) { - MD5Hasher hasher(d_chars + (row_index * digest_size)); - for (auto const& col : device_input) { - if (col.is_valid(row_index)) { - HasherDispatcher hasher_dispatcher{&hasher, col}; - cudf::type_dispatcher( - col.type(), hasher_dispatcher, row_index); - } - } - }); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, device_input = *device_input] __device__(auto row_index) { + MD5Hasher hasher(d_chars + (row_index * digest_size)); + for (auto const& col : device_input) { + if (col.is_valid(row_index)) { + if (col.type().id() == type_id::LIST) { + auto const data_col = col.child(lists_column_view::child_column_index); + auto const offsets = col.child(lists_column_view::offsets_column_index); + if (data_col.type().id() == type_id::LIST) { + cudf_assert(false && "Nested list unsupported"); + } + auto const offset_begin = offsets.element(row_index); + auto const offset_end = offsets.element(row_index + 1); + ListHasherDispatcher list_hasher_dispatcher{&hasher, data_col}; + cudf::type_dispatcher( + data_col.type(), list_hasher_dispatcher, offset_begin, offset_end); + } else { + HasherDispatcher hasher_dispatcher{&hasher, col}; + cudf::type_dispatcher(col.type(), hasher_dispatcher, row_index); + } + } + } + }); return make_strings_column( input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, std::move(null_mask)); From 3715e25675606a032ba9e1d088c3f1f0150ddf8a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Oct 2021 14:29:14 -0700 Subject: [PATCH 35/54] Remove hash_state class. --- cpp/src/hash/md5_hash.cu | 54 ++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 7467bbc58dc..2bf751ff665 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -139,26 +139,17 @@ auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) return thrust::make_pair(reinterpret_cast(k.data()), k.size_bytes()); } -// Forward declarations -struct md5_hash_state; -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state); - -struct md5_hash_state { - uint64_t message_length = 0; - uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; - hash_circular_buffer<64> buffer; -}; - /** * @brief Core MD5 algorithm implementation. Processes a single 64-byte chunk, * updating the hash value so far. Does not zero out the buffer contents. */ -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state) +template +void CUDA_DEVICE_CALLABLE md5_hash_step(hash_circular_buffer const& buffer, uint32_t* hash_values) { - uint32_t A = hash_state.hash_value[0]; - uint32_t B = hash_state.hash_value[1]; - uint32_t C = hash_state.hash_value[2]; - uint32_t D = hash_state.hash_value[3]; + uint32_t A = hash_values[0]; + uint32_t B = hash_values[1]; + uint32_t C = hash_values[2]; + uint32_t D = hash_values[3]; for (unsigned int j = 0; j < 64; j++) { uint32_t F; @@ -183,7 +174,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state) } uint32_t buffer_element_as_int; - memcpy(&buffer_element_as_int, &hash_state.buffer[g * 4], 4); + memcpy(&buffer_element_as_int, &buffer[g * 4], 4); F = F + A + md5_hash_constants[j] + buffer_element_as_int; A = D; D = C; @@ -191,28 +182,27 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_hash_state& hash_state) B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); } - hash_state.hash_value[0] += A; - hash_state.hash_value[1] += B; - hash_state.hash_value[2] += C; - hash_state.hash_value[3] += D; + hash_values[0] += A; + hash_values[1] += B; + hash_values[2] += C; + hash_values[3] += D; } struct MD5Hasher { - md5_hash_state hash_state; char* result_location; + uint64_t message_length = 0; + uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; + hash_circular_buffer<64> buffer; public: - CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) - : hash_state(), result_location(result_location) - { - } + CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) : result_location(result_location) {} CUDA_DEVICE_CALLABLE ~MD5Hasher() { // Add a one bit flag (10000000) to signal the end of the message uint8_t constexpr end_of_message = 0x80; // The message length is appended to the end of the last chunk processed - uint64_t const message_length_in_bits = this->hash_state.message_length * 8; + uint64_t const message_length_in_bits = message_length * 8; put(&end_of_message, sizeof(end_of_message), false); pad(sizeof(message_length_in_bits)); @@ -221,7 +211,7 @@ struct MD5Hasher { false); for (int i = 0; i < 4; ++i) { - uint32ToLowercaseHexString(this->hash_state.hash_value[i], result_location + (8 * i)); + uint32ToLowercaseHexString(hash_values[i], result_location + (8 * i)); } } @@ -241,15 +231,15 @@ struct MD5Hasher { private: void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) { - auto hash_step = [this]() { md5_hash_step(this->hash_state); }; - this->hash_state.buffer.put(in, size, hash_step); - if (extend_message_length) { this->hash_state.message_length += size; } + auto hash_step = [this]() { md5_hash_step(buffer, hash_values); }; + buffer.put(in, size, hash_step); + if (extend_message_length) { message_length += size; } } void CUDA_DEVICE_CALLABLE pad(int space_to_leave) { - auto hash_step = [this]() { md5_hash_step(this->hash_state); }; - this->hash_state.buffer.pad(space_to_leave, hash_step); + auto hash_step = [this]() { md5_hash_step(buffer, hash_values); }; + buffer.pad(space_to_leave, hash_step); } }; From 677c2fefd36fd38fbf250e1d68c3aa3a55283991 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Oct 2021 16:46:56 -0700 Subject: [PATCH 36/54] Refactoring hash step callback structure. --- cpp/src/hash/md5_hash.cu | 148 ++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 72 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 2bf751ff665..477f547fed1 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -68,12 +68,17 @@ static const __device__ __constant__ uint32_t md5_hash_constants[64] = { 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, }; -template +template struct hash_circular_buffer { uint8_t storage[capacity]; uint8_t* cur; + uint32_t* hash_values; + hash_step_callable hash_step; - CUDA_DEVICE_CALLABLE hash_circular_buffer() : cur(storage) {} + CUDA_DEVICE_CALLABLE hash_circular_buffer(uint32_t* hash_values) + : cur{storage}, hash_values{hash_values} + { + } CUDA_DEVICE_CALLABLE uint8_t* begin() { return storage; } CUDA_DEVICE_CALLABLE const uint8_t* begin() const { return storage; } @@ -85,8 +90,7 @@ struct hash_circular_buffer { CUDA_DEVICE_CALLABLE int available_space() const { return capacity - size(); } - template - CUDA_DEVICE_CALLABLE void put(uint8_t const* in, int size, hash_step_callable hash_step) + CUDA_DEVICE_CALLABLE void put(uint8_t const* in, int size) { int space = available_space(); int copy_start = 0; @@ -94,7 +98,7 @@ struct hash_circular_buffer { // The buffer will be filled by this chunk of data. Copy a chunk of the // data to fill the buffer and trigger a hash step. memcpy(cur, in + copy_start, space); - hash_step(); + hash_step(storage, hash_values); size -= space; copy_start += space; cur = begin(); @@ -107,13 +111,12 @@ struct hash_circular_buffer { cur += size; } - template - CUDA_DEVICE_CALLABLE void pad(int space_to_leave, hash_step_callable hash_step) + CUDA_DEVICE_CALLABLE void pad(int space_to_leave) { int space = available_space(); if (space_to_leave > space) { memset(cur, 0x00, space); - hash_step(); + hash_step(storage, hash_values); cur = begin(); space = available_space(); } @@ -139,69 +142,20 @@ auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) return thrust::make_pair(reinterpret_cast(k.data()), k.size_bytes()); } -/** - * @brief Core MD5 algorithm implementation. Processes a single 64-byte chunk, - * updating the hash value so far. Does not zero out the buffer contents. - */ -template -void CUDA_DEVICE_CALLABLE md5_hash_step(hash_circular_buffer const& buffer, uint32_t* hash_values) -{ - uint32_t A = hash_values[0]; - uint32_t B = hash_values[1]; - uint32_t C = hash_values[2]; - uint32_t D = hash_values[3]; - - for (unsigned int j = 0; j < 64; j++) { - uint32_t F; - uint32_t g; - switch (j / 16) { - case 0: - F = (B & C) | ((~B) & D); - g = j; - break; - case 1: - F = (D & B) | ((~D) & C); - g = (5 * j + 1) % 16; - break; - case 2: - F = B ^ C ^ D; - g = (3 * j + 5) % 16; - break; - case 3: - F = C ^ (B | (~D)); - g = (7 * j) % 16; - break; - } - - uint32_t buffer_element_as_int; - memcpy(&buffer_element_as_int, &buffer[g * 4], 4); - F = F + A + md5_hash_constants[j] + buffer_element_as_int; - A = D; - D = C; - C = B; - B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); - } - - hash_values[0] += A; - hash_values[1] += B; - hash_values[2] += C; - hash_values[3] += D; -} - struct MD5Hasher { - char* result_location; - uint64_t message_length = 0; - uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; - hash_circular_buffer<64> buffer; - public: - CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) : result_location(result_location) {} + CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) + : result_location(result_location), buffer(hash_values) + { + } CUDA_DEVICE_CALLABLE ~MD5Hasher() { - // Add a one bit flag (10000000) to signal the end of the message + // On destruction, finalize the message buffer and write out the current + // hexadecimal hash value to the result location. + // Add a one bit flag (10000000) to signal the end of the message. uint8_t constexpr end_of_message = 0x80; - // The message length is appended to the end of the last chunk processed + // The message length is appended to the end of the last chunk processed. uint64_t const message_length_in_bits = message_length * 8; put(&end_of_message, sizeof(end_of_message), false); @@ -231,16 +185,66 @@ struct MD5Hasher { private: void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) { - auto hash_step = [this]() { md5_hash_step(buffer, hash_values); }; - buffer.put(in, size, hash_step); + buffer.put(in, size); if (extend_message_length) { message_length += size; } } - void CUDA_DEVICE_CALLABLE pad(int space_to_leave) - { - auto hash_step = [this]() { md5_hash_step(buffer, hash_values); }; - buffer.pad(space_to_leave, hash_step); - } + void CUDA_DEVICE_CALLABLE pad(int space_to_leave) { buffer.pad(space_to_leave); } + + /** + * @brief Core MD5 algorithm implementation. Processes a single 64-byte chunk, + * updating the hash value so far. Does not zero out the buffer contents. + */ + struct md5_hash_step { + void CUDA_DEVICE_CALLABLE operator()(const uint8_t* buffer, uint32_t* hash_values) + { + uint32_t A = hash_values[0]; + uint32_t B = hash_values[1]; + uint32_t C = hash_values[2]; + uint32_t D = hash_values[3]; + + for (int j = 0; j < 64; j++) { + uint32_t F; + uint32_t g; + switch (j / 16) { + case 0: + F = (B & C) | ((~B) & D); + g = j; + break; + case 1: + F = (D & B) | ((~D) & C); + g = (5 * j + 1) % 16; + break; + case 2: + F = B ^ C ^ D; + g = (3 * j + 5) % 16; + break; + case 3: + F = C ^ (B | (~D)); + g = (7 * j) % 16; + break; + } + + uint32_t buffer_element_as_int; + memcpy(&buffer_element_as_int, &buffer[g * 4], 4); + F = F + A + md5_hash_constants[j] + buffer_element_as_int; + A = D; + D = C; + C = B; + B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); + } + + hash_values[0] += A; + hash_values[1] += B; + hash_values[2] += C; + hash_values[3] += D; + } + }; + + char* result_location; + hash_circular_buffer<64, md5_hash_step> buffer; + uint64_t message_length = 0; + uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; }; template From 720b601d62d6f413e46f4b33369d12b7d3ca0173 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Oct 2021 16:51:40 -0700 Subject: [PATCH 37/54] Remove MD5Hasher.put. --- cpp/src/hash/md5_hash.cu | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 477f547fed1..53aa65e5751 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -158,11 +158,10 @@ struct MD5Hasher { // The message length is appended to the end of the last chunk processed. uint64_t const message_length_in_bits = message_length * 8; - put(&end_of_message, sizeof(end_of_message), false); + buffer.put(&end_of_message, sizeof(end_of_message)); pad(sizeof(message_length_in_bits)); - put(reinterpret_cast(&message_length_in_bits), - sizeof(message_length_in_bits), - false); + buffer.put(reinterpret_cast(&message_length_in_bits), + sizeof(message_length_in_bits)); for (int i = 0; i < 4; ++i) { uint32ToLowercaseHexString(hash_values[i], result_location + (8 * i)); @@ -179,16 +178,11 @@ struct MD5Hasher { { auto const normalized_key = normalize_nans_and_zeros(key); auto const [data, size] = get_data(normalized_key); - put(data, size, true); + buffer.put(data, size); + message_length += size; } private: - void CUDA_DEVICE_CALLABLE put(uint8_t const* in, int size, bool extend_message_length) - { - buffer.put(in, size); - if (extend_message_length) { message_length += size; } - } - void CUDA_DEVICE_CALLABLE pad(int space_to_leave) { buffer.pad(space_to_leave); } /** From 55e4bce241c4c0bfee943d10412a8b52e390fc67 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Oct 2021 16:52:49 -0700 Subject: [PATCH 38/54] Remove MD5Hasher.pad. --- cpp/src/hash/md5_hash.cu | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 53aa65e5751..4a689b54730 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -143,7 +143,6 @@ auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) } struct MD5Hasher { - public: CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) : result_location(result_location), buffer(hash_values) { @@ -159,7 +158,7 @@ struct MD5Hasher { uint64_t const message_length_in_bits = message_length * 8; buffer.put(&end_of_message, sizeof(end_of_message)); - pad(sizeof(message_length_in_bits)); + buffer.pad(sizeof(message_length_in_bits)); buffer.put(reinterpret_cast(&message_length_in_bits), sizeof(message_length_in_bits)); @@ -182,9 +181,6 @@ struct MD5Hasher { message_length += size; } - private: - void CUDA_DEVICE_CALLABLE pad(int space_to_leave) { buffer.pad(space_to_leave); } - /** * @brief Core MD5 algorithm implementation. Processes a single 64-byte chunk, * updating the hash value so far. Does not zero out the buffer contents. From 27a8bf0e03f3841f0a103cfed480d35b94391f31 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Oct 2021 11:46:48 -0700 Subject: [PATCH 39/54] Move hash_values to be part of the hash step's state. --- cpp/src/hash/md5_hash.cu | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 4a689b54730..ae8a9f07004 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -72,13 +72,9 @@ template struct hash_circular_buffer { uint8_t storage[capacity]; uint8_t* cur; - uint32_t* hash_values; hash_step_callable hash_step; - CUDA_DEVICE_CALLABLE hash_circular_buffer(uint32_t* hash_values) - : cur{storage}, hash_values{hash_values} - { - } + CUDA_DEVICE_CALLABLE hash_circular_buffer() : cur{storage} {} CUDA_DEVICE_CALLABLE uint8_t* begin() { return storage; } CUDA_DEVICE_CALLABLE const uint8_t* begin() const { return storage; } @@ -98,7 +94,7 @@ struct hash_circular_buffer { // The buffer will be filled by this chunk of data. Copy a chunk of the // data to fill the buffer and trigger a hash step. memcpy(cur, in + copy_start, space); - hash_step(storage, hash_values); + hash_step(storage); size -= space; copy_start += space; cur = begin(); @@ -116,7 +112,7 @@ struct hash_circular_buffer { int space = available_space(); if (space_to_leave > space) { memset(cur, 0x00, space); - hash_step(storage, hash_values); + hash_step(storage); cur = begin(); space = available_space(); } @@ -143,10 +139,7 @@ auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) } struct MD5Hasher { - CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) - : result_location(result_location), buffer(hash_values) - { - } + CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) : result_location(result_location) {} CUDA_DEVICE_CALLABLE ~MD5Hasher() { @@ -163,7 +156,7 @@ struct MD5Hasher { sizeof(message_length_in_bits)); for (int i = 0; i < 4; ++i) { - uint32ToLowercaseHexString(hash_values[i], result_location + (8 * i)); + uint32ToLowercaseHexString(buffer.hash_step.hash_values[i], result_location + (8 * i)); } } @@ -186,7 +179,7 @@ struct MD5Hasher { * updating the hash value so far. Does not zero out the buffer contents. */ struct md5_hash_step { - void CUDA_DEVICE_CALLABLE operator()(const uint8_t* buffer, uint32_t* hash_values) + void CUDA_DEVICE_CALLABLE operator()(const uint8_t* buffer) { uint32_t A = hash_values[0]; uint32_t B = hash_values[1]; @@ -229,12 +222,13 @@ struct MD5Hasher { hash_values[2] += C; hash_values[3] += D; } + + uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; }; char* result_location; hash_circular_buffer<64, md5_hash_step> buffer; uint64_t message_length = 0; - uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; }; template From e56e1d6e81f0afd71fd00087ff4a42969be1fe14 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Oct 2021 12:10:24 -0700 Subject: [PATCH 40/54] Construct hash step explicitly, use message_chunk_size for buffer sizes. --- cpp/src/hash/md5_hash.cu | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index ae8a9f07004..bb3916fd862 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -74,7 +74,10 @@ struct hash_circular_buffer { uint8_t* cur; hash_step_callable hash_step; - CUDA_DEVICE_CALLABLE hash_circular_buffer() : cur{storage} {} + CUDA_DEVICE_CALLABLE hash_circular_buffer(hash_step_callable hash_step) + : cur{storage}, hash_step{hash_step} + { + } CUDA_DEVICE_CALLABLE uint8_t* begin() { return storage; } CUDA_DEVICE_CALLABLE const uint8_t* begin() const { return storage; } @@ -139,7 +142,12 @@ auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) } struct MD5Hasher { - CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) : result_location(result_location) {} + static constexpr int message_chunk_size = 64; + + CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) + : result_location(result_location), buffer(md5_hash_step{}) + { + } CUDA_DEVICE_CALLABLE ~MD5Hasher() { @@ -179,14 +187,16 @@ struct MD5Hasher { * updating the hash value so far. Does not zero out the buffer contents. */ struct md5_hash_step { - void CUDA_DEVICE_CALLABLE operator()(const uint8_t* buffer) + uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; + + void CUDA_DEVICE_CALLABLE operator()(const uint8_t (&buffer)[message_chunk_size]) { uint32_t A = hash_values[0]; uint32_t B = hash_values[1]; uint32_t C = hash_values[2]; uint32_t D = hash_values[3]; - for (int j = 0; j < 64; j++) { + for (int j = 0; j < message_chunk_size; j++) { uint32_t F; uint32_t g; switch (j / 16) { @@ -222,12 +232,10 @@ struct MD5Hasher { hash_values[2] += C; hash_values[3] += D; } - - uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; }; char* result_location; - hash_circular_buffer<64, md5_hash_step> buffer; + hash_circular_buffer buffer; uint64_t message_length = 0; }; From 5544d871b9821bea67f3d0f47da9644bae812bf5 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Oct 2021 14:34:52 -0700 Subject: [PATCH 41/54] Make MD5Hash own the hash_values. --- cpp/src/hash/md5_hash.cu | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index bb3916fd862..e3c8ad2c061 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -145,7 +145,7 @@ struct MD5Hasher { static constexpr int message_chunk_size = 64; CUDA_DEVICE_CALLABLE MD5Hasher(char* result_location) - : result_location(result_location), buffer(md5_hash_step{}) + : result_location(result_location), buffer(md5_hash_step{hash_values}) { } @@ -164,7 +164,7 @@ struct MD5Hasher { sizeof(message_length_in_bits)); for (int i = 0; i < 4; ++i) { - uint32ToLowercaseHexString(buffer.hash_step.hash_values[i], result_location + (8 * i)); + uint32ToLowercaseHexString(hash_values[i], result_location + (8 * i)); } } @@ -187,7 +187,7 @@ struct MD5Hasher { * updating the hash value so far. Does not zero out the buffer contents. */ struct md5_hash_step { - uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; + uint32_t (&hash_values)[4]; void CUDA_DEVICE_CALLABLE operator()(const uint8_t (&buffer)[message_chunk_size]) { @@ -235,6 +235,7 @@ struct MD5Hasher { }; char* result_location; + uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; hash_circular_buffer buffer; uint64_t message_length = 0; }; From ff038cdb33c9a4feecbc7b9857a2e5fc3c4a7366 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Oct 2021 14:37:32 -0700 Subject: [PATCH 42/54] Move variable. --- cpp/src/hash/md5_hash.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index e3c8ad2c061..676f2841263 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -235,9 +235,9 @@ struct MD5Hasher { }; char* result_location; - uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; hash_circular_buffer buffer; uint64_t message_length = 0; + uint32_t hash_values[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; }; template From cc2bf4b94d63689da6ce76a44089f82dbc1cf43b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Oct 2021 18:44:50 -0700 Subject: [PATCH 43/54] Declare HasherDispatchers inline. --- cpp/src/hash/md5_hash.cu | 53 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 676f2841263..a4c80c5c92f 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -322,32 +322,33 @@ std::unique_ptr md5_hash(table_view const& input, auto const device_input = table_device_view::create(input, stream); // Hash each row, hashing each element sequentially left to right - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, device_input = *device_input] __device__(auto row_index) { - MD5Hasher hasher(d_chars + (row_index * digest_size)); - for (auto const& col : device_input) { - if (col.is_valid(row_index)) { - if (col.type().id() == type_id::LIST) { - auto const data_col = col.child(lists_column_view::child_column_index); - auto const offsets = col.child(lists_column_view::offsets_column_index); - if (data_col.type().id() == type_id::LIST) { - cudf_assert(false && "Nested list unsupported"); - } - auto const offset_begin = offsets.element(row_index); - auto const offset_end = offsets.element(row_index + 1); - ListHasherDispatcher list_hasher_dispatcher{&hasher, data_col}; - cudf::type_dispatcher( - data_col.type(), list_hasher_dispatcher, offset_begin, offset_end); - } else { - HasherDispatcher hasher_dispatcher{&hasher, col}; - cudf::type_dispatcher(col.type(), hasher_dispatcher, row_index); - } - } - } - }); + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, device_input = *device_input] __device__(auto row_index) { + MD5Hasher hasher(d_chars + (row_index * digest_size)); + for (auto const& col : device_input) { + if (col.is_valid(row_index)) { + if (col.type().id() == type_id::LIST) { + auto const data_col = col.child(lists_column_view::child_column_index); + auto const offsets = col.child(lists_column_view::offsets_column_index); + if (data_col.type().id() == type_id::LIST) { + cudf_assert(false && "Nested list unsupported"); + } + auto const offset_begin = offsets.element(row_index); + auto const offset_end = offsets.element(row_index + 1); + cudf::type_dispatcher( + data_col.type(), + ListHasherDispatcher{&hasher, data_col}, + offset_begin, + offset_end); + } else { + cudf::type_dispatcher( + col.type(), HasherDispatcher{&hasher, col}, row_index); + } + } + } + }); return make_strings_column( input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, std::move(null_mask)); From 9fd47fa71b0e5bc7ff7d073d8097b8a1e89a61c2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Oct 2021 19:01:33 -0700 Subject: [PATCH 44/54] Make HasherDispatcher column_device_view const&. --- cpp/src/hash/md5_hash.cu | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index a4c80c5c92f..af40a6c323d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -241,18 +241,16 @@ struct MD5Hasher { }; template -struct ListHasherDispatcher { +struct HasherDispatcher { Hasher* hasher; - column_device_view col; + column_device_view const& input_col; template - void CUDA_DEVICE_CALLABLE operator()(size_type offset_begin, size_type offset_end) const + void CUDA_DEVICE_CALLABLE operator()(size_type row_index) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { - for (size_type i = offset_begin; i < offset_end; i++) { - if (col.is_valid(i)) { hasher->process(col.element(i)); } - } + hasher->process(input_col.element(row_index)); } else { cudf_assert(false && "Unsupported type for hash function."); } @@ -260,16 +258,18 @@ struct ListHasherDispatcher { }; template -struct HasherDispatcher { +struct ListHasherDispatcher { Hasher* hasher; - column_device_view col; + column_device_view const& input_col; template - void CUDA_DEVICE_CALLABLE operator()(size_type row_index) const + void CUDA_DEVICE_CALLABLE operator()(size_type offset_begin, size_type offset_end) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { - hasher->process(col.element(row_index)); + for (size_type i = offset_begin; i < offset_end; i++) { + if (input_col.is_valid(i)) { hasher->process(input_col.element(i)); } + } } else { cudf_assert(false && "Unsupported type for hash function."); } From f12eaf80107530212d7e5ea686bb6ce78ed99328 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Oct 2021 19:04:19 -0700 Subject: [PATCH 45/54] Parenthesize expression for readability. --- cpp/src/hash/md5_hash.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index af40a6c323d..c5549ebf64a 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -279,7 +279,7 @@ struct ListHasherDispatcher { // MD5 supported leaf data type check constexpr inline bool md5_leaf_type_check(data_type dt) { - return (is_fixed_width(dt) && !is_chrono(dt)) || dt.id() == type_id::STRING; + return (is_fixed_width(dt) && !is_chrono(dt)) || (dt.id() == type_id::STRING); } } // namespace From 2f5c2b6270b77522a825640203d7c8638a1a2109 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Oct 2021 19:06:06 -0700 Subject: [PATCH 46/54] Add const. --- cpp/src/hash/md5_hash.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index c5549ebf64a..179ca822c65 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -246,7 +246,7 @@ struct HasherDispatcher { column_device_view const& input_col; template - void CUDA_DEVICE_CALLABLE operator()(size_type row_index) const + void CUDA_DEVICE_CALLABLE operator()(size_type const row_index) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { @@ -263,7 +263,8 @@ struct ListHasherDispatcher { column_device_view const& input_col; template - void CUDA_DEVICE_CALLABLE operator()(size_type offset_begin, size_type offset_end) const + void CUDA_DEVICE_CALLABLE operator()(size_type const offset_begin, + size_type const offset_end) const { if constexpr ((is_fixed_width() && !is_chrono()) || std::is_same_v) { From 25c0e907e332ea54ecccbca77345f52a960ce221 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Oct 2021 19:27:25 -0700 Subject: [PATCH 47/54] Rename Key to Element, get_data to get_element_pointer_and_size. --- cpp/src/hash/md5_hash.cu | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 179ca822c65..81f7fadb282 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -126,19 +126,21 @@ struct hash_circular_buffer { CUDA_DEVICE_CALLABLE const uint8_t& operator[](int idx) const { return storage[idx]; } }; -template -auto CUDA_DEVICE_CALLABLE get_data(Key const& k) +// Get a uint8_t pointer to a column element and its size as a pair. +template +auto CUDA_DEVICE_CALLABLE get_element_pointer_and_size(Element const& element) { - if constexpr (is_fixed_width() && !is_chrono()) { - return thrust::make_pair(reinterpret_cast(&k), sizeof(Key)); + if constexpr (is_fixed_width() && !is_chrono()) { + return thrust::make_pair(reinterpret_cast(&element), sizeof(Element)); } else { cudf_assert(false && "Unsupported type."); } } -auto CUDA_DEVICE_CALLABLE get_data(string_view const& k) +template <> +auto CUDA_DEVICE_CALLABLE get_element_pointer_and_size(string_view const& element) { - return thrust::make_pair(reinterpret_cast(k.data()), k.size_bytes()); + return thrust::make_pair(reinterpret_cast(element.data()), element.size_bytes()); } struct MD5Hasher { @@ -173,12 +175,12 @@ struct MD5Hasher { MD5Hasher(MD5Hasher&&) = delete; MD5Hasher& operator=(MD5Hasher&&) = delete; - template - void CUDA_DEVICE_CALLABLE process(Key const& key) + template + void CUDA_DEVICE_CALLABLE process(Element const& element) { - auto const normalized_key = normalize_nans_and_zeros(key); - auto const [data, size] = get_data(normalized_key); - buffer.put(data, size); + auto const normalized_element = normalize_nans_and_zeros(element); + auto const [element_ptr, size] = get_element_pointer_and_size(normalized_element); + buffer.put(element_ptr, size); message_length += size; } @@ -245,12 +247,12 @@ struct HasherDispatcher { Hasher* hasher; column_device_view const& input_col; - template + template void CUDA_DEVICE_CALLABLE operator()(size_type const row_index) const { - if constexpr ((is_fixed_width() && !is_chrono()) || - std::is_same_v) { - hasher->process(input_col.element(row_index)); + if constexpr ((is_fixed_width() && !is_chrono()) || + std::is_same_v) { + hasher->process(input_col.element(row_index)); } else { cudf_assert(false && "Unsupported type for hash function."); } @@ -262,14 +264,14 @@ struct ListHasherDispatcher { Hasher* hasher; column_device_view const& input_col; - template + template void CUDA_DEVICE_CALLABLE operator()(size_type const offset_begin, size_type const offset_end) const { - if constexpr ((is_fixed_width() && !is_chrono()) || - std::is_same_v) { + if constexpr ((is_fixed_width() && !is_chrono()) || + std::is_same_v) { for (size_type i = offset_begin; i < offset_end; i++) { - if (input_col.is_valid(i)) { hasher->process(input_col.element(i)); } + if (input_col.is_valid(i)) { hasher->process(input_col.element(i)); } } } else { cudf_assert(false && "Unsupported type for hash function."); From b6c4c75e7174f2b88f73458a5e7f4072526759d5 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Oct 2021 19:33:45 -0700 Subject: [PATCH 48/54] Add constructor to HasherDispatcher. --- cpp/src/hash/md5_hash.cu | 62 +++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 81f7fadb282..9b340cb6632 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -247,6 +247,11 @@ struct HasherDispatcher { Hasher* hasher; column_device_view const& input_col; + CUDA_DEVICE_CALLABLE HasherDispatcher(Hasher* hasher, column_device_view const& input_col) + : hasher{hasher}, input_col{input_col} + { + } + template void CUDA_DEVICE_CALLABLE operator()(size_type const row_index) const { @@ -264,6 +269,11 @@ struct ListHasherDispatcher { Hasher* hasher; column_device_view const& input_col; + CUDA_DEVICE_CALLABLE ListHasherDispatcher(Hasher* hasher, column_device_view const& input_col) + : hasher{hasher}, input_col{input_col} + { + } + template void CUDA_DEVICE_CALLABLE operator()(size_type const offset_begin, size_type const offset_end) const @@ -325,33 +335,31 @@ std::unique_ptr md5_hash(table_view const& input, auto const device_input = table_device_view::create(input, stream); // Hash each row, hashing each element sequentially left to right - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, device_input = *device_input] __device__(auto row_index) { - MD5Hasher hasher(d_chars + (row_index * digest_size)); - for (auto const& col : device_input) { - if (col.is_valid(row_index)) { - if (col.type().id() == type_id::LIST) { - auto const data_col = col.child(lists_column_view::child_column_index); - auto const offsets = col.child(lists_column_view::offsets_column_index); - if (data_col.type().id() == type_id::LIST) { - cudf_assert(false && "Nested list unsupported"); - } - auto const offset_begin = offsets.element(row_index); - auto const offset_end = offsets.element(row_index + 1); - cudf::type_dispatcher( - data_col.type(), - ListHasherDispatcher{&hasher, data_col}, - offset_begin, - offset_end); - } else { - cudf::type_dispatcher( - col.type(), HasherDispatcher{&hasher, col}, row_index); - } - } - } - }); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, device_input = *device_input] __device__(auto row_index) { + MD5Hasher hasher(d_chars + (row_index * digest_size)); + for (auto const& col : device_input) { + if (col.is_valid(row_index)) { + if (col.type().id() == type_id::LIST) { + auto const data_col = col.child(lists_column_view::child_column_index); + auto const offsets = col.child(lists_column_view::offsets_column_index); + if (data_col.type().id() == type_id::LIST) { + cudf_assert(false && "Nested list unsupported"); + } + auto const offset_begin = offsets.element(row_index); + auto const offset_end = offsets.element(row_index + 1); + cudf::type_dispatcher( + data_col.type(), ListHasherDispatcher(&hasher, data_col), offset_begin, offset_end); + } else { + cudf::type_dispatcher( + col.type(), HasherDispatcher(&hasher, col), row_index); + } + } + } + }); return make_strings_column( input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, std::move(null_mask)); From d7ff9655ef6b2ad7ce4cbe23def3addd80defa50 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Oct 2021 07:38:07 -0700 Subject: [PATCH 49/54] Simplify hash_circular_buffer. --- cpp/src/hash/md5_hash.cu | 41 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 9b340cb6632..dec7671a436 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -72,6 +72,7 @@ template struct hash_circular_buffer { uint8_t storage[capacity]; uint8_t* cur; + int available_space{capacity}; hash_step_callable hash_step; CUDA_DEVICE_CALLABLE hash_circular_buffer(hash_step_callable hash_step) @@ -79,48 +80,38 @@ struct hash_circular_buffer { { } - CUDA_DEVICE_CALLABLE uint8_t* begin() { return storage; } - CUDA_DEVICE_CALLABLE const uint8_t* begin() const { return storage; } - - CUDA_DEVICE_CALLABLE int size() const - { - return std::distance(begin(), static_cast(cur)); - } - - CUDA_DEVICE_CALLABLE int available_space() const { return capacity - size(); } - CUDA_DEVICE_CALLABLE void put(uint8_t const* in, int size) { - int space = available_space(); int copy_start = 0; - while (size >= space) { + while (size >= available_space) { // The buffer will be filled by this chunk of data. Copy a chunk of the // data to fill the buffer and trigger a hash step. - memcpy(cur, in + copy_start, space); + memcpy(cur, in + copy_start, available_space); hash_step(storage); - size -= space; - copy_start += space; - cur = begin(); - space = available_space(); + size -= available_space; + copy_start += available_space; + cur = storage; + available_space = capacity; } // The buffer will not be filled by the remaining data. That is, `size >= 0 // && size < capacity`. We copy the remaining data into the buffer but do // not trigger a hash step. memcpy(cur, in + copy_start, size); cur += size; + available_space -= size; } - CUDA_DEVICE_CALLABLE void pad(int space_to_leave) + CUDA_DEVICE_CALLABLE void pad(int const space_to_leave) { - int space = available_space(); - if (space_to_leave > space) { - memset(cur, 0x00, space); + if (space_to_leave > available_space) { + memset(cur, 0x00, available_space); hash_step(storage); - cur = begin(); - space = available_space(); + cur = storage; + available_space = capacity; } - memset(cur, 0x00, space - space_to_leave); - cur += space - space_to_leave; + memset(cur, 0x00, available_space - space_to_leave); + cur += available_space - space_to_leave; + available_space = space_to_leave; } CUDA_DEVICE_CALLABLE const uint8_t& operator[](int idx) const { return storage[idx]; } From 8cdf423554c5f24bc157f4ce47e7f808f5d58019 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Oct 2021 07:38:23 -0700 Subject: [PATCH 50/54] Reformat shift constants. --- cpp/src/hash/md5_hash.cu | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index dec7671a436..2ed3785d88d 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -39,23 +39,7 @@ namespace detail { namespace { static const __device__ __constant__ uint32_t md5_shift_constants[16] = { - 7, - 12, - 17, - 22, - 5, - 9, - 14, - 20, - 4, - 11, - 16, - 23, - 6, - 10, - 15, - 21, -}; + 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21}; static const __device__ __constant__ uint32_t md5_hash_constants[64] = { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, From e7834dddf8ab2bacb9b5bf9c89bc6648badbe7d2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Oct 2021 07:42:29 -0700 Subject: [PATCH 51/54] Add comment. --- cpp/src/hash/md5_hash.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 2ed3785d88d..fa234c77a73 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -176,6 +176,7 @@ struct MD5Hasher { for (int j = 0; j < message_chunk_size; j++) { uint32_t F; uint32_t g; + // No default case is needed because j < 64. j / 16 is always 0, 1, 2, or 3. switch (j / 16) { case 0: F = (B & C) | ((~B) & D); From 92b329b7f0836786af1289df13833db5674248dc Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Oct 2021 09:41:26 -0700 Subject: [PATCH 52/54] Remove unnecessary qualifiers. --- cpp/src/hash/md5_hash.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index fa234c77a73..5203fc8de33 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -38,10 +38,10 @@ namespace detail { namespace { -static const __device__ __constant__ uint32_t md5_shift_constants[16] = { +const __constant__ uint32_t md5_shift_constants[16] = { 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21}; -static const __device__ __constant__ uint32_t md5_hash_constants[64] = { +const __constant__ uint32_t md5_hash_constants[64] = { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, From 410b30ddc2a7f9497373c7b9a5a2f61c976d9455 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Oct 2021 11:23:04 -0700 Subject: [PATCH 53/54] Add comment about constant sources. --- cpp/src/hash/md5_hash.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 5203fc8de33..cf6fc4b4f1e 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -38,6 +38,9 @@ namespace detail { namespace { +// The MD5 algorithm and its hash/shift constants are officially specified in +// RFC 1321. For convenience, these values can also be found on Wikipedia: +// https://en.wikipedia.org/wiki/MD5 const __constant__ uint32_t md5_shift_constants[16] = { 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21}; From c7d132f2ed7477af63ff322693aef83f71835238 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Oct 2021 15:07:35 -0700 Subject: [PATCH 54/54] Clarify flag. --- cpp/src/hash/md5_hash.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index cf6fc4b4f1e..d0e47d93bc6 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -133,7 +133,7 @@ struct MD5Hasher { { // On destruction, finalize the message buffer and write out the current // hexadecimal hash value to the result location. - // Add a one bit flag (10000000) to signal the end of the message. + // Add a one byte flag 0b10000000 to signal the end of the message. uint8_t constexpr end_of_message = 0x80; // The message length is appended to the end of the last chunk processed. uint64_t const message_length_in_bits = message_length * 8;