From b6264323f82c54fad9ee071b786e3a7adced7fb3 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 9 Jun 2020 22:50:00 +0000 Subject: [PATCH 01/17] refactor that details proposed code --- cpp/include/cudf/detail/hashing.hpp | 15 +++++++++++++++ cpp/include/cudf/hashing.hpp | 1 + cpp/include/cudf/types.hpp | 9 +++++++++ cpp/src/hash/hashing.cu | 22 +++++++++++++++++++++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 3c83db0edba..485eb509175 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -37,9 +37,24 @@ std::pair, std::vector> hash_partition( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr hash(table_view const& input, + hash_id hash_function = hash_id::HASH_MURMUR3, std::vector const& initial_hash = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), cudaStream_t stream = 0); +std::unique_ptr identity_hash(table_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), + cudaStream_t stream = 0); + +std::unique_ptr murmur_hash3_32(table_view const& input, + std::vector const& initial_hash = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), + cudaStream_t stream = 0); + +std::unique_ptr md5_hash(table_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), + cudaStream_t stream = 0); + + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index cc532e377e7..c7209f070d4 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -35,6 +35,7 @@ namespace cudf { * @returns A column where each row is the hash of a column from the input */ std::unique_ptr hash(table_view const& input, + hash_id hash_function = hash_id::HASH_MURMUR3, std::vector const& initial_hash = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 5bce5dc393c..13b5f39a28b 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -269,5 +269,14 @@ inline bool operator==(data_type const& lhs, data_type const& rhs) { return lhs. */ std::size_t size_of(data_type t); +/** + * @brief Identifies the hash function to be used + */ +enum class hash_id { + HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed + HASH_MURMUR3, ///< Murmur3 hash function + HASH_MD5 ///< MD5 hash function +}; + /** @} */ } // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 756b99eb011..fb40de7d8e1 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -634,9 +635,27 @@ std::pair, std::vector> hash_partition( } std::unique_ptr hash(table_view const& input, + hash_id hash_function, std::vector const& initial_hash, rmm::mr::device_memory_resource* mr, cudaStream_t stream) +{ + switch(hash_function) { + // case(hash_id::HASH_IDENTITY) : + // return identity_hash(input); + case(hash_id::HASH_MURMUR3) : + return murmur_hash3_32(input, initial_hash, mr, stream); + // case(hash_id::HASH_MD5) : + // return md5_hash(input, mr, stream); + default : + return NULL; + } +} + +std::unique_ptr murmur_hash3_32(table_view const& input, + std::vector const& initial_hash, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) { // TODO this should be UINT32 auto output = make_numeric_column( @@ -688,11 +707,12 @@ std::unique_ptr hash(table_view const& input, } // namespace detail std::unique_ptr hash(table_view const& input, + hash_id hash_function, std::vector const& initial_hash, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::hash(input, initial_hash, mr); + return detail::hash(input, hash_function, initial_hash, mr); } } // namespace cudf From 0d3845c072d985885bbd40aabf554f8edbc47a74 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 7 Jul 2020 10:28:40 +0000 Subject: [PATCH 02/17] Initial MD5 implementation --- cpp/CMakeLists.txt | 1 + .../cudf/detail/utilities/hash_functions.cuh | 215 +++++++++++++++++- cpp/src/hash/hash_constants.cu | 96 ++++++++ cpp/src/hash/hash_constants.cuh | 22 ++ cpp/src/hash/hash_constants.hpp | 74 ++++++ cpp/src/hash/hashing.cu | 96 +++++++- cpp/tests/hashing/hash_test.cpp | 172 +++++++++++++- 7 files changed, 669 insertions(+), 7 deletions(-) create mode 100644 cpp/src/hash/hash_constants.cu create mode 100644 cpp/src/hash/hash_constants.cuh create mode 100644 cpp/src/hash/hash_constants.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3f14909ad38..4141ab2820a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -423,6 +423,7 @@ add_library(cudf src/stream_compaction/drop_duplicates.cu src/datetime/datetime_ops.cu src/hash/hashing.cu + src/hash/hash_constants.cu src/partitioning/partitioning.cu src/quantiles/quantile.cu src/quantiles/quantiles.cu diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 8f26690c835..9d145fd20c8 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, NVIDIA CORPORATION. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,221 @@ #pragma once +#include #include +#include + +#include "cuda_runtime_api.h" +#include "cudf/types.hpp" +#include "driver_types.h" +#include "vector_types.h" using hash_value_type = uint32_t; +namespace cudf { +namespace detail { + + /** + * @brief Helper function, left rotate bit value the value n bits + */ + CUDA_HOST_DEVICE_CALLABLE uint32_t left_rotate(uint32_t value, uint32_t shift) + { + return (value << shift) | (value >> (32-shift)); + } + + /** + * @brief Core MD5 algorith implementation. Processes a single 512-bit chunk, + * updating the hash value so far. Does not zero out the buffer contents. + */ + void CUDA_HOST_DEVICE_CALLABLE + md5_hash_step(md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) + { + uint32_t A = hash_state->hash_value[0]; + uint32_t B = hash_state->hash_value[1]; + uint32_t C = hash_state->hash_value[2]; + uint32_t D = hash_state->hash_value[3]; + + uint32_t* buffer_ints = (uint32_t*)hash_state->buffer; + + for(unsigned int j = 0; j < 64; j++) { + uint32_t F, g; + switch(j / 16) { + case 0 : + F = (B & C) | ((~B)&D); // D ^ (B & (C ^ D)) + g = j; + break; + case 1 : + F = (D & B) | ((~D)&C); + g = (5 * j + 1) % 16; + break; + case 2 : + F = B ^ C ^ D; + g = (3 * j + 5) % 16; + break; + case 3 : + F = C ^ (B | (~D)); + g = (7 * j) % 16; + break; + } + + F = F + A + hash_constants[j] + buffer_ints[g]; + + A = D; + D = C; + C = B; + B = B + left_rotate(F, shift_constants[((j/16) * 4) + (j%4)]); + } + + hash_state->hash_value[0] += A; + hash_state->hash_value[1] += B; + hash_state->hash_value[2] += C; + hash_state->hash_value[3] += D; + + hash_state->buffer_length = 0; + } + + template + struct MD5Hash { + using argument_type = Key; + + /** + * @brief Core MD5 element processing function + */ + template + void CUDA_HOST_DEVICE_CALLABLE + process(TKey const& key, + const uint32_t len, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const + { + uint8_t* data = (uint8_t*)&key; + hash_state->message_length += len; + + if(hash_state->buffer_length + len < 64) { + thrust::copy_n(thrust::seq, data, len, hash_state->buffer+hash_state->buffer_length); + hash_state->buffer_length += len; + } else { + uint32_t copylen = 64 - hash_state->buffer_length; + + thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer+hash_state->buffer_length); + md5_hash_step(hash_state, hash_constants, shift_constants); + + while(len > 64 + copylen) { + thrust::copy_n(thrust::seq, data+copylen, 64, hash_state->buffer); + md5_hash_step(hash_state, hash_constants, shift_constants); + copylen += 64; + } + + thrust::copy_n(thrust::seq, data+copylen, len-copylen, hash_state->buffer); + hash_state->buffer_length = len-copylen; + } + } + + template ()>* = nullptr> + void CUDA_HOST_DEVICE_CALLABLE + operator()(T const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const + { + process(key, size_of(key), hash_state, hash_constants, shift_constants); + } + + template ()>* = nullptr> + void CUDA_HOST_DEVICE_CALLABLE + operator()(T const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const + { + CUDF_FAIL("Unsupported hash type"); + } + + void CUDA_HOST_DEVICE_CALLABLE + operator()(Key const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const {} + }; + + /** + * @brief Specialization of MD5Hash operator for strings. + */ + template <> + void CUDA_HOST_DEVICE_CALLABLE + MD5Hash::operator()(cudf::string_view const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const + { + const uint32_t len = (uint32_t)key.size_bytes(); + const uint8_t* data = (const uint8_t*)key.data(); + + hash_state->message_length += len; + + if(hash_state->buffer_length + len < 64) { + thrust::copy_n(thrust::seq, data, len, hash_state->buffer+hash_state->buffer_length); + hash_state->buffer_length += len; + } else { + uint32_t copylen = 64 - hash_state->buffer_length; + thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer+hash_state->buffer_length); + md5_hash_step(hash_state, hash_constants, shift_constants); + + while(len > 64 + copylen) { + thrust::copy_n(thrust::seq, data+copylen, 64, hash_state->buffer); + md5_hash_step(hash_state, hash_constants, shift_constants); + copylen += 64; + } + + thrust::copy_n(thrust::seq, data+copylen, len-copylen, hash_state->buffer); + hash_state->buffer_length = len-copylen; + } + } + + /** + * @brief Finalize MD5 hash including converstion to hex string. + */ + void CUDA_HOST_DEVICE_CALLABLE + finalize_md5_hash(md5_intermediate_data* hash_state, + char* result_location, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants, + const hex_to_char_mapping_type* hex_char_map) + { + uint64_t full_length = (uint64_t)hash_state->message_length; + full_length = full_length << 3; + thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length, 1, 0x80); + + if(hash_state->buffer_length <= 55) { + thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length+1, + (55 - hash_state->buffer_length), 0x00); + } else { + thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length+1, + (64 - hash_state->buffer_length), 0x00); + md5_hash_step(hash_state, hash_constants, shift_constants); + + thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + } + + thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer+56); + md5_hash_step(hash_state, hash_constants, shift_constants); + + u_char final_hash[32]; + uint8_t* hash_result = (uint8_t*)hash_state->hash_value; + for(int i = 0; i < 16; i++) { + final_hash[i*2] = hex_char_map[(hash_result[i] >> 4) & 0xf]; + final_hash[i*2+1] = hex_char_map[hash_result[i] & 0xf]; + } + + thrust::copy_n(thrust::seq, final_hash, 32, result_location); + } + +} // namespace detail +} // namespace cudf + // MurmurHash3_32 implementation from // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp //----------------------------------------------------------------------------- @@ -250,4 +461,4 @@ struct IdentityHash { }; template -using default_hash = MurmurHash3_32; +using default_hash = MurmurHash3_32; \ No newline at end of file diff --git a/cpp/src/hash/hash_constants.cu b/cpp/src/hash/hash_constants.cu new file mode 100644 index 00000000000..648b8b65fe3 --- /dev/null +++ b/cpp/src/hash/hash_constants.cu @@ -0,0 +1,96 @@ +/* Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "./hash_constants.hpp" + +#include + +namespace cudf { +namespace detail { + +const hex_to_char_mapping_type g_hex_to_char_mapping[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' +}; +const md5_shift_constants_type g_md5_shift_constants[] = { + 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23 , 6, 10, 15, 21, +}; + +const md5_hash_constants_type g_md5_hash_constants[] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; + +std::mutex g_hex_to_char_mapping_mutex; +std::mutex g_md5_hash_constants_mutex; +std::mutex g_md5_shift_constants_mutex; + +hex_to_char_mapping_type* d_hex_to_char_mapping = nullptr; +md5_hash_constants_type* d_md5_hash_constants = nullptr; +md5_shift_constants_type* d_md5_shift_constants = nullptr; + +__device__ hex_to_char_mapping_type hex_to_char_mapping[sizeof(g_hex_to_char_mapping)]; +__device__ md5_hash_constants_type md5_hash_constants[sizeof(g_md5_hash_constants)]; +__device__ md5_shift_constants_type md5_shift_constants[sizeof(g_md5_shift_constants)]; + +/** + * @copydoc cudf::detail::get_hex_to_char_mapping + */ +const hex_to_char_mapping_type* get_hex_to_char_mapping() +{ + std::lock_guard guard(g_hex_to_char_mapping_mutex); + if (!d_hex_to_char_mapping) { + CUDA_TRY(cudaMemcpyToSymbol( + hex_to_char_mapping, g_hex_to_char_mapping, sizeof(g_hex_to_char_mapping))); + CUDA_TRY(cudaGetSymbolAddress((void**)&d_hex_to_char_mapping, hex_to_char_mapping)); + } + return d_hex_to_char_mapping; +} + +/** + * @copydoc cudf::detail::get_md5_hash_constants + */ +const md5_hash_constants_type* get_md5_hash_constants() +{ + std::lock_guard guard(g_md5_hash_constants_mutex); + if (!d_md5_hash_constants) { + CUDA_TRY(cudaMemcpyToSymbol( + md5_hash_constants, g_md5_hash_constants, sizeof(g_md5_hash_constants))); + CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_hash_constants, md5_hash_constants)); + } + return d_md5_hash_constants; +} + +/** + * @copydoc cudf::detail::get_md5_shift_constants + */ +const md5_shift_constants_type* get_md5_shift_constants() +{ + std::lock_guard guard(g_md5_shift_constants_mutex); + if (!d_md5_shift_constants) { + CUDA_TRY(cudaMemcpyToSymbol( + md5_shift_constants, g_md5_shift_constants, sizeof(g_md5_shift_constants))); + CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_shift_constants, md5_shift_constants)); + } + return d_md5_shift_constants; +} + +} // namespace detail +} // namespace cudf \ No newline at end of file diff --git a/cpp/src/hash/hash_constants.cuh b/cpp/src/hash/hash_constants.cuh new file mode 100644 index 00000000000..f7cec3f0652 --- /dev/null +++ b/cpp/src/hash/hash_constants.cuh @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +namespace cudf { +namespace detail { + +} // namespace detail +} // namespace cudf \ No newline at end of file diff --git a/cpp/src/hash/hash_constants.hpp b/cpp/src/hash/hash_constants.hpp new file mode 100644 index 00000000000..f6d9b195e3f --- /dev/null +++ b/cpp/src/hash/hash_constants.hpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +namespace cudf { +namespace detail { + +struct md5_intermediate_data{ + uint64_t message_length = 0; + uint32_t buffer_length = 0; + uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; + uint8_t buffer[64]; +}; + +// Type for the shift constants table. +using md5_shift_constants_type = uint32_t; + +/** + * @brief Returns pointer to device memory that contains the static + * md5 shift constants table. On first call, this will copy the table into + * device memory and is guaranteed to be thread-safe. + * + * This table is used in the MD5 hash to lookup the number of bits + * to rotate left during each hash iteration. + * + * @return Device memory pointer to the MD5 shift constants table. + */ +const md5_shift_constants_type* get_md5_shift_constants(); + +// Type for the hash constants table. +using md5_hash_constants_type = uint32_t; + +/** + * @brief Returns pointer to device memory that contains the static + * md5 hash constants table. On first call, this will copy the table into + * device memory and is guaranteed to be thread-safe. + * + * This table is used in the MD5 hash to lookup values added to + * the hash during each hash iteration. + * + * @return Device memory pointer to the MD5 hash constants table. + */ +const md5_hash_constants_type* get_md5_hash_constants(); + +// Type for the hexidecimal character mapping. +using hex_to_char_mapping_type = char; + +/** + * @brief Returns pointer to device memory that contains the static + * hexidecimal character map. On first call, this will copy the table into + * device memory and is guaranteed to be thread-safe. + * + * This table is used to lookup the corresponding hex character when + * translating the hash into a hexidecimal string. + * + * @return Device memory pointer to the hex to char map. + */ +const hex_to_char_mapping_type* get_hex_to_char_mapping(); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index fb40de7d8e1..b0acbc1aaa4 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include #include @@ -26,6 +27,7 @@ #include #include +#include #include namespace cudf { @@ -641,17 +643,103 @@ std::unique_ptr hash(table_view const& input, cudaStream_t stream) { switch(hash_function) { - // case(hash_id::HASH_IDENTITY) : - // return identity_hash(input); case(hash_id::HASH_MURMUR3) : return murmur_hash3_32(input, initial_hash, mr, stream); - // case(hash_id::HASH_MD5) : - // return md5_hash(input, mr, stream); + case(hash_id::HASH_MD5) : + return md5_hash(input, mr, stream); default : return NULL; } } +/** + * @brief Updates the MD5 hash value with an element in the given column. + * + * @tparam has_nulls Indicates the potential for null values in the column. + **/ + template + class md5_element_hasher { + public: + template + __device__ inline void operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) + { + if (!has_nulls || col.is_valid(row_index)) { + MD5Hash{}(col.element(row_index), hash_state, hash_constants, shift_constants); + } + } +}; + +std::unique_ptr md5_hash(table_view const& input, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + if(input.num_columns() == 0 || input.num_rows() == 0) { + const string_scalar string_128bit("d41d8cd98f00b204e9orig98ecf8427e"); + auto output = make_column_from_scalar(string_128bit, input.num_rows(), mr, stream); + return output; + } + + // Result column allocation and creation + auto transformer = [] __device__(size_type idx) { return 32; }; + auto begin = thrust::make_transform_iterator(thrust::make_counting_iterator(0), transformer); + auto offsets_column = + cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), mr, stream); + auto offsets_view = offsets_column->view(); + auto d_new_offsets = offsets_view.data(); + + auto chars_column = + strings::detail::create_chars_child_column(input.num_rows(), 0, input.num_rows()*32, mr, stream); + auto chars_view = chars_column->mutable_view(); + auto d_chars = chars_view.data(); + + rmm::device_buffer null_mask{0, stream, mr}; + + bool const nullable = has_nulls(input); + auto const device_input = table_device_view::create(input, stream); + + // Fetch hash constants + const md5_shift_constants_type* shift_constants = get_md5_shift_constants(); + const md5_hash_constants_type* hash_constants = get_md5_hash_constants(); + const hex_to_char_mapping_type* hex_char_map = get_hex_to_char_mapping(); + + // Hash each row, hashing each element sequentially left to right + thrust::for_each( + rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, device_input = *device_input, hash_constants = hash_constants, shift_constants = shift_constants, hex_char_map = hex_char_map, has_nulls = nullable] __device__ (auto row_index) { + md5_intermediate_data hash_state; + for(int col_index = 0; col_index < device_input.num_columns(); col_index++) { + if (!has_nulls) { + cudf::type_dispatcher( + device_input.column(col_index).type(), + md5_element_hasher{}, + device_input.column(col_index), + row_index, &hash_state, hash_constants, shift_constants); + } else { + cudf::type_dispatcher( + device_input.column(col_index).type(), + md5_element_hasher{}, + device_input.column(col_index), + row_index, &hash_state, hash_constants, shift_constants); + } + } + finalize_md5_hash(&hash_state, d_chars + (row_index*32), hash_constants, shift_constants, hex_char_map); + }); + + return make_strings_column(input.num_rows(), + std::move(offsets_column), + std::move(chars_column), + 0, + std::move(null_mask), + stream, + mr); +} + std::unique_ptr murmur_hash3_32(table_view const& input, std::vector const& initial_hash, rmm::mr::device_memory_resource* mr, diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index 86d62c537f7..37ffdbbd6bb 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -176,4 +176,174 @@ TYPED_TEST(HashTestFloatTyped, TestExtremes) expect_columns_equal(output1->view(), output2->view(), true); } +class MD5HashTest : public cudf::test::BaseFixture { +}; + +TEST_F(MD5HashTest, MultiValue) +{ + strings_column_wrapper const strings_col({"", + "A 60 character string to test MD5's message padding algorithm", + "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the MD5 hash function. This string needed to be longer.", + "All work and no play makes Jack a dull boy", + "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}); + + strings_column_wrapper const md5_string_results1({"d41d8cd98f00b204e9800998ecf8427e", + "682240021651ae166d08fe2a014d5c09", + "3669d5225fddbb34676312ca3b78bbd9", + "c61a4185135eda043f35e92c3505e180", + "52da74c75cb6575d25be29e66bd0adde"}); + + strings_column_wrapper const md5_string_results2({"d41d8cd98f00b204e9800998ecf8427e", + "e5a5682e82278e78dbaad9a689df7a73", + "4121ab1bb6e84172fd94822645862ae9", + "28970886501efe20164213855afe5850", + "6bc1b872103cc6a02d882245b8516e2e"}); + + using limits = std::numeric_limits; + fixed_width_column_wrapper const ints_col({0, 100, -100, limits::min(), limits::max()}); + + // Different truth values should be equal + fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); + fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); + + using ts = cudf::timestamp_s; + fixed_width_column_wrapper const secs_col({ts::duration::zero(), + static_cast(100), + static_cast(-100), + ts::duration::min(), + ts::duration::max()}); + + auto const string_input1 = cudf::table_view({strings_col}); + auto const string_input2 = cudf::table_view({strings_col, strings_col}); + auto const md5_string_output1 = cudf::hash(string_input1, cudf::hash_id::HASH_MD5); + auto const md5_string_output2 = cudf::hash(string_input2, cudf::hash_id::HASH_MD5); + EXPECT_EQ(string_input1.num_rows(), md5_string_output1->size()); + EXPECT_EQ(string_input2.num_rows(), md5_string_output2->size()); + expect_columns_equal(md5_string_output1->view(), md5_string_results1); + expect_columns_equal(md5_string_output2->view(), md5_string_results2); + + auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col}); + auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col}); + auto const md5_output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); + auto const md5_output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); + EXPECT_EQ(input1.num_rows(), md5_output1->size()); + expect_columns_equal(md5_output1->view(), md5_output2->view()); +} + +TEST_F(MD5HashTest, MultiValueNulls) +{ + // Nulls with different values should be equal + strings_column_wrapper const strings_col1({"", + "Different but null!", + "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the MD5 hash function. This string needed to be longer.", + "All work and no play makes Jack a dull boy", + "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}, + {1, 0, 0, 1, 0}); + strings_column_wrapper const strings_col2({"", + "A 60 character string to test MD5's message padding algorithm", + "Very different... but null", + "All work and no play makes Jack a dull boy", + ""}, + {1, 0, 0, 1, 1}); //empty string is equivalent to null + + // Nulls with different values should be equal + using limits = std::numeric_limits; + fixed_width_column_wrapper const ints_col1({0, 100, -100, limits::min(), limits::max()}, + {1, 0, 0, 1, 1}); + fixed_width_column_wrapper const ints_col2({0, -200, 200, limits::min(), limits::max()}, + {1, 0, 0, 1, 1}); + + // Nulls with different values should be equal + // Different truthy values should be equal + fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); + fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1}); + + // Nulls with different values should be equal + using ts = cudf::timestamp_s; + fixed_width_column_wrapper const secs_col1({ts::duration::zero(), + static_cast(100), + static_cast(-100), + ts::duration::min(), + ts::duration::max()}, + {1, 0, 0, 1, 1}); + fixed_width_column_wrapper const secs_col2({ts::duration::zero(), + static_cast(-200), + static_cast(200), + ts::duration::min(), + ts::duration::max()}, + {1, 0, 0, 1, 1}); + + auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1, secs_col1}); + auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2, secs_col2}); + + auto const output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); + auto const output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); + + EXPECT_EQ(input1.num_rows(), output1->size()); + expect_columns_equal(output1->view(), output2->view(), true); +} + +template +class MD5HashTestTyped : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(MD5HashTestTyped, cudf::test::FixedWidthTypes); + +TYPED_TEST(MD5HashTestTyped, Equality) +{ + fixed_width_column_wrapper const col({0, 127, 1, 2, 8}); + auto const input = cudf::table_view({col}); + + // Hash of same input should be equal + auto const output1 = cudf::hash(input, cudf::hash_id::HASH_MD5); + auto const output2 = cudf::hash(input, cudf::hash_id::HASH_MD5); + + EXPECT_EQ(input.num_rows(), output1->size()); + expect_columns_equal(output1->view(), output2->view()); +} + +TYPED_TEST(MD5HashTestTyped, EqualityNulls) +{ + using T = TypeParam; + + // Nulls with different values should be equal + fixed_width_column_wrapper const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); + fixed_width_column_wrapper const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1}); + + auto const input1 = cudf::table_view({col1}); + auto const input2 = cudf::table_view({col2}); + + auto const output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); + auto const output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); + + EXPECT_EQ(input1.num_rows(), output1->size()); + expect_columns_equal(output1->view(), output2->view()); +} + +template +class MD5HashTestFloatTyped : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(MD5HashTestFloatTyped, cudf::test::FloatingPointTypes); + +TYPED_TEST(MD5HashTestFloatTyped, TestExtremes) +{ + using T = TypeParam; + T min = std::numeric_limits::min(); + T max = std::numeric_limits::max(); + T nan = std::numeric_limits::quiet_NaN(); + T inf = std::numeric_limits::infinity(); + + fixed_width_column_wrapper const col1({T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); + fixed_width_column_wrapper const col2({T(-0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf}); + + auto const input1 = cudf::table_view({col1}); + auto const input2 = cudf::table_view({col2}); + + auto const output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); + auto const output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); + + expect_columns_equal(output1->view(), output2->view(), true); +} + CUDF_TEST_PROGRAM_MAIN() From d32552d7dcb3418164f97acb4620c66ee2cdaf3b Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 21 Jul 2020 08:22:42 +0000 Subject: [PATCH 03/17] Modify CHANGELOG and fix copywrite headers --- CHANGELOG.md | 1 + cpp/include/cudf/detail/hashing.hpp | 2 +- cpp/include/cudf/hashing.hpp | 2 +- cpp/src/hash/hashing.cu | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26b4ae58475..cdf28851163 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ - PR #5536 Parquet reader - add support for multiple sources - PR #5607 Add Java bindings for duration types - PR #5612 Add `is_hex` strings API +- PR #5438 Add MD5 hash support ## Improvements diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 485eb509175..e3064887d23 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index c7209f070d4..7d5227ef727 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index b0acbc1aaa4..5401a797ad3 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From f4ad66ee055d07c9f2b04d5fd9a9dcc7d6b6f818 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 21 Jul 2020 23:34:31 +0000 Subject: [PATCH 04/17] style fixes --- cpp/include/cudf/detail/hashing.hpp | 26 +- .../cudf/detail/utilities/hash_functions.cuh | 318 +++++++++--------- cpp/include/cudf/hashing.hpp | 2 +- cpp/include/cudf/types.hpp | 6 +- cpp/src/hash/hash_constants.cu | 28 +- cpp/src/hash/hash_constants.hpp | 6 +- cpp/src/hash/hashing.cu | 103 +++--- cpp/tests/hashing/hash_test.cpp | 66 ++-- 8 files changed, 292 insertions(+), 263 deletions(-) diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index e3064887d23..80d2f7de52f 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -37,24 +37,26 @@ std::pair, std::vector> hash_partition( * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr hash(table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, + hash_id hash_function = hash_id::HASH_MURMUR3, std::vector const& initial_hash = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), cudaStream_t stream = 0); -std::unique_ptr identity_hash(table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), - cudaStream_t stream = 0); - -std::unique_ptr murmur_hash3_32(table_view const& input, - std::vector const& initial_hash = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), - cudaStream_t stream = 0); +std::unique_ptr identity_hash( + table_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), + cudaStream_t stream = 0); -std::unique_ptr md5_hash(table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), - cudaStream_t stream = 0); +std::unique_ptr murmur_hash3_32( + table_view const& input, + std::vector const& initial_hash = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), + cudaStream_t stream = 0); +std::unique_ptr md5_hash( + table_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), + cudaStream_t stream = 0); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 9d145fd20c8..6d7bdeabd21 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -30,204 +30,204 @@ using hash_value_type = uint32_t; namespace cudf { namespace detail { - /** - * @brief Helper function, left rotate bit value the value n bits - */ - CUDA_HOST_DEVICE_CALLABLE uint32_t left_rotate(uint32_t value, uint32_t shift) - { - return (value << shift) | (value >> (32-shift)); - } +/** + * @brief Helper function, left rotate bit value the value n bits + */ +CUDA_HOST_DEVICE_CALLABLE uint32_t left_rotate(uint32_t value, uint32_t shift) +{ + return (value << shift) | (value >> (32 - shift)); +} - /** - * @brief Core MD5 algorith implementation. Processes a single 512-bit chunk, - * updating the hash value so far. Does not zero out the buffer contents. - */ - void CUDA_HOST_DEVICE_CALLABLE - md5_hash_step(md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) - { - uint32_t A = hash_state->hash_value[0]; - uint32_t B = hash_state->hash_value[1]; - uint32_t C = hash_state->hash_value[2]; - uint32_t D = hash_state->hash_value[3]; - - uint32_t* buffer_ints = (uint32_t*)hash_state->buffer; - - for(unsigned int j = 0; j < 64; j++) { - uint32_t F, g; - switch(j / 16) { - case 0 : - F = (B & C) | ((~B)&D); // D ^ (B & (C ^ D)) - g = j; +/** + * @brief Core MD5 algorith implementation. Processes a single 512-bit chunk, + * updating the hash value so far. Does not zero out the buffer contents. + */ +void CUDA_HOST_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) +{ + uint32_t A = hash_state->hash_value[0]; + uint32_t B = hash_state->hash_value[1]; + uint32_t C = hash_state->hash_value[2]; + uint32_t D = hash_state->hash_value[3]; + + uint32_t* buffer_ints = (uint32_t*)hash_state->buffer; + + for (unsigned int j = 0; j < 64; j++) { + uint32_t F, g; + switch (j / 16) { + case 0: + F = (B & C) | ((~B) & D); // D ^ (B & (C ^ D)) + g = j; break; - case 1 : - F = (D & B) | ((~D)&C); - g = (5 * j + 1) % 16; + case 1: + F = (D & B) | ((~D) & C); + g = (5 * j + 1) % 16; break; - case 2 : - F = B ^ C ^ D; - g = (3 * j + 5) % 16; + case 2: + F = B ^ C ^ D; + g = (3 * j + 5) % 16; break; - case 3 : - F = C ^ (B | (~D)); - g = (7 * j) % 16; + case 3: + F = C ^ (B | (~D)); + g = (7 * j) % 16; break; - } - - F = F + A + hash_constants[j] + buffer_ints[g]; - - A = D; - D = C; - C = B; - B = B + left_rotate(F, shift_constants[((j/16) * 4) + (j%4)]); } - - hash_state->hash_value[0] += A; - hash_state->hash_value[1] += B; - hash_state->hash_value[2] += C; - hash_state->hash_value[3] += D; - - hash_state->buffer_length = 0; + + F = F + A + hash_constants[j] + buffer_ints[g]; + + A = D; + D = C; + C = B; + B = B + left_rotate(F, shift_constants[((j / 16) * 4) + (j % 4)]); } - - template - struct MD5Hash { - using argument_type = Key; - - /** - * @brief Core MD5 element processing function - */ - template - void CUDA_HOST_DEVICE_CALLABLE - process(TKey const& key, - const uint32_t len, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const - { - uint8_t* data = (uint8_t*)&key; - hash_state->message_length += len; - - if(hash_state->buffer_length + len < 64) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer+hash_state->buffer_length); - hash_state->buffer_length += len; - } else { - uint32_t copylen = 64 - hash_state->buffer_length; - - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer+hash_state->buffer_length); - md5_hash_step(hash_state, hash_constants, shift_constants); - - while(len > 64 + copylen) { - thrust::copy_n(thrust::seq, data+copylen, 64, hash_state->buffer); - md5_hash_step(hash_state, hash_constants, shift_constants); - copylen += 64; - } - - thrust::copy_n(thrust::seq, data+copylen, len-copylen, hash_state->buffer); - hash_state->buffer_length = len-copylen; - } - } - template ()>* = nullptr> - void CUDA_HOST_DEVICE_CALLABLE - operator()(T const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const - { - process(key, size_of(key), hash_state, hash_constants, shift_constants); - } + hash_state->hash_value[0] += A; + hash_state->hash_value[1] += B; + hash_state->hash_value[2] += C; + hash_state->hash_value[3] += D; - template ()>* = nullptr> - void CUDA_HOST_DEVICE_CALLABLE - operator()(T const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const - { - CUDF_FAIL("Unsupported hash type"); - } + hash_state->buffer_length = 0; +} - void CUDA_HOST_DEVICE_CALLABLE - operator()(Key const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const {} - }; +template +struct MD5Hash { + using argument_type = Key; /** - * @brief Specialization of MD5Hash operator for strings. + * @brief Core MD5 element processing function */ - template <> - void CUDA_HOST_DEVICE_CALLABLE - MD5Hash::operator()(cudf::string_view const& key, + template + void CUDA_HOST_DEVICE_CALLABLE process(TKey const& key, + const uint32_t len, md5_intermediate_data* hash_state, const md5_hash_constants_type* hash_constants, const md5_shift_constants_type* shift_constants) const { - const uint32_t len = (uint32_t)key.size_bytes(); - const uint8_t* data = (const uint8_t*)key.data(); - + uint8_t* data = (uint8_t*)&key; hash_state->message_length += len; - - if(hash_state->buffer_length + len < 64) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer+hash_state->buffer_length); + + if (hash_state->buffer_length + len < 64) { + thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); hash_state->buffer_length += len; } else { uint32_t copylen = 64 - hash_state->buffer_length; - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer+hash_state->buffer_length); + + thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); md5_hash_step(hash_state, hash_constants, shift_constants); - while(len > 64 + copylen) { - thrust::copy_n(thrust::seq, data+copylen, 64, hash_state->buffer); + while (len > 64 + copylen) { + thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); md5_hash_step(hash_state, hash_constants, shift_constants); copylen += 64; } - - thrust::copy_n(thrust::seq, data+copylen, len-copylen, hash_state->buffer); - hash_state->buffer_length = len-copylen; + + thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + hash_state->buffer_length = len - copylen; } } - /** - * @brief Finalize MD5 hash including converstion to hex string. - */ - void CUDA_HOST_DEVICE_CALLABLE - finalize_md5_hash(md5_intermediate_data* hash_state, - char* result_location, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants, - const hex_to_char_mapping_type* hex_char_map) + template ()>* = nullptr> + void CUDA_HOST_DEVICE_CALLABLE operator()(T const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const + { + process(key, size_of(key), hash_state, hash_constants, shift_constants); + } + + template ()>* = nullptr> + void CUDA_HOST_DEVICE_CALLABLE operator()(T const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const { - uint64_t full_length = (uint64_t)hash_state->message_length; - full_length = full_length << 3; - thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length, 1, 0x80); + CUDF_FAIL("Unsupported hash type"); + } - if(hash_state->buffer_length <= 55) { - thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length+1, - (55 - hash_state->buffer_length), 0x00); - } else { - thrust::fill_n(thrust::seq, hash_state->buffer+hash_state->buffer_length+1, - (64 - hash_state->buffer_length), 0x00); + void CUDA_HOST_DEVICE_CALLABLE operator()(Key const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const + { + } +}; + +/** + * @brief Specialization of MD5Hash operator for strings. + */ +template <> +void CUDA_HOST_DEVICE_CALLABLE +MD5Hash::operator()(cudf::string_view const& key, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) const +{ + const uint32_t len = (uint32_t)key.size_bytes(); + const uint8_t* data = (const uint8_t*)key.data(); + + hash_state->message_length += len; + + if (hash_state->buffer_length + len < 64) { + thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); + hash_state->buffer_length += len; + } else { + uint32_t copylen = 64 - hash_state->buffer_length; + thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); + md5_hash_step(hash_state, hash_constants, shift_constants); + + while (len > 64 + copylen) { + thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); md5_hash_step(hash_state, hash_constants, shift_constants); - - thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + copylen += 64; } - thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer+56); + thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + hash_state->buffer_length = len - copylen; + } +} + +/** + * @brief Finalize MD5 hash including converstion to hex string. + */ +void CUDA_HOST_DEVICE_CALLABLE finalize_md5_hash(md5_intermediate_data* hash_state, + char* result_location, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants, + const hex_to_char_mapping_type* hex_char_map) +{ + uint64_t full_length = (uint64_t)hash_state->message_length; + full_length = full_length << 3; + thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); + + if (hash_state->buffer_length <= 55) { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (55 - hash_state->buffer_length), + 0x00); + } else { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (64 - hash_state->buffer_length), + 0x00); md5_hash_step(hash_state, hash_constants, shift_constants); - u_char final_hash[32]; - uint8_t* hash_result = (uint8_t*)hash_state->hash_value; - for(int i = 0; i < 16; i++) { - final_hash[i*2] = hex_char_map[(hash_result[i] >> 4) & 0xf]; - final_hash[i*2+1] = hex_char_map[hash_result[i] & 0xf]; - } + thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + } - thrust::copy_n(thrust::seq, final_hash, 32, result_location); + thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer + 56); + md5_hash_step(hash_state, hash_constants, shift_constants); + + u_char final_hash[32]; + uint8_t* hash_result = (uint8_t*)hash_state->hash_value; + for (int i = 0; i < 16; i++) { + final_hash[i * 2] = hex_char_map[(hash_result[i] >> 4) & 0xf]; + final_hash[i * 2 + 1] = hex_char_map[hash_result[i] & 0xf]; } + thrust::copy_n(thrust::seq, final_hash, 32, result_location); +} + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 7d5227ef727..d897609a1a1 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -35,7 +35,7 @@ namespace cudf { * @returns A column where each row is the hash of a column from the input */ std::unique_ptr hash(table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, + hash_id hash_function = hash_id::HASH_MURMUR3, std::vector const& initial_hash = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 13b5f39a28b..24f29ddea71 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -273,9 +273,9 @@ std::size_t size_of(data_type t); * @brief Identifies the hash function to be used */ enum class hash_id { - HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed - HASH_MURMUR3, ///< Murmur3 hash function - HASH_MD5 ///< MD5 hash function + HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed + HASH_MURMUR3, ///< Murmur3 hash function + HASH_MD5 ///< MD5 hash function }; /** @} */ diff --git a/cpp/src/hash/hash_constants.cu b/cpp/src/hash/hash_constants.cu index 648b8b65fe3..bc7e467f820 100644 --- a/cpp/src/hash/hash_constants.cu +++ b/cpp/src/hash/hash_constants.cu @@ -21,10 +21,24 @@ namespace cudf { namespace detail { const hex_to_char_mapping_type g_hex_to_char_mapping[] = { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' -}; + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; const md5_shift_constants_type g_md5_shift_constants[] = { - 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23 , 6, 10, 15, 21, + 7, + 12, + 17, + 22, + 5, + 9, + 14, + 20, + 4, + 11, + 16, + 23, + 6, + 10, + 15, + 21, }; const md5_hash_constants_type g_md5_hash_constants[] = { @@ -43,7 +57,7 @@ std::mutex g_md5_hash_constants_mutex; std::mutex g_md5_shift_constants_mutex; hex_to_char_mapping_type* d_hex_to_char_mapping = nullptr; -md5_hash_constants_type* d_md5_hash_constants = nullptr; +md5_hash_constants_type* d_md5_hash_constants = nullptr; md5_shift_constants_type* d_md5_shift_constants = nullptr; __device__ hex_to_char_mapping_type hex_to_char_mapping[sizeof(g_hex_to_char_mapping)]; @@ -58,7 +72,7 @@ const hex_to_char_mapping_type* get_hex_to_char_mapping() std::lock_guard guard(g_hex_to_char_mapping_mutex); if (!d_hex_to_char_mapping) { CUDA_TRY(cudaMemcpyToSymbol( - hex_to_char_mapping, g_hex_to_char_mapping, sizeof(g_hex_to_char_mapping))); + hex_to_char_mapping, g_hex_to_char_mapping, sizeof(g_hex_to_char_mapping))); CUDA_TRY(cudaGetSymbolAddress((void**)&d_hex_to_char_mapping, hex_to_char_mapping)); } return d_hex_to_char_mapping; @@ -71,8 +85,8 @@ const md5_hash_constants_type* get_md5_hash_constants() { std::lock_guard guard(g_md5_hash_constants_mutex); if (!d_md5_hash_constants) { - CUDA_TRY(cudaMemcpyToSymbol( - md5_hash_constants, g_md5_hash_constants, sizeof(g_md5_hash_constants))); + CUDA_TRY( + cudaMemcpyToSymbol(md5_hash_constants, g_md5_hash_constants, sizeof(g_md5_hash_constants))); CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_hash_constants, md5_hash_constants)); } return d_md5_hash_constants; diff --git a/cpp/src/hash/hash_constants.hpp b/cpp/src/hash/hash_constants.hpp index f6d9b195e3f..9063a60d283 100644 --- a/cpp/src/hash/hash_constants.hpp +++ b/cpp/src/hash/hash_constants.hpp @@ -18,10 +18,10 @@ namespace cudf { namespace detail { -struct md5_intermediate_data{ +struct md5_intermediate_data { uint64_t message_length = 0; - uint32_t buffer_length = 0; - uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; + uint32_t buffer_length = 0; + uint32_t hash_value[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}; uint8_t buffer[64]; }; diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 10341ede0e1..9db5dcd5a9d 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include +#include #include #include #include @@ -642,13 +642,10 @@ std::unique_ptr hash(table_view const& input, rmm::mr::device_memory_resource* mr, cudaStream_t stream) { - switch(hash_function) { - case(hash_id::HASH_MURMUR3) : - return murmur_hash3_32(input, initial_hash, mr, stream); - case(hash_id::HASH_MD5) : - return md5_hash(input, mr, stream); - default : - return NULL; + switch (hash_function) { + case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, initial_hash, mr, stream); + case (hash_id::HASH_MD5): return md5_hash(input, mr, stream); + default: return NULL; } } @@ -657,16 +654,16 @@ std::unique_ptr hash(table_view const& input, * * @tparam has_nulls Indicates the potential for null values in the column. **/ - template - class md5_element_hasher { - public: - template - __device__ inline void operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) - { +template +class md5_element_hasher { + public: + template + __device__ inline void operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state, + const md5_hash_constants_type* hash_constants, + const md5_shift_constants_type* shift_constants) + { if (!has_nulls || col.is_valid(row_index)) { MD5Hash{}(col.element(row_index), hash_state, hash_constants, shift_constants); } @@ -674,10 +671,10 @@ std::unique_ptr hash(table_view const& input, }; std::unique_ptr md5_hash(table_view const& input, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) { - if(input.num_columns() == 0 || input.num_rows() == 0) { + if (input.num_columns() == 0 || input.num_rows() == 0) { const string_scalar string_128bit("d41d8cd98f00b204e9orig98ecf8427e"); auto output = make_column_from_scalar(string_128bit, input.num_rows(), mr, stream); return output; @@ -691,59 +688,69 @@ std::unique_ptr md5_hash(table_view const& input, auto offsets_view = offsets_column->view(); auto d_new_offsets = offsets_view.data(); - auto chars_column = - strings::detail::create_chars_child_column(input.num_rows(), 0, input.num_rows()*32, mr, stream); + auto chars_column = strings::detail::create_chars_child_column( + input.num_rows(), 0, input.num_rows() * 32, mr, stream); auto chars_view = chars_column->mutable_view(); auto d_chars = chars_view.data(); rmm::device_buffer null_mask{0, stream, mr}; - bool const nullable = has_nulls(input); + bool const nullable = has_nulls(input); auto const device_input = table_device_view::create(input, stream); // Fetch hash constants const md5_shift_constants_type* shift_constants = get_md5_shift_constants(); - const md5_hash_constants_type* hash_constants = get_md5_hash_constants(); - const hex_to_char_mapping_type* hex_char_map = get_hex_to_char_mapping(); + const md5_hash_constants_type* hash_constants = get_md5_hash_constants(); + const hex_to_char_mapping_type* hex_char_map = get_hex_to_char_mapping(); // Hash each row, hashing each element sequentially left to right thrust::for_each( rmm::exec_policy(stream)->on(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), - [d_chars, device_input = *device_input, hash_constants = hash_constants, shift_constants = shift_constants, hex_char_map = hex_char_map, has_nulls = nullable] __device__ (auto row_index) { + [d_chars, + device_input = *device_input, + hash_constants = hash_constants, + shift_constants = shift_constants, + hex_char_map = hex_char_map, + has_nulls = nullable] __device__(auto row_index) { md5_intermediate_data hash_state; - for(int col_index = 0; col_index < device_input.num_columns(); col_index++) { + for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { if (!has_nulls) { - cudf::type_dispatcher( - device_input.column(col_index).type(), - md5_element_hasher{}, - device_input.column(col_index), - row_index, &hash_state, hash_constants, shift_constants); + cudf::type_dispatcher(device_input.column(col_index).type(), + md5_element_hasher{}, + device_input.column(col_index), + row_index, + &hash_state, + hash_constants, + shift_constants); } else { - cudf::type_dispatcher( - device_input.column(col_index).type(), - md5_element_hasher{}, - device_input.column(col_index), - row_index, &hash_state, hash_constants, shift_constants); + cudf::type_dispatcher(device_input.column(col_index).type(), + md5_element_hasher{}, + device_input.column(col_index), + row_index, + &hash_state, + hash_constants, + shift_constants); } } - finalize_md5_hash(&hash_state, d_chars + (row_index*32), hash_constants, shift_constants, hex_char_map); + finalize_md5_hash( + &hash_state, d_chars + (row_index * 32), hash_constants, shift_constants, hex_char_map); }); return make_strings_column(input.num_rows(), - std::move(offsets_column), - std::move(chars_column), - 0, - std::move(null_mask), - stream, - mr); + std::move(offsets_column), + std::move(chars_column), + 0, + std::move(null_mask), + stream, + mr); } std::unique_ptr murmur_hash3_32(table_view const& input, - std::vector const& initial_hash, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) + std::vector const& initial_hash, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) { // TODO this should be UINT32 auto output = make_numeric_column( diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index 37ffdbbd6bb..a7c2c6ff5d0 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -181,23 +181,25 @@ class MD5HashTest : public cudf::test::BaseFixture { TEST_F(MD5HashTest, MultiValue) { - strings_column_wrapper const strings_col({"", - "A 60 character string to test MD5's message padding algorithm", - "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the MD5 hash function. This string needed to be longer.", - "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}); + strings_column_wrapper const strings_col( + {"", + "A 60 character string to test MD5's message padding algorithm", + "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " + "MD5 hash function. This string needed to be longer.", + "All work and no play makes Jack a dull boy", + "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}); strings_column_wrapper const md5_string_results1({"d41d8cd98f00b204e9800998ecf8427e", - "682240021651ae166d08fe2a014d5c09", - "3669d5225fddbb34676312ca3b78bbd9", - "c61a4185135eda043f35e92c3505e180", - "52da74c75cb6575d25be29e66bd0adde"}); + "682240021651ae166d08fe2a014d5c09", + "3669d5225fddbb34676312ca3b78bbd9", + "c61a4185135eda043f35e92c3505e180", + "52da74c75cb6575d25be29e66bd0adde"}); strings_column_wrapper const md5_string_results2({"d41d8cd98f00b204e9800998ecf8427e", - "e5a5682e82278e78dbaad9a689df7a73", - "4121ab1bb6e84172fd94822645862ae9", - "28970886501efe20164213855afe5850", - "6bc1b872103cc6a02d882245b8516e2e"}); + "e5a5682e82278e78dbaad9a689df7a73", + "4121ab1bb6e84172fd94822645862ae9", + "28970886501efe20164213855afe5850", + "6bc1b872103cc6a02d882245b8516e2e"}); using limits = std::numeric_limits; fixed_width_column_wrapper const ints_col({0, 100, -100, limits::min(), limits::max()}); @@ -213,8 +215,8 @@ TEST_F(MD5HashTest, MultiValue) ts::duration::min(), ts::duration::max()}); - auto const string_input1 = cudf::table_view({strings_col}); - auto const string_input2 = cudf::table_view({strings_col, strings_col}); + auto const string_input1 = cudf::table_view({strings_col}); + auto const string_input2 = cudf::table_view({strings_col, strings_col}); auto const md5_string_output1 = cudf::hash(string_input1, cudf::hash_id::HASH_MD5); auto const md5_string_output2 = cudf::hash(string_input2, cudf::hash_id::HASH_MD5); EXPECT_EQ(string_input1.num_rows(), md5_string_output1->size()); @@ -222,8 +224,8 @@ TEST_F(MD5HashTest, MultiValue) expect_columns_equal(md5_string_output1->view(), md5_string_results1); expect_columns_equal(md5_string_output2->view(), md5_string_results2); - auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col}); - auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col}); + auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col}); + auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col}); auto const md5_output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); auto const md5_output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); EXPECT_EQ(input1.num_rows(), md5_output1->size()); @@ -233,18 +235,21 @@ TEST_F(MD5HashTest, MultiValue) TEST_F(MD5HashTest, MultiValueNulls) { // Nulls with different values should be equal - strings_column_wrapper const strings_col1({"", - "Different but null!", - "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the MD5 hash function. This string needed to be longer.", - "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}, - {1, 0, 0, 1, 0}); - strings_column_wrapper const strings_col2({"", - "A 60 character string to test MD5's message padding algorithm", - "Very different... but null", - "All work and no play makes Jack a dull boy", - ""}, - {1, 0, 0, 1, 1}); //empty string is equivalent to null + strings_column_wrapper const strings_col1( + {"", + "Different but null!", + "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " + "MD5 hash function. This string needed to be longer.", + "All work and no play makes Jack a dull boy", + "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}, + {1, 0, 0, 1, 0}); + strings_column_wrapper const strings_col2( + {"", + "A 60 character string to test MD5's message padding algorithm", + "Very different... but null", + "All work and no play makes Jack a dull boy", + ""}, + {1, 0, 0, 1, 1}); // empty string is equivalent to null // Nulls with different values should be equal using limits = std::numeric_limits; @@ -335,7 +340,8 @@ TYPED_TEST(MD5HashTestFloatTyped, TestExtremes) T inf = std::numeric_limits::infinity(); fixed_width_column_wrapper const col1({T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf}); - fixed_width_column_wrapper const col2({T(-0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf}); + fixed_width_column_wrapper const col2( + {T(-0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf}); auto const input1 = cudf::table_view({col1}); auto const input2 = cudf::table_view({col2}); From fa8c5d1496890cf21eb82dab96c4cf319efaf4c2 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 29 Jul 2020 10:10:36 +0000 Subject: [PATCH 05/17] PR fixes and restructuring of md5 hash operator function --- cpp/include/cudf/detail/hashing.hpp | 5 - .../cudf/detail/utilities/hash_functions.cuh | 183 ++++++++---------- cpp/src/hash/hash_constants.cu | 51 +++-- cpp/src/hash/hash_constants.cuh | 2 +- cpp/src/hash/hashing.cu | 53 ++++- cpp/tests/hashing/hash_test.cpp | 32 +-- 6 files changed, 155 insertions(+), 171 deletions(-) diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 80d2f7de52f..359e6cc93bf 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -42,11 +42,6 @@ std::unique_ptr hash(table_view const& input, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), cudaStream_t stream = 0); -std::unique_ptr identity_hash( - table_view const& input, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(), - cudaStream_t stream = 0); - std::unique_ptr murmur_hash3_32( table_view const& input, std::vector const& initial_hash = {}, diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 6d7bdeabd21..78df77790ad 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -30,28 +30,20 @@ using hash_value_type = uint32_t; namespace cudf { namespace detail { -/** - * @brief Helper function, left rotate bit value the value n bits - */ -CUDA_HOST_DEVICE_CALLABLE uint32_t left_rotate(uint32_t value, uint32_t shift) -{ - return (value << shift) | (value >> (32 - shift)); -} - /** * @brief Core MD5 algorith implementation. Processes a single 512-bit chunk, * updating the hash value so far. Does not zero out the buffer contents. */ -void CUDA_HOST_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) +void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) { uint32_t A = hash_state->hash_value[0]; uint32_t B = hash_state->hash_value[1]; uint32_t C = hash_state->hash_value[2]; uint32_t D = hash_state->hash_value[3]; - uint32_t* buffer_ints = (uint32_t*)hash_state->buffer; + uint32_t* buffer_ints = reinterpret_cast(hash_state->buffer); for (unsigned int j = 0; j < 64; j++) { uint32_t F, g; @@ -75,11 +67,12 @@ void CUDA_HOST_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, } F = F + A + hash_constants[j] + buffer_ints[g]; - A = D; D = C; C = B; - B = B + left_rotate(F, shift_constants[((j / 16) * 4) + (j % 4)]); + + uint32_t shift = shift_constants[((j / 16) * 4) + (j % 4)]; + B = B + ((F << shift) | (F >> (32 - shift))); } hash_state->hash_value[0] += A; @@ -98,13 +91,13 @@ struct MD5Hash { * @brief Core MD5 element processing function */ template - void CUDA_HOST_DEVICE_CALLABLE process(TKey const& key, - const uint32_t len, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const + void __device__ process(TKey const& key, + uint32_t const len, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const { - uint8_t* data = (uint8_t*)&key; + uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; if (hash_state->buffer_length + len < 64) { @@ -127,106 +120,84 @@ struct MD5Hash { } } - template ()>* = nullptr> - void CUDA_HOST_DEVICE_CALLABLE operator()(T const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const - { - process(key, size_of(key), hash_state, hash_constants, shift_constants); - } - - template ()>* = nullptr> - void CUDA_HOST_DEVICE_CALLABLE operator()(T const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const + template ::value>* = nullptr> + void __device__ operator()(T const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const { - CUDF_FAIL("Unsupported hash type"); + if (isnan(key)) { + T nan = std::numeric_limits::quiet_NaN(); + process(nan, sizeof(T), hash_state, hash_constants, shift_constants); + } else if (key == T{0.0}) { + process(T{0.0}, sizeof(T), hash_state, hash_constants, shift_constants); + } else { + process(key, sizeof(T), hash_state, hash_constants, shift_constants); + } } - void CUDA_HOST_DEVICE_CALLABLE operator()(Key const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const + template ::value>* = nullptr> + void __device__ operator()(T const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const { - } -}; + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); -/** - * @brief Specialization of MD5Hash operator for strings. - */ -template <> -void CUDA_HOST_DEVICE_CALLABLE -MD5Hash::operator()(cudf::string_view const& key, - md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) const -{ - const uint32_t len = (uint32_t)key.size_bytes(); - const uint8_t* data = (const uint8_t*)key.data(); + hash_state->message_length += len; - hash_state->message_length += len; + if (hash_state->buffer_length + len < 64) { + thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); + hash_state->buffer_length += len; + } else { + uint32_t copylen = 64 - hash_state->buffer_length; + thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); + md5_hash_step(hash_state, hash_constants, shift_constants); - if (hash_state->buffer_length + len < 64) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); - hash_state->buffer_length += len; - } else { - uint32_t copylen = 64 - hash_state->buffer_length; - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); - md5_hash_step(hash_state, hash_constants, shift_constants); + while (len > 64 + copylen) { + thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); + md5_hash_step(hash_state, hash_constants, shift_constants); + copylen += 64; + } - while (len > 64 + copylen) { - thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); - md5_hash_step(hash_state, hash_constants, shift_constants); - copylen += 64; + thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + hash_state->buffer_length = len - copylen; } - - thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); - hash_state->buffer_length = len - copylen; } -} -/** - * @brief Finalize MD5 hash including converstion to hex string. - */ -void CUDA_HOST_DEVICE_CALLABLE finalize_md5_hash(md5_intermediate_data* hash_state, - char* result_location, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants, - const hex_to_char_mapping_type* hex_char_map) -{ - uint64_t full_length = (uint64_t)hash_state->message_length; - full_length = full_length << 3; - thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); - - if (hash_state->buffer_length <= 55) { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (55 - hash_state->buffer_length), - 0x00); - } else { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (64 - hash_state->buffer_length), - 0x00); - md5_hash_step(hash_state, hash_constants, shift_constants); - - thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + template ()>* = nullptr> + void __device__ operator()(T const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const + { + release_assert(false && "Unsupported hash type"); } - thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer + 56); - md5_hash_step(hash_state, hash_constants, shift_constants); - - u_char final_hash[32]; - uint8_t* hash_result = (uint8_t*)hash_state->hash_value; - for (int i = 0; i < 16; i++) { - final_hash[i * 2] = hex_char_map[(hash_result[i] >> 4) & 0xf]; - final_hash[i * 2 + 1] = hex_char_map[hash_result[i] & 0xf]; + template ::value && + !is_fixed_width()>* = nullptr> + void __device__ operator()(T const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const + { + release_assert(false && "Unsupported hash type"); } - thrust::copy_n(thrust::seq, final_hash, 32, result_location); -} + template ::value && !is_chrono() && + is_numeric()>* = nullptr> + void __device__ operator()(T const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const + { + process(key, sizeof(T), hash_state, hash_constants, shift_constants); + } +}; } // namespace detail } // namespace cudf @@ -461,4 +432,4 @@ struct IdentityHash { }; template -using default_hash = MurmurHash3_32; \ No newline at end of file +using default_hash = MurmurHash3_32; diff --git a/cpp/src/hash/hash_constants.cu b/cpp/src/hash/hash_constants.cu index bc7e467f820..5c082a31eef 100644 --- a/cpp/src/hash/hash_constants.cu +++ b/cpp/src/hash/hash_constants.cu @@ -12,16 +12,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +// #include #include "./hash_constants.hpp" -#include +#include namespace cudf { namespace detail { const hex_to_char_mapping_type g_hex_to_char_mapping[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + const md5_shift_constants_type g_md5_shift_constants[] = { 7, 12, @@ -52,30 +53,28 @@ const md5_hash_constants_type g_md5_hash_constants[] = { 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, }; -std::mutex g_hex_to_char_mapping_mutex; -std::mutex g_md5_hash_constants_mutex; -std::mutex g_md5_shift_constants_mutex; - -hex_to_char_mapping_type* d_hex_to_char_mapping = nullptr; -md5_hash_constants_type* d_md5_hash_constants = nullptr; -md5_shift_constants_type* d_md5_shift_constants = nullptr; - +namespace { __device__ hex_to_char_mapping_type hex_to_char_mapping[sizeof(g_hex_to_char_mapping)]; __device__ md5_hash_constants_type md5_hash_constants[sizeof(g_md5_hash_constants)]; __device__ md5_shift_constants_type md5_shift_constants[sizeof(g_md5_shift_constants)]; +strings::detail::thread_safe_per_context_cache d_hex_to_char_mapping; +strings::detail::thread_safe_per_context_cache d_md5_hash_constants; +strings::detail::thread_safe_per_context_cache d_md5_shift_constants; +} // namespace + /** * @copydoc cudf::detail::get_hex_to_char_mapping */ const hex_to_char_mapping_type* get_hex_to_char_mapping() { - std::lock_guard guard(g_hex_to_char_mapping_mutex); - if (!d_hex_to_char_mapping) { + return d_hex_to_char_mapping.find_or_initialize([&](void) { + hex_to_char_mapping_type* table = nullptr; CUDA_TRY(cudaMemcpyToSymbol( hex_to_char_mapping, g_hex_to_char_mapping, sizeof(g_hex_to_char_mapping))); - CUDA_TRY(cudaGetSymbolAddress((void**)&d_hex_to_char_mapping, hex_to_char_mapping)); - } - return d_hex_to_char_mapping; + CUDA_TRY(cudaGetSymbolAddress((void**)&table, hex_to_char_mapping)); + return table; + }); } /** @@ -83,13 +82,13 @@ const hex_to_char_mapping_type* get_hex_to_char_mapping() */ const md5_hash_constants_type* get_md5_hash_constants() { - std::lock_guard guard(g_md5_hash_constants_mutex); - if (!d_md5_hash_constants) { + return d_md5_hash_constants.find_or_initialize([&](void) { + md5_hash_constants_type* table = nullptr; CUDA_TRY( cudaMemcpyToSymbol(md5_hash_constants, g_md5_hash_constants, sizeof(g_md5_hash_constants))); - CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_hash_constants, md5_hash_constants)); - } - return d_md5_hash_constants; + CUDA_TRY(cudaGetSymbolAddress((void**)&table, md5_hash_constants)); + return table; + }); } /** @@ -97,14 +96,14 @@ const md5_hash_constants_type* get_md5_hash_constants() */ const md5_shift_constants_type* get_md5_shift_constants() { - std::lock_guard guard(g_md5_shift_constants_mutex); - if (!d_md5_shift_constants) { + return d_md5_shift_constants.find_or_initialize([&](void) { + md5_shift_constants_type* table = nullptr; CUDA_TRY(cudaMemcpyToSymbol( md5_shift_constants, g_md5_shift_constants, sizeof(g_md5_shift_constants))); - CUDA_TRY(cudaGetSymbolAddress((void**)&d_md5_shift_constants, md5_shift_constants)); - } - return d_md5_shift_constants; + CUDA_TRY(cudaGetSymbolAddress((void**)&table, md5_shift_constants)); + return table; + }); } } // namespace detail -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/src/hash/hash_constants.cuh b/cpp/src/hash/hash_constants.cuh index f7cec3f0652..dbbcfda6962 100644 --- a/cpp/src/hash/hash_constants.cuh +++ b/cpp/src/hash/hash_constants.cuh @@ -19,4 +19,4 @@ namespace cudf { namespace detail { } // namespace detail -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 9db5dcd5a9d..358688dc5de 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -26,6 +26,7 @@ #include #include #include +// #include #include #include @@ -610,6 +611,46 @@ std::pair, std::vector> hash_partition_table( } } +/** + * @brief Finalize MD5 hash including converstion to hex string. + */ +void __device__ finalize_md5_hash(detail::md5_intermediate_data* hash_state, + char* result_location, + const detail::md5_hash_constants_type* hash_constants, + const detail::md5_shift_constants_type* shift_constants, + const detail::hex_to_char_mapping_type* hex_char_map) +{ + auto const full_length = (static_cast(hash_state->message_length)) << 3; + thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); + + if (hash_state->buffer_length < 56) { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (55 - hash_state->buffer_length), + 0x00); + } else { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (64 - hash_state->buffer_length), + 0x00); + detail::md5_hash_step(hash_state, hash_constants, shift_constants); + + thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + } + + thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer + 56); + detail::md5_hash_step(hash_state, hash_constants, shift_constants); + + u_char final_hash[32]; + uint8_t* hash_result = reinterpret_cast(hash_state->hash_value); + for (int i = 0; i < 16; i++) { + final_hash[i * 2] = hex_char_map[(hash_result[i] >> 4) & 0xf]; + final_hash[i * 2 + 1] = hex_char_map[hash_result[i] & 0xf]; + } + + thrust::copy_n(thrust::seq, final_hash, 32, result_location); +} + } // namespace namespace detail { @@ -661,8 +702,8 @@ class md5_element_hasher { __device__ inline void operator()(column_device_view col, size_type row_index, md5_intermediate_data* hash_state, - const md5_hash_constants_type* hash_constants, - const md5_shift_constants_type* shift_constants) + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) { if (!has_nulls || col.is_valid(row_index)) { MD5Hash{}(col.element(row_index), hash_state, hash_constants, shift_constants); @@ -699,9 +740,9 @@ std::unique_ptr md5_hash(table_view const& input, auto const device_input = table_device_view::create(input, stream); // Fetch hash constants - const md5_shift_constants_type* shift_constants = get_md5_shift_constants(); - const md5_hash_constants_type* hash_constants = get_md5_hash_constants(); - const hex_to_char_mapping_type* hex_char_map = get_hex_to_char_mapping(); + md5_shift_constants_type const* shift_constants = get_md5_shift_constants(); + md5_hash_constants_type const* hash_constants = get_md5_hash_constants(); + hex_to_char_mapping_type const* hex_char_map = get_hex_to_char_mapping(); // Hash each row, hashing each element sequentially left to right thrust::for_each( @@ -716,7 +757,7 @@ std::unique_ptr md5_hash(table_view const& input, has_nulls = nullable] __device__(auto row_index) { md5_intermediate_data hash_state; for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { - if (!has_nulls) { + if (has_nulls) { cudf::type_dispatcher(device_input.column(col_index).type(), md5_element_hasher{}, device_input.column(col_index), diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index a7c2c6ff5d0..28cf15d82b7 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -208,13 +208,6 @@ TEST_F(MD5HashTest, MultiValue) fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); - using ts = cudf::timestamp_s; - fixed_width_column_wrapper const secs_col({ts::duration::zero(), - static_cast(100), - static_cast(-100), - ts::duration::min(), - ts::duration::max()}); - auto const string_input1 = cudf::table_view({strings_col}); auto const string_input2 = cudf::table_view({strings_col, strings_col}); auto const md5_string_output1 = cudf::hash(string_input1, cudf::hash_id::HASH_MD5); @@ -224,8 +217,8 @@ TEST_F(MD5HashTest, MultiValue) expect_columns_equal(md5_string_output1->view(), md5_string_results1); expect_columns_equal(md5_string_output2->view(), md5_string_results2); - auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col}); - auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col}); + auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1}); + auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2}); auto const md5_output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); auto const md5_output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); EXPECT_EQ(input1.num_rows(), md5_output1->size()); @@ -263,29 +256,14 @@ TEST_F(MD5HashTest, MultiValueNulls) fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1}); - // Nulls with different values should be equal - using ts = cudf::timestamp_s; - fixed_width_column_wrapper const secs_col1({ts::duration::zero(), - static_cast(100), - static_cast(-100), - ts::duration::min(), - ts::duration::max()}, - {1, 0, 0, 1, 1}); - fixed_width_column_wrapper const secs_col2({ts::duration::zero(), - static_cast(-200), - static_cast(200), - ts::duration::min(), - ts::duration::max()}, - {1, 0, 0, 1, 1}); - - auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1, secs_col1}); - auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2, secs_col2}); + auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1}); + auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2}); auto const output1 = cudf::hash(input1, cudf::hash_id::HASH_MD5); auto const output2 = cudf::hash(input2, cudf::hash_id::HASH_MD5); EXPECT_EQ(input1.num_rows(), output1->size()); - expect_columns_equal(output1->view(), output2->view(), true); + expect_columns_equal(output1->view(), output2->view()); } template From 535ff93433c2528503e007a5420ad96f4104d676 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 30 Jul 2020 08:29:43 +0000 Subject: [PATCH 06/17] Review fixes --- .../cudf/detail/utilities/hash_functions.cuh | 143 ++++++++++-------- cpp/src/hash/hash_constants.cuh | 22 --- cpp/src/hash/hashing.cu | 11 +- cpp/tests/hashing/hash_test.cpp | 2 +- 4 files changed, 91 insertions(+), 87 deletions(-) delete mode 100644 cpp/src/hash/hash_constants.cuh diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 78df77790ad..707adc662e9 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -43,13 +43,12 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, uint32_t C = hash_state->hash_value[2]; uint32_t D = hash_state->hash_value[3]; - uint32_t* buffer_ints = reinterpret_cast(hash_state->buffer); - for (unsigned int j = 0; j < 64; j++) { - uint32_t F, g; + uint32_t F; + uint32_t g; switch (j / 16) { case 0: - F = (B & C) | ((~B) & D); // D ^ (B & (C ^ D)) + F = (B & C) | ((~B) & D); g = j; break; case 1: @@ -66,7 +65,12 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, break; } - F = F + A + hash_constants[j] + buffer_ints[g]; + uint32_t buffer_element_as_int; + thrust::copy_n(thrust::seq, + hash_state->buffer + g * 4, + 4, + reinterpret_cast(&buffer_element_as_int)); + F = F + A + hash_constants[j] + buffer_element_as_int; A = D; D = C; C = B; @@ -92,11 +96,11 @@ struct MD5Hash { */ template void __device__ process(TKey const& key, - uint32_t const len, md5_intermediate_data* hash_state, md5_hash_constants_type const* hash_constants, md5_shift_constants_type const* shift_constants) const { + uint32_t const len = sizeof(TKey); uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; @@ -120,84 +124,101 @@ struct MD5Hash { } } - template ::value>* = nullptr> + template ()>* = nullptr> void __device__ operator()(T const& key, md5_intermediate_data* hash_state, md5_hash_constants_type const* hash_constants, md5_shift_constants_type const* shift_constants) const { - if (isnan(key)) { - T nan = std::numeric_limits::quiet_NaN(); - process(nan, sizeof(T), hash_state, hash_constants, shift_constants); - } else if (key == T{0.0}) { - process(T{0.0}, sizeof(T), hash_state, hash_constants, shift_constants); - } else { - process(key, sizeof(T), hash_state, hash_constants, shift_constants); - } + release_assert(false && "MD5 Unsupported chrono type column"); } template ::value>* = nullptr> + typename std::enable_if_t::value && + !is_fixed_width()>* = nullptr> void __device__ operator()(T const& key, md5_intermediate_data* hash_state, md5_hash_constants_type const* hash_constants, md5_shift_constants_type const* shift_constants) const { - uint32_t const len = static_cast(key.size_bytes()); - uint8_t const* data = reinterpret_cast(key.data()); + release_assert(false && "MD5 Unsupported non-fixed-width type column"); + } - hash_state->message_length += len; + void CUDA_DEVICE_CALLABLE operator()(argument_type const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const + { + process(key, hash_state, hash_constants, shift_constants); + } - if (hash_state->buffer_length + len < 64) { - thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); - hash_state->buffer_length += len; + template ::value>* = nullptr> + void __device__ process_floating_point(T const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const + { + if (isnan(key)) { + T nan = std::numeric_limits::quiet_NaN(); + process(nan, hash_state, hash_constants, shift_constants); + } else if (key == T{0.0}) { + process(T{0.0}, hash_state, hash_constants, shift_constants); } else { - uint32_t copylen = 64 - hash_state->buffer_length; - thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); - md5_hash_step(hash_state, hash_constants, shift_constants); + process(key, hash_state, hash_constants, shift_constants); + } + } +}; - while (len > 64 + copylen) { - thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); - md5_hash_step(hash_state, hash_constants, shift_constants); - copylen += 64; - } +template <> +void CUDA_DEVICE_CALLABLE +MD5Hash::operator()(string_view const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const +{ + uint32_t const len = static_cast(key.size_bytes()); + uint8_t const* data = reinterpret_cast(key.data()); - thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); - hash_state->buffer_length = len - copylen; + hash_state->message_length += len; + + if (hash_state->buffer_length + len < 64) { + thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); + hash_state->buffer_length += len; + } else { + uint32_t copylen = 64 - hash_state->buffer_length; + thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); + md5_hash_step(hash_state, hash_constants, shift_constants); + + while (len > 64 + copylen) { + thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); + md5_hash_step(hash_state, hash_constants, shift_constants); + copylen += 64; } - } - template ()>* = nullptr> - void __device__ operator()(T const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const - { - release_assert(false && "Unsupported hash type"); + thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); + hash_state->buffer_length = len - copylen; } +} - template ::value && - !is_fixed_width()>* = nullptr> - void __device__ operator()(T const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const - { - release_assert(false && "Unsupported hash type"); - } +template <> +void CUDA_DEVICE_CALLABLE +MD5Hash::operator()(float const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const +{ + this->process_floating_point(key, hash_state, hash_constants, shift_constants); +} - template ::value && !is_chrono() && - is_numeric()>* = nullptr> - void __device__ operator()(T const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const - { - process(key, sizeof(T), hash_state, hash_constants, shift_constants); - } -}; +template <> +void CUDA_DEVICE_CALLABLE +MD5Hash::operator()(double const& key, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const +{ + this->process_floating_point(key, hash_state, hash_constants, shift_constants); +} } // namespace detail } // namespace cudf diff --git a/cpp/src/hash/hash_constants.cuh b/cpp/src/hash/hash_constants.cuh deleted file mode 100644 index dbbcfda6962..00000000000 --- a/cpp/src/hash/hash_constants.cuh +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -namespace cudf { -namespace detail { - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 358688dc5de..04e8a157707 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -638,7 +638,8 @@ void __device__ finalize_md5_hash(detail::md5_intermediate_data* hash_state, thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); } - thrust::copy_n(thrust::seq, (uint8_t*)&full_length, 8, hash_state->buffer + 56); + thrust::copy_n( + thrust::seq, reinterpret_cast(&full_length), 8, hash_state->buffer + 56); detail::md5_hash_step(hash_state, hash_constants, shift_constants); u_char final_hash[32]; @@ -721,9 +722,13 @@ std::unique_ptr md5_hash(table_view const& input, return output; } + std::for_each(input.begin(), input.end(), [](auto col) { + CUDF_EXPECTS(col.type().id() <= type_id::BOOL8 || col.type().id() == type_id::STRING, + "Unsupported column type"); + }); + // Result column allocation and creation - auto transformer = [] __device__(size_type idx) { return 32; }; - auto begin = thrust::make_transform_iterator(thrust::make_counting_iterator(0), transformer); + auto begin = thrust::make_constant_iterator(32); auto offsets_column = cudf::strings::detail::make_offsets_child_column(begin, begin + input.num_rows(), mr, stream); auto offsets_view = offsets_column->view(); diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index 28cf15d82b7..498ce2e37ba 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -270,7 +270,7 @@ template class MD5HashTestTyped : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(MD5HashTestTyped, cudf::test::FixedWidthTypes); +TYPED_TEST_CASE(MD5HashTestTyped, cudf::test::NumericTypes); TYPED_TEST(MD5HashTestTyped, Equality) { From 14d76721d8b0d0228006bd6428a2479f6d007b99 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Fri, 31 Jul 2020 08:54:28 +0000 Subject: [PATCH 07/17] Change conversion and other review fixes --- .../cudf/detail/utilities/hash_functions.cuh | 5 +- cpp/src/hash/hash_constants.cu | 22 +--- cpp/src/hash/hash_constants.hpp | 15 --- cpp/src/hash/hashing.cu | 106 ++++++++++-------- 4 files changed, 61 insertions(+), 87 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 707adc662e9..d0962c1ac0e 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -16,14 +16,11 @@ #pragma once -#include #include #include #include "cuda_runtime_api.h" #include "cudf/types.hpp" -#include "driver_types.h" -#include "vector_types.h" using hash_value_type = uint32_t; @@ -31,7 +28,7 @@ namespace cudf { namespace detail { /** - * @brief Core MD5 algorith implementation. Processes a single 512-bit chunk, + * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, * updating the hash value so far. Does not zero out the buffer contents. */ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, diff --git a/cpp/src/hash/hash_constants.cu b/cpp/src/hash/hash_constants.cu index 5c082a31eef..a329d0be986 100644 --- a/cpp/src/hash/hash_constants.cu +++ b/cpp/src/hash/hash_constants.cu @@ -12,17 +12,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -// #include -#include "./hash_constants.hpp" +#include "hash_constants.hpp" #include namespace cudf { namespace detail { -const hex_to_char_mapping_type g_hex_to_char_mapping[] = { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - const md5_shift_constants_type g_md5_shift_constants[] = { 7, 12, @@ -54,29 +50,13 @@ const md5_hash_constants_type g_md5_hash_constants[] = { }; namespace { -__device__ hex_to_char_mapping_type hex_to_char_mapping[sizeof(g_hex_to_char_mapping)]; __device__ md5_hash_constants_type md5_hash_constants[sizeof(g_md5_hash_constants)]; __device__ md5_shift_constants_type md5_shift_constants[sizeof(g_md5_shift_constants)]; -strings::detail::thread_safe_per_context_cache d_hex_to_char_mapping; strings::detail::thread_safe_per_context_cache d_md5_hash_constants; strings::detail::thread_safe_per_context_cache d_md5_shift_constants; } // namespace -/** - * @copydoc cudf::detail::get_hex_to_char_mapping - */ -const hex_to_char_mapping_type* get_hex_to_char_mapping() -{ - return d_hex_to_char_mapping.find_or_initialize([&](void) { - hex_to_char_mapping_type* table = nullptr; - CUDA_TRY(cudaMemcpyToSymbol( - hex_to_char_mapping, g_hex_to_char_mapping, sizeof(g_hex_to_char_mapping))); - CUDA_TRY(cudaGetSymbolAddress((void**)&table, hex_to_char_mapping)); - return table; - }); -} - /** * @copydoc cudf::detail::get_md5_hash_constants */ diff --git a/cpp/src/hash/hash_constants.hpp b/cpp/src/hash/hash_constants.hpp index 9063a60d283..c742aa19312 100644 --- a/cpp/src/hash/hash_constants.hpp +++ b/cpp/src/hash/hash_constants.hpp @@ -55,20 +55,5 @@ using md5_hash_constants_type = uint32_t; */ const md5_hash_constants_type* get_md5_hash_constants(); -// Type for the hexidecimal character mapping. -using hex_to_char_mapping_type = char; - -/** - * @brief Returns pointer to device memory that contains the static - * hexidecimal character map. On first call, this will copy the table into - * device memory and is guaranteed to be thread-safe. - * - * This table is used to lookup the corresponding hex character when - * translating the hash into a hexidecimal string. - * - * @return Device memory pointer to the hex to char map. - */ -const hex_to_char_mapping_type* get_hex_to_char_mapping(); - } // namespace detail } // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 04e8a157707..330523d2e4b 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -26,7 +26,6 @@ #include #include #include -// #include #include #include @@ -611,14 +610,34 @@ std::pair, std::vector> hash_partition_table( } } +/** + * Modified GPU implementation of + * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/ that is lowercase only and + * does not flip the endianness of the input. + */ +void __device__ uint32ToLowercaseHexString(uint32_t num, char* destination) +{ + // Transform 0xABCD1234 => 0x0000ABCD00001234 => 0x0B0A0D0C02010403 + uint64_t x = num; + x = ((x & 0xFFFF0000) << 16) | ((x & 0xFFFF)); + x = ((x & 0xF0000000F) << 8) | ((x & 0xF0000000F0) >> 4) | ((x & 0xF0000000F00) << 16) | + ((x & 0xF0000000F000) << 4); + + // Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits + uint64_t offsets = (((x + 0x0606060606060606) >> 4) & 0x0101010101010101) * 0x27; + + x |= 0x3030303030303030; + x += offsets; + thrust::copy_n(thrust::seq, reinterpret_cast(&x), 8, destination); +} + /** * @brief Finalize MD5 hash including converstion to hex string. */ void __device__ finalize_md5_hash(detail::md5_intermediate_data* hash_state, char* result_location, const detail::md5_hash_constants_type* hash_constants, - const detail::md5_shift_constants_type* shift_constants, - const detail::hex_to_char_mapping_type* hex_char_map) + const detail::md5_shift_constants_type* shift_constants) { auto const full_length = (static_cast(hash_state->message_length)) << 3; thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); @@ -642,14 +661,9 @@ void __device__ finalize_md5_hash(detail::md5_intermediate_data* hash_state, thrust::seq, reinterpret_cast(&full_length), 8, hash_state->buffer + 56); detail::md5_hash_step(hash_state, hash_constants, shift_constants); - u_char final_hash[32]; - uint8_t* hash_result = reinterpret_cast(hash_state->hash_value); - for (int i = 0; i < 16; i++) { - final_hash[i * 2] = hex_char_map[(hash_result[i] >> 4) & 0xf]; - final_hash[i * 2 + 1] = hex_char_map[hash_result[i] & 0xf]; - } - - thrust::copy_n(thrust::seq, final_hash, 32, result_location); +#pragma unroll + for (int i = 0; i < 4; ++i) + uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); } } // namespace @@ -723,8 +737,9 @@ std::unique_ptr md5_hash(table_view const& input, } std::for_each(input.begin(), input.end(), [](auto col) { - CUDF_EXPECTS(col.type().id() <= type_id::BOOL8 || col.type().id() == type_id::STRING, - "Unsupported column type"); + CUDF_EXPECTS(!is_chrono(col.type()), "MD5 does not support chrono column types"); + CUDF_EXPECTS(is_fixed_width(col.type()) || (col.type().id() == type_id::STRING), + "MD5 requires fixed width column types except for strings"); }); // Result column allocation and creation @@ -747,42 +762,39 @@ std::unique_ptr md5_hash(table_view const& input, // Fetch hash constants md5_shift_constants_type const* shift_constants = get_md5_shift_constants(); md5_hash_constants_type const* hash_constants = get_md5_hash_constants(); - hex_to_char_mapping_type const* hex_char_map = get_hex_to_char_mapping(); // Hash each row, hashing each element sequentially left to right - thrust::for_each( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, - device_input = *device_input, - hash_constants = hash_constants, - shift_constants = shift_constants, - hex_char_map = hex_char_map, - has_nulls = nullable] __device__(auto row_index) { - md5_intermediate_data hash_state; - for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { - if (has_nulls) { - cudf::type_dispatcher(device_input.column(col_index).type(), - md5_element_hasher{}, - device_input.column(col_index), - row_index, - &hash_state, - hash_constants, - shift_constants); - } else { - cudf::type_dispatcher(device_input.column(col_index).type(), - md5_element_hasher{}, - device_input.column(col_index), - row_index, - &hash_state, - hash_constants, - shift_constants); - } - } - finalize_md5_hash( - &hash_state, d_chars + (row_index * 32), hash_constants, shift_constants, hex_char_map); - }); + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, + device_input = *device_input, + hash_constants = hash_constants, + shift_constants = shift_constants, + has_nulls = nullable] __device__(auto row_index) { + md5_intermediate_data hash_state; + for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { + if (has_nulls) { + cudf::type_dispatcher(device_input.column(col_index).type(), + md5_element_hasher{}, + device_input.column(col_index), + row_index, + &hash_state, + hash_constants, + shift_constants); + } else { + cudf::type_dispatcher(device_input.column(col_index).type(), + md5_element_hasher{}, + device_input.column(col_index), + row_index, + &hash_state, + hash_constants, + shift_constants); + } + } + finalize_md5_hash( + &hash_state, d_chars + (row_index * 32), hash_constants, shift_constants); + }); return make_strings_column(input.num_rows(), std::move(offsets_column), From fa93274f0f971f10df8c6349bfb153cbb1df099f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Fri, 31 Jul 2020 23:38:40 +0000 Subject: [PATCH 08/17] Remove md5 element hasher and MD5Hash template --- .../cudf/detail/utilities/hash_functions.cuh | 70 ++++++++----------- cpp/src/hash/hashing.cu | 33 +-------- 2 files changed, 30 insertions(+), 73 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index d0962c1ac0e..a3baabbc523 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -84,10 +85,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, hash_state->buffer_length = 0; } -template struct MD5Hash { - using argument_type = Key; - /** * @brief Core MD5 element processing function */ @@ -122,7 +120,8 @@ struct MD5Hash { } template ()>* = nullptr> - void __device__ operator()(T const& key, + void __device__ operator()(column_device_view col, + size_type row_index, md5_intermediate_data* hash_state, md5_hash_constants_type const* hash_constants, md5_shift_constants_type const* shift_constants) const @@ -130,10 +129,9 @@ struct MD5Hash { release_assert(false && "MD5 Unsupported chrono type column"); } - template ::value && - !is_fixed_width()>* = nullptr> - void __device__ operator()(T const& key, + template ()>* = nullptr> + void __device__ operator()(column_device_view col, + size_type row_index, md5_intermediate_data* hash_state, md5_hash_constants_type const* hash_constants, md5_shift_constants_type const* shift_constants) const @@ -141,20 +139,14 @@ struct MD5Hash { release_assert(false && "MD5 Unsupported non-fixed-width type column"); } - void CUDA_DEVICE_CALLABLE operator()(argument_type const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const - { - process(key, hash_state, hash_constants, shift_constants); - } - - template ::value>* = nullptr> - void __device__ process_floating_point(T const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const + template ()>* = nullptr> + void __device__ operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const { + T const& key = col.element(row_index); if (isnan(key)) { T nan = std::numeric_limits::quiet_NaN(); process(nan, hash_state, hash_constants, shift_constants); @@ -164,15 +156,29 @@ struct MD5Hash { process(key, hash_state, hash_constants, shift_constants); } } + + template () && !is_floating_point() && + !is_chrono()>* = nullptr> + void CUDA_DEVICE_CALLABLE operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state, + md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) const + { + process(col.element(row_index), hash_state, hash_constants, shift_constants); + } }; template <> void CUDA_DEVICE_CALLABLE -MD5Hash::operator()(string_view const& key, +MD5Hash::operator()(column_device_view col, + size_type row_index, md5_intermediate_data* hash_state, md5_hash_constants_type const* hash_constants, md5_shift_constants_type const* shift_constants) const { + string_view key = col.element(row_index); uint32_t const len = static_cast(key.size_bytes()); uint8_t const* data = reinterpret_cast(key.data()); @@ -197,26 +203,6 @@ MD5Hash::operator()(string_view const& key, } } -template <> -void CUDA_DEVICE_CALLABLE -MD5Hash::operator()(float const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const -{ - this->process_floating_point(key, hash_state, hash_constants, shift_constants); -} - -template <> -void CUDA_DEVICE_CALLABLE -MD5Hash::operator()(double const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const -{ - this->process_floating_point(key, hash_state, hash_constants, shift_constants); -} - } // namespace detail } // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 330523d2e4b..41e57653b6f 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -705,27 +705,6 @@ std::unique_ptr hash(table_view const& input, } } -/** - * @brief Updates the MD5 hash value with an element in the given column. - * - * @tparam has_nulls Indicates the potential for null values in the column. - **/ -template -class md5_element_hasher { - public: - template - __device__ inline void operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) - { - if (!has_nulls || col.is_valid(row_index)) { - MD5Hash{}(col.element(row_index), hash_state, hash_constants, shift_constants); - } - } -}; - std::unique_ptr md5_hash(table_view const& input, rmm::mr::device_memory_resource* mr, cudaStream_t stream) @@ -774,17 +753,9 @@ std::unique_ptr md5_hash(table_view const& input, has_nulls = nullable] __device__(auto row_index) { md5_intermediate_data hash_state; for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { - if (has_nulls) { - cudf::type_dispatcher(device_input.column(col_index).type(), - md5_element_hasher{}, - device_input.column(col_index), - row_index, - &hash_state, - hash_constants, - shift_constants); - } else { + if (device_input.column(col_index).is_valid(row_index)) { cudf::type_dispatcher(device_input.column(col_index).type(), - md5_element_hasher{}, + MD5Hash{}, device_input.column(col_index), row_index, &hash_state, From 94ceeaefae56545f82fbc8c6dffdceaf85089b2e Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 4 Aug 2020 09:02:14 +0000 Subject: [PATCH 09/17] fix column filtering and use funnelshift --- .../cudf/detail/utilities/hash_functions.cuh | 5 ++--- cpp/src/hash/hashing.cu | 13 ++++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index a3baabbc523..3e47ecf4994 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -22,6 +22,7 @@ #include "cuda_runtime_api.h" #include "cudf/types.hpp" +#include "sm_35_intrinsics.h" using hash_value_type = uint32_t; @@ -72,9 +73,7 @@ void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, A = D; D = C; C = B; - - uint32_t shift = shift_constants[((j / 16) * 4) + (j % 4)]; - B = B + ((F << shift) | (F >> (32 - shift))); + B = B + __funnelshift_l(F, F, shift_constants[((j / 16) * 4) + (j % 4)]); } hash_state->hash_value[0] += A; diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 41e57653b6f..fe92d00d7f4 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -715,11 +715,14 @@ std::unique_ptr md5_hash(table_view const& input, return output; } - std::for_each(input.begin(), input.end(), [](auto col) { - CUDF_EXPECTS(!is_chrono(col.type()), "MD5 does not support chrono column types"); - CUDF_EXPECTS(is_fixed_width(col.type()) || (col.type().id() == type_id::STRING), - "MD5 requires fixed width column types except for strings"); - }); + CUDF_EXPECTS( + std::all_of(input.begin(), + input.end(), + [](auto col) { + return !is_chrono(col.type()) && + (is_fixed_width(col.type()) || (col.type().id() == type_id::STRING)); + }), + "MD5 unsupported column type"); // Result column allocation and creation auto begin = thrust::make_constant_iterator(32); From 8ae065e8f65e78c1cc3c20e61271825a060895e7 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 4 Aug 2020 18:21:31 +0000 Subject: [PATCH 10/17] switch to memcpy and move functions to reduce args --- .../cudf/detail/utilities/hash_functions.cuh | 197 +++++++++++------- cpp/src/hash/hashing.cu | 66 +----- 2 files changed, 122 insertions(+), 141 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 3e47ecf4994..21f2ffcaabb 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -28,71 +28,88 @@ using hash_value_type = uint32_t; namespace cudf { namespace detail { - /** - * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, - * updating the hash value so far. Does not zero out the buffer contents. + * Modified GPU implementation of + * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/ that is lowercase only and + * does not flip the endianness of the input. */ -void CUDA_DEVICE_CALLABLE md5_hash_step(md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) +void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destination) { - uint32_t A = hash_state->hash_value[0]; - uint32_t B = hash_state->hash_value[1]; - uint32_t C = hash_state->hash_value[2]; - uint32_t D = hash_state->hash_value[3]; - - for (unsigned int j = 0; j < 64; j++) { - uint32_t F; - uint32_t g; - switch (j / 16) { - case 0: - F = (B & C) | ((~B) & D); - g = j; - break; - case 1: - F = (D & B) | ((~D) & C); - g = (5 * j + 1) % 16; - break; - case 2: - F = B ^ C ^ D; - g = (3 * j + 5) % 16; - break; - case 3: - F = C ^ (B | (~D)); - g = (7 * j) % 16; - break; - } + // Transform 0xABCD1234 => 0x0000ABCD00001234 => 0x0B0A0D0C02010403 + uint64_t x = num; + x = ((x & 0xFFFF0000) << 16) | ((x & 0xFFFF)); + x = ((x & 0xF0000000F) << 8) | ((x & 0xF0000000F0) >> 4) | ((x & 0xF0000000F00) << 16) | + ((x & 0xF0000000F000) << 4); + + // Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits + uint64_t offsets = (((x + 0x0606060606060606) >> 4) & 0x0101010101010101) * 0x27; + + x |= 0x3030303030303030; + x += offsets; + thrust::copy_n(thrust::seq, reinterpret_cast(&x), 8, destination); +} - uint32_t buffer_element_as_int; - thrust::copy_n(thrust::seq, - hash_state->buffer + g * 4, - 4, - reinterpret_cast(&buffer_element_as_int)); - F = F + A + hash_constants[j] + buffer_element_as_int; - A = D; - D = C; - C = B; - B = B + __funnelshift_l(F, F, shift_constants[((j / 16) * 4) + (j % 4)]); +struct MD5Hash { + __device__ MD5Hash(md5_hash_constants_type const* hash_constants, + md5_shift_constants_type const* shift_constants) + : d_hash_constants(hash_constants), d_shift_constants(shift_constants) + { } - hash_state->hash_value[0] += A; - hash_state->hash_value[1] += B; - hash_state->hash_value[2] += C; - hash_state->hash_value[3] += D; + /** + * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, + * updating the hash value so far. Does not zero out the buffer contents. + */ + void __device__ hash_step(md5_intermediate_data* hash_state) const + { + uint32_t A = hash_state->hash_value[0]; + uint32_t B = hash_state->hash_value[1]; + uint32_t C = hash_state->hash_value[2]; + uint32_t D = hash_state->hash_value[3]; + + for (unsigned int j = 0; j < 64; j++) { + uint32_t F; + uint32_t g; + switch (j / 16) { + case 0: + F = (B & C) | ((~B) & D); + g = j; + break; + case 1: + F = (D & B) | ((~D) & C); + g = (5 * j + 1) % 16; + break; + case 2: + F = B ^ C ^ D; + g = (3 * j + 5) % 16; + break; + case 3: + F = C ^ (B | (~D)); + g = (7 * j) % 16; + break; + } - hash_state->buffer_length = 0; -} + uint32_t buffer_element_as_int; + std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); + F = F + A + d_hash_constants[j] + buffer_element_as_int; + A = D; + D = C; + C = B; + B = B + __funnelshift_l(F, F, d_shift_constants[((j / 16) * 4) + (j % 4)]); + } -struct MD5Hash { + hash_state->hash_value[0] += A; + hash_state->hash_value[1] += B; + hash_state->hash_value[2] += C; + hash_state->hash_value[3] += D; + + hash_state->buffer_length = 0; + } /** * @brief Core MD5 element processing function */ template - void __device__ process(TKey const& key, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const + void __device__ process(TKey const& key, md5_intermediate_data* hash_state) const { uint32_t const len = sizeof(TKey); uint8_t const* data = reinterpret_cast(&key); @@ -105,11 +122,11 @@ struct MD5Hash { uint32_t copylen = 64 - hash_state->buffer_length; thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); - md5_hash_step(hash_state, hash_constants, shift_constants); + hash_step(hash_state); while (len > 64 + copylen) { thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); - md5_hash_step(hash_state, hash_constants, shift_constants); + hash_step(hash_state); copylen += 64; } @@ -118,12 +135,39 @@ struct MD5Hash { } } + void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const + { + auto const full_length = (static_cast(hash_state->message_length)) << 3; + thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); + + if (hash_state->buffer_length < 56) { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (55 - hash_state->buffer_length), + 0x00); + } else { + thrust::fill_n(thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (64 - hash_state->buffer_length), + 0x00); + hash_step(hash_state); + + thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + } + + thrust::copy_n( + thrust::seq, reinterpret_cast(&full_length), 8, hash_state->buffer + 56); + hash_step(hash_state); + +#pragma unroll + for (int i = 0; i < 4; ++i) + uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); + } + template ()>* = nullptr> void __device__ operator()(column_device_view col, size_type row_index, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const + md5_intermediate_data* hash_state) const { release_assert(false && "MD5 Unsupported chrono type column"); } @@ -131,9 +175,7 @@ struct MD5Hash { template ()>* = nullptr> void __device__ operator()(column_device_view col, size_type row_index, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const + md5_intermediate_data* hash_state) const { release_assert(false && "MD5 Unsupported non-fixed-width type column"); } @@ -141,18 +183,16 @@ struct MD5Hash { template ()>* = nullptr> void __device__ operator()(column_device_view col, size_type row_index, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const + md5_intermediate_data* hash_state) const { T const& key = col.element(row_index); if (isnan(key)) { T nan = std::numeric_limits::quiet_NaN(); - process(nan, hash_state, hash_constants, shift_constants); + process(nan, hash_state); } else if (key == T{0.0}) { - process(T{0.0}, hash_state, hash_constants, shift_constants); + process(T{0.0}, hash_state); } else { - process(key, hash_state, hash_constants, shift_constants); + process(key, hash_state); } } @@ -161,21 +201,20 @@ struct MD5Hash { !is_chrono()>* = nullptr> void CUDA_DEVICE_CALLABLE operator()(column_device_view col, size_type row_index, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const + md5_intermediate_data* hash_state) const { - process(col.element(row_index), hash_state, hash_constants, shift_constants); + process(col.element(row_index), hash_state); } + + private: + md5_hash_constants_type const* d_hash_constants; + md5_shift_constants_type const* d_shift_constants; }; template <> -void CUDA_DEVICE_CALLABLE -MD5Hash::operator()(column_device_view col, - size_type row_index, - md5_intermediate_data* hash_state, - md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) const +void CUDA_DEVICE_CALLABLE MD5Hash::operator()(column_device_view col, + size_type row_index, + md5_intermediate_data* hash_state) const { string_view key = col.element(row_index); uint32_t const len = static_cast(key.size_bytes()); @@ -189,11 +228,11 @@ MD5Hash::operator()(column_device_view col, } else { uint32_t copylen = 64 - hash_state->buffer_length; thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); - md5_hash_step(hash_state, hash_constants, shift_constants); + hash_step(hash_state); while (len > 64 + copylen) { thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); - md5_hash_step(hash_state, hash_constants, shift_constants); + hash_step(hash_state); copylen += 64; } diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index fe92d00d7f4..a02be8d69ff 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -610,62 +610,6 @@ std::pair, std::vector> hash_partition_table( } } -/** - * Modified GPU implementation of - * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/ that is lowercase only and - * does not flip the endianness of the input. - */ -void __device__ uint32ToLowercaseHexString(uint32_t num, char* destination) -{ - // Transform 0xABCD1234 => 0x0000ABCD00001234 => 0x0B0A0D0C02010403 - uint64_t x = num; - x = ((x & 0xFFFF0000) << 16) | ((x & 0xFFFF)); - x = ((x & 0xF0000000F) << 8) | ((x & 0xF0000000F0) >> 4) | ((x & 0xF0000000F00) << 16) | - ((x & 0xF0000000F000) << 4); - - // Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits - uint64_t offsets = (((x + 0x0606060606060606) >> 4) & 0x0101010101010101) * 0x27; - - x |= 0x3030303030303030; - x += offsets; - thrust::copy_n(thrust::seq, reinterpret_cast(&x), 8, destination); -} - -/** - * @brief Finalize MD5 hash including converstion to hex string. - */ -void __device__ finalize_md5_hash(detail::md5_intermediate_data* hash_state, - char* result_location, - const detail::md5_hash_constants_type* hash_constants, - const detail::md5_shift_constants_type* shift_constants) -{ - auto const full_length = (static_cast(hash_state->message_length)) << 3; - thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); - - if (hash_state->buffer_length < 56) { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (55 - hash_state->buffer_length), - 0x00); - } else { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (64 - hash_state->buffer_length), - 0x00); - detail::md5_hash_step(hash_state, hash_constants, shift_constants); - - thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); - } - - thrust::copy_n( - thrust::seq, reinterpret_cast(&full_length), 8, hash_state->buffer + 56); - detail::md5_hash_step(hash_state, hash_constants, shift_constants); - -#pragma unroll - for (int i = 0; i < 4; ++i) - uint32ToLowercaseHexString(hash_state->hash_value[i], result_location + (8 * i)); -} - } // namespace namespace detail { @@ -755,19 +699,17 @@ std::unique_ptr md5_hash(table_view const& input, shift_constants = shift_constants, has_nulls = nullable] __device__(auto row_index) { md5_intermediate_data hash_state; + MD5Hash hasher = MD5Hash(hash_constants, shift_constants); for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { if (device_input.column(col_index).is_valid(row_index)) { cudf::type_dispatcher(device_input.column(col_index).type(), - MD5Hash{}, + hasher, device_input.column(col_index), row_index, - &hash_state, - hash_constants, - shift_constants); + &hash_state); } } - finalize_md5_hash( - &hash_state, d_chars + (row_index * 32), hash_constants, shift_constants); + hasher.finalize(&hash_state, d_chars + (row_index * 32)); }); return make_strings_column(input.num_rows(), From efd8fdb8d30460ee2107c0851f812b805e6edc0f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 4 Aug 2020 21:29:01 +0000 Subject: [PATCH 11/17] remove extra include --- cpp/include/cudf/detail/utilities/hash_functions.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 21f2ffcaabb..debbf17ea56 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -22,7 +22,6 @@ #include "cuda_runtime_api.h" #include "cudf/types.hpp" -#include "sm_35_intrinsics.h" using hash_value_type = uint32_t; From f3daf4b3ea51e28e78a36dfc4df4f175a250839e Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Fri, 7 Aug 2020 08:58:09 +0000 Subject: [PATCH 12/17] fix python api --- cpp/include/cudf/hashing.hpp | 5 +++++ cpp/src/hash/hashing.cu | 8 ++++++++ python/cudf/cudf/_lib/cpp/hash.pxd | 2 +- python/cudf/cudf/_lib/hash.pyx | 2 +- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index d897609a1a1..b29b84cd08e 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -39,5 +39,10 @@ std::unique_ptr hash(table_view const& input, std::vector const& initial_hash = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); +std::unique_ptr murmur_hash3_32( + table_view const& input, + std::vector const& initial_hash = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index a02be8d69ff..13de80ccf6b 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -784,4 +784,12 @@ std::unique_ptr hash(table_view const& input, return detail::hash(input, hash_function, initial_hash, mr); } +std::unique_ptr murmur_hash3_32(table_view const& input, + std::vector const& initial_hash, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::murmur_hash3_32(input, initial_hash, mr); +} + } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/hash.pxd b/python/cudf/cudf/_lib/cpp/hash.pxd index 1ee6c55cc95..7d70be0e282 100644 --- a/python/cudf/cudf/_lib/cpp/hash.pxd +++ b/python/cudf/cudf/_lib/cpp/hash.pxd @@ -11,7 +11,7 @@ cimport cudf._lib.cpp.types as libcudf_types cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil: - cdef unique_ptr[column] hash "cudf::hash" ( + cdef unique_ptr[column] murmur_hash3_32 "cudf::murmur_hash3_32" ( const table_view& input, const vector[uint32_t]& initial_hash ) except + diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index a662cbac686..ef7a94f16b5 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -14,7 +14,7 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.hash cimport ( - hash as cpp_hash + murmur_hash3_32 as cpp_hash ) from cudf._lib.cpp.partitioning cimport ( hash_partition as cpp_hash_partition, From f41c8cc30879226638ef9200090ad7b014608632 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Fri, 7 Aug 2020 22:11:25 +0000 Subject: [PATCH 13/17] fix code attribution and add license link --- cpp/include/cudf/detail/utilities/hash_functions.cuh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index debbf17ea56..675fa5359b5 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -29,8 +29,10 @@ namespace cudf { namespace detail { /** * Modified GPU implementation of - * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/ that is lowercase only and - * does not flip the endianness of the input. + * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/ + * Copyright (c) 2015 Barry Clark + * Licensed under the MIT license. + * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destination) { From 614182be4d1a25aa4b9e736312a1c5a6d8c1abc5 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Sat, 8 Aug 2020 02:07:38 +0000 Subject: [PATCH 14/17] value naming --- .../cudf/detail/utilities/hash_functions.cuh | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 675fa5359b5..651a9bf6dda 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -116,19 +116,21 @@ struct MD5Hash { uint8_t const* data = reinterpret_cast(&key); hash_state->message_length += len; - if (hash_state->buffer_length + len < 64) { + // 64 bytes for the number of bytes processed in a given step + constexpr int md5_chunk_size = 64; + if (hash_state->buffer_length + len < md5_chunk_size) { thrust::copy_n(thrust::seq, data, len, hash_state->buffer + hash_state->buffer_length); hash_state->buffer_length += len; } else { - uint32_t copylen = 64 - hash_state->buffer_length; + uint32_t copylen = md5_chunk_size - hash_state->buffer_length; thrust::copy_n(thrust::seq, data, copylen, hash_state->buffer + hash_state->buffer_length); hash_step(hash_state); - while (len > 64 + copylen) { - thrust::copy_n(thrust::seq, data + copylen, 64, hash_state->buffer); + while (len > md5_chunk_size + copylen) { + thrust::copy_n(thrust::seq, data + copylen, md5_chunk_size, hash_state->buffer); hash_step(hash_state); - copylen += 64; + copylen += md5_chunk_size; } thrust::copy_n(thrust::seq, data + copylen, len - copylen, hash_state->buffer); @@ -141,23 +143,32 @@ struct MD5Hash { auto const full_length = (static_cast(hash_state->message_length)) << 3; thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length, 1, 0x80); - if (hash_state->buffer_length < 56) { - thrust::fill_n(thrust::seq, - hash_state->buffer + hash_state->buffer_length + 1, - (55 - hash_state->buffer_length), - 0x00); + // 64 bytes for the number of bytes processed in a given step + constexpr int md5_chunk_size = 64; + // 8 bytes for the total message length, appended to the end of the last chunk processed + constexpr int message_length_size = 8; + // 1 byte for the end of the message flag + constexpr int end_of_message_size = 1; + if (hash_state->buffer_length + message_length_size + end_of_message_size <= md5_chunk_size) { + thrust::fill_n( + thrust::seq, + hash_state->buffer + hash_state->buffer_length + 1, + (md5_chunk_size - message_length_size - end_of_message_size - hash_state->buffer_length), + 0x00); } else { thrust::fill_n(thrust::seq, hash_state->buffer + hash_state->buffer_length + 1, - (64 - hash_state->buffer_length), + (md5_chunk_size - hash_state->buffer_length), 0x00); hash_step(hash_state); - thrust::fill_n(thrust::seq, hash_state->buffer, 56, 0x00); + thrust::fill_n(thrust::seq, hash_state->buffer, md5_chunk_size - message_length_size, 0x00); } - thrust::copy_n( - thrust::seq, reinterpret_cast(&full_length), 8, hash_state->buffer + 56); + thrust::copy_n(thrust::seq, + reinterpret_cast(&full_length), + message_length_size, + hash_state->buffer + md5_chunk_size - message_length_size); hash_step(hash_state); #pragma unroll From 6aaa7b86c9d141fec25da9ad35e61a181c022145 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 10 Aug 2020 23:03:24 +0000 Subject: [PATCH 15/17] add hash_id to python interface --- cpp/src/hash/hashing.cu | 3 +-- python/cudf/cudf/_lib/cpp/hash.pxd | 3 ++- python/cudf/cudf/_lib/cpp/types.pxd | 5 +++++ python/cudf/cudf/_lib/hash.pyx | 3 ++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 13de80ccf6b..e31df805f17 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -27,7 +27,6 @@ #include #include -#include #include namespace cudf { @@ -645,7 +644,7 @@ std::unique_ptr hash(table_view const& input, switch (hash_function) { case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, initial_hash, mr, stream); case (hash_id::HASH_MD5): return md5_hash(input, mr, stream); - default: return NULL; + default: return nullptr; } } diff --git a/python/cudf/cudf/_lib/cpp/hash.pxd b/python/cudf/cudf/_lib/cpp/hash.pxd index 7d70be0e282..6507cf67ae3 100644 --- a/python/cudf/cudf/_lib/cpp/hash.pxd +++ b/python/cudf/cudf/_lib/cpp/hash.pxd @@ -11,7 +11,8 @@ cimport cudf._lib.cpp.types as libcudf_types cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil: - cdef unique_ptr[column] murmur_hash3_32 "cudf::murmur_hash3_32" ( + cdef unique_ptr[column] hash "cudf::hash" ( const table_view& input, + const libcudf_types.hash_id& hash_function, const vector[uint32_t]& initial_hash ) except + diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd index 903ab2a4be8..55458f00f8b 100644 --- a/python/cudf/cudf/_lib/cpp/types.pxd +++ b/python/cudf/cudf/_lib/cpp/types.pxd @@ -69,6 +69,11 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: LIST "cudf::type_id::LIST" NUM_TYPE_IDS "cudf::type_id::NUM_TYPE_IDS" + ctypedef enum hash_id "cudf::hash_id": + HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY" + HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3" + HASH_MD5 "cudf::hash_id::HASH_MD5" + cdef cppclass data_type: data_type() except + data_type(const data_type&) except + diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index ef7a94f16b5..ade0bb2e17a 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -14,7 +14,7 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.hash cimport ( - murmur_hash3_32 as cpp_hash + hash as cpp_hash ) from cudf._lib.cpp.partitioning cimport ( hash_partition as cpp_hash_partition, @@ -64,6 +64,7 @@ def hash(Table source_table, object initial_hash_values=None): c_result = move( cpp_hash( c_source_view, + libcudf_types.hash_id.HASH_MURMUR3, c_initial_hash ) ) From 3b68a9d946863ad75bca745c54332079f1c8fd78 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 11 Aug 2020 21:51:34 +0000 Subject: [PATCH 16/17] Switch hash constant handling --- cpp/CMakeLists.txt | 1 - .../cudf/detail/utilities/hash_functions.cuh | 15 +--- cpp/include/cudf/hashing.hpp | 5 -- cpp/src/hash/hash_constants.cu | 89 ------------------- cpp/src/hash/hash_constants.hpp | 51 ++++++----- cpp/src/hash/hashing.cu | 43 ++++----- 6 files changed, 49 insertions(+), 155 deletions(-) delete mode 100644 cpp/src/hash/hash_constants.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d03b1343cbb..0483e2c4cc5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -423,7 +423,6 @@ add_library(cudf src/stream_compaction/drop_duplicates.cu src/datetime/datetime_ops.cu src/hash/hashing.cu - src/hash/hash_constants.cu src/partitioning/partitioning.cu src/quantiles/quantile.cu src/quantiles/quantiles.cu diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 651a9bf6dda..21c78eef048 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -51,12 +51,6 @@ void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destina } struct MD5Hash { - __device__ MD5Hash(md5_hash_constants_type const* hash_constants, - md5_shift_constants_type const* shift_constants) - : d_hash_constants(hash_constants), d_shift_constants(shift_constants) - { - } - /** * @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk, * updating the hash value so far. Does not zero out the buffer contents. @@ -92,11 +86,11 @@ struct MD5Hash { uint32_t buffer_element_as_int; std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4); - F = F + A + d_hash_constants[j] + buffer_element_as_int; + F = F + A + md5_hash_constants[j] + buffer_element_as_int; A = D; D = C; C = B; - B = B + __funnelshift_l(F, F, d_shift_constants[((j / 16) * 4) + (j % 4)]); + B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]); } hash_state->hash_value[0] += A; @@ -106,6 +100,7 @@ struct MD5Hash { hash_state->buffer_length = 0; } + /** * @brief Core MD5 element processing function */ @@ -217,10 +212,6 @@ struct MD5Hash { { process(col.element(row_index), hash_state); } - - private: - md5_hash_constants_type const* d_hash_constants; - md5_shift_constants_type const* d_shift_constants; }; template <> diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index b29b84cd08e..d897609a1a1 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -39,10 +39,5 @@ std::unique_ptr hash(table_view const& input, std::vector const& initial_hash = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); -std::unique_ptr murmur_hash3_32( - table_view const& input, - std::vector const& initial_hash = {}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); - /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/hash/hash_constants.cu b/cpp/src/hash/hash_constants.cu deleted file mode 100644 index a329d0be986..00000000000 --- a/cpp/src/hash/hash_constants.cu +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "hash_constants.hpp" - -#include - -namespace cudf { -namespace detail { - -const md5_shift_constants_type g_md5_shift_constants[] = { - 7, - 12, - 17, - 22, - 5, - 9, - 14, - 20, - 4, - 11, - 16, - 23, - 6, - 10, - 15, - 21, -}; - -const md5_hash_constants_type g_md5_hash_constants[] = { - 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, - 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, - 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, - 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, - 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, - 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, - 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, - 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, -}; - -namespace { -__device__ md5_hash_constants_type md5_hash_constants[sizeof(g_md5_hash_constants)]; -__device__ md5_shift_constants_type md5_shift_constants[sizeof(g_md5_shift_constants)]; - -strings::detail::thread_safe_per_context_cache d_md5_hash_constants; -strings::detail::thread_safe_per_context_cache d_md5_shift_constants; -} // namespace - -/** - * @copydoc cudf::detail::get_md5_hash_constants - */ -const md5_hash_constants_type* get_md5_hash_constants() -{ - return d_md5_hash_constants.find_or_initialize([&](void) { - md5_hash_constants_type* table = nullptr; - CUDA_TRY( - cudaMemcpyToSymbol(md5_hash_constants, g_md5_hash_constants, sizeof(g_md5_hash_constants))); - CUDA_TRY(cudaGetSymbolAddress((void**)&table, md5_hash_constants)); - return table; - }); -} - -/** - * @copydoc cudf::detail::get_md5_shift_constants - */ -const md5_shift_constants_type* get_md5_shift_constants() -{ - return d_md5_shift_constants.find_or_initialize([&](void) { - md5_shift_constants_type* table = nullptr; - CUDA_TRY(cudaMemcpyToSymbol( - md5_shift_constants, g_md5_shift_constants, sizeof(g_md5_shift_constants))); - CUDA_TRY(cudaGetSymbolAddress((void**)&table, md5_shift_constants)); - return table; - }); -} - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/hash/hash_constants.hpp b/cpp/src/hash/hash_constants.hpp index c742aa19312..0a5a9e0be93 100644 --- a/cpp/src/hash/hash_constants.hpp +++ b/cpp/src/hash/hash_constants.hpp @@ -28,32 +28,37 @@ struct md5_intermediate_data { // Type for the shift constants table. using md5_shift_constants_type = uint32_t; -/** - * @brief Returns pointer to device memory that contains the static - * md5 shift constants table. On first call, this will copy the table into - * device memory and is guaranteed to be thread-safe. - * - * This table is used in the MD5 hash to lookup the number of bits - * to rotate left during each hash iteration. - * - * @return Device memory pointer to the MD5 shift constants table. - */ -const md5_shift_constants_type* get_md5_shift_constants(); +__device__ __constant__ md5_shift_constants_type md5_shift_constants[16] = { + 7, + 12, + 17, + 22, + 5, + 9, + 14, + 20, + 4, + 11, + 16, + 23, + 6, + 10, + 15, + 21, +}; // Type for the hash constants table. using md5_hash_constants_type = uint32_t; -/** - * @brief Returns pointer to device memory that contains the static - * md5 hash constants table. On first call, this will copy the table into - * device memory and is guaranteed to be thread-safe. - * - * This table is used in the MD5 hash to lookup values added to - * the hash during each hash iteration. - * - * @return Device memory pointer to the MD5 hash constants table. - */ -const md5_hash_constants_type* get_md5_hash_constants(); - +__device__ __constant__ md5_hash_constants_type md5_hash_constants[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; } // namespace detail } // namespace cudf diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index e31df805f17..1939c12dee7 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -684,32 +684,25 @@ std::unique_ptr md5_hash(table_view const& input, bool const nullable = has_nulls(input); auto const device_input = table_device_view::create(input, stream); - // Fetch hash constants - md5_shift_constants_type const* shift_constants = get_md5_shift_constants(); - md5_hash_constants_type const* hash_constants = get_md5_hash_constants(); - // Hash each row, hashing each element sequentially left to right - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - [d_chars, - device_input = *device_input, - hash_constants = hash_constants, - shift_constants = shift_constants, - has_nulls = nullable] __device__(auto row_index) { - md5_intermediate_data hash_state; - MD5Hash hasher = MD5Hash(hash_constants, shift_constants); - for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { - if (device_input.column(col_index).is_valid(row_index)) { - cudf::type_dispatcher(device_input.column(col_index).type(), - hasher, - device_input.column(col_index), - row_index, - &hash_state); - } - } - hasher.finalize(&hash_state, d_chars + (row_index * 32)); - }); + thrust::for_each( + rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + [d_chars, device_input = *device_input, has_nulls = nullable] __device__(auto row_index) { + md5_intermediate_data hash_state; + MD5Hash hasher = MD5Hash{}; + for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { + if (device_input.column(col_index).is_valid(row_index)) { + cudf::type_dispatcher(device_input.column(col_index).type(), + hasher, + device_input.column(col_index), + row_index, + &hash_state); + } + } + hasher.finalize(&hash_state, d_chars + (row_index * 32)); + }); return make_strings_column(input.num_rows(), std::move(offsets_column), From 7379b2305c13e27a19b1d885a16a508a5264586f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 11 Aug 2020 22:23:22 +0000 Subject: [PATCH 17/17] remove ide insert --- cpp/include/cudf/detail/utilities/hash_functions.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 21c78eef048..da9c31286dd 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -20,7 +20,6 @@ #include #include -#include "cuda_runtime_api.h" #include "cudf/types.hpp" using hash_value_type = uint32_t;