From 39e6d8c05d9ec5d8a55c01afca528ce507f14f09 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 20 Nov 2024 15:44:13 -0800 Subject: [PATCH 1/7] Update xxhash_64 to utilize the cuco equivalent implementation --- cpp/src/hash/xxhash_64.cu | 176 +++++--------------------------------- 1 file changed, 21 insertions(+), 155 deletions(-) diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu index fad8383210b..89c90236c8b 100644 --- a/cpp/src/hash/xxhash_64.cu +++ b/cpp/src/hash/xxhash_64.cu @@ -25,6 +25,7 @@ #include #include +#include #include namespace cudf { @@ -36,204 +37,69 @@ namespace { using hash_value_type = uint64_t; template -struct XXHash_64 { - using result_type = hash_value_type; - - constexpr XXHash_64() = default; - constexpr XXHash_64(hash_value_type seed) : m_seed(seed) {} - - __device__ inline uint32_t getblock32(std::byte const* data, std::size_t offset) const - { - // Read a 4-byte value from the data pointer as individual bytes for safe - // unaligned access (very likely for string types). - auto block = reinterpret_cast(data + offset); - return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); - } - - __device__ inline uint64_t getblock64(std::byte const* data, std::size_t offset) const - { - uint64_t result = getblock32(data, offset + 4); - result = result << 32; - return result | getblock32(data, offset); - } - - result_type __device__ inline operator()(Key const& key) const { return compute(key); } - - template - result_type __device__ inline compute(T const& key) const +struct XXHash_64 : public cuco::xxhash_64 { + __device__ hash_value_type operator()(Key const& key) const { - auto data = device_span(reinterpret_cast(&key), sizeof(T)); - return compute_bytes(data); + return cuco::xxhash_64::operator()(key); } - result_type __device__ inline compute_remaining_bytes(device_span& in, - std::size_t offset, - result_type h64) const + template + __device__ hash_value_type compute_hash(cuda::std::byte const* bytes, Extent size) const { - // remaining data can be processed in 8-byte chunks - if ((in.size() % 32) >= 8) { - for (; offset <= in.size() - 8; offset += 8) { - uint64_t k1 = getblock64(in.data(), offset) * prime2; - - k1 = rotate_bits_left(k1, 31) * prime1; - h64 ^= k1; - h64 = rotate_bits_left(h64, 27) * prime1 + prime4; - } - } - - // remaining data can be processed in 4-byte chunks - if ((in.size() % 8) >= 4) { - for (; offset <= in.size() - 4; offset += 4) { - h64 ^= (getblock32(in.data(), offset) & 0xfffffffful) * prime1; - h64 = rotate_bits_left(h64, 23) * prime2 + prime3; - } - } - - // and the rest - if (in.size() % 4) { - while (offset < in.size()) { - h64 ^= (std::to_integer(in[offset]) & 0xff) * prime5; - h64 = rotate_bits_left(h64, 11) * prime1; - ++offset; - } - } - return h64; + return cuco::xxhash_64::compute_hash(bytes, size); } - - result_type __device__ compute_bytes(device_span& in) const - { - uint64_t offset = 0; - uint64_t h64; - // data can be processed in 32-byte chunks - if (in.size() >= 32) { - auto limit = in.size() - 32; - uint64_t v1 = m_seed + prime1 + prime2; - uint64_t v2 = m_seed + prime2; - uint64_t v3 = m_seed; - uint64_t v4 = m_seed - prime1; - - do { - // pipeline 4*8byte computations - v1 += getblock64(in.data(), offset) * prime2; - v1 = rotate_bits_left(v1, 31); - v1 *= prime1; - offset += 8; - v2 += getblock64(in.data(), offset) * prime2; - v2 = rotate_bits_left(v2, 31); - v2 *= prime1; - offset += 8; - v3 += getblock64(in.data(), offset) * prime2; - v3 = rotate_bits_left(v3, 31); - v3 *= prime1; - offset += 8; - v4 += getblock64(in.data(), offset) * prime2; - v4 = rotate_bits_left(v4, 31); - v4 *= prime1; - offset += 8; - } while (offset <= limit); - - h64 = rotate_bits_left(v1, 1) + rotate_bits_left(v2, 7) + rotate_bits_left(v3, 12) + - rotate_bits_left(v4, 18); - - v1 *= prime2; - v1 = rotate_bits_left(v1, 31); - v1 *= prime1; - h64 ^= v1; - h64 = h64 * prime1 + prime4; - - v2 *= prime2; - v2 = rotate_bits_left(v2, 31); - v2 *= prime1; - h64 ^= v2; - h64 = h64 * prime1 + prime4; - - v3 *= prime2; - v3 = rotate_bits_left(v3, 31); - v3 *= prime1; - h64 ^= v3; - h64 = h64 * prime1 + prime4; - - v4 *= prime2; - v4 = rotate_bits_left(v4, 31); - v4 *= prime1; - h64 ^= v4; - h64 = h64 * prime1 + prime4; - } else { - h64 = m_seed + prime5; - } - - h64 += in.size(); - - h64 = compute_remaining_bytes(in, offset, h64); - - return finalize(h64); - } - - constexpr __host__ __device__ std::uint64_t finalize(std::uint64_t h) const noexcept - { - h ^= h >> 33; - h *= prime2; - h ^= h >> 29; - h *= prime3; - h ^= h >> 32; - return h; - } - - private: - hash_value_type m_seed{}; - static constexpr uint64_t prime1 = 0x9e3779b185ebca87ul; - static constexpr uint64_t prime2 = 0xc2b2ae3d27d4eb4ful; - static constexpr uint64_t prime3 = 0x165667b19e3779f9ul; - static constexpr uint64_t prime4 = 0x85ebca77c2b2ae63ul; - static constexpr uint64_t prime5 = 0x27d4eb2f165667c5ul; }; template <> -hash_value_type __device__ inline XXHash_64::operator()(bool const& key) const +hash_value_type __device__ XXHash_64::operator()(bool const& key) const { - return compute(static_cast(key)); + return this->compute_hash(reinterpret_cast(&key), sizeof(key)); } template <> hash_value_type __device__ inline XXHash_64::operator()(float const& key) const { - return compute(normalize_nans(key)); + return cuco::xxhash_64::operator()(normalize_nans(key)); } template <> hash_value_type __device__ inline XXHash_64::operator()(double const& key) const { - return compute(normalize_nans(key)); + return cuco::xxhash_64::operator()(normalize_nans(key)); } template <> hash_value_type __device__ inline XXHash_64::operator()( cudf::string_view const& key) const { - auto const len = key.size_bytes(); - auto data = device_span(reinterpret_cast(key.data()), len); - return compute_bytes(data); + return this->compute_hash(reinterpret_cast(key.data()), key.size_bytes()); } template <> hash_value_type __device__ inline XXHash_64::operator()( numeric::decimal32 const& key) const { - return compute(key.value()); + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); } template <> hash_value_type __device__ inline XXHash_64::operator()( numeric::decimal64 const& key) const { - return compute(key.value()); + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); } template <> hash_value_type __device__ inline XXHash_64::operator()( numeric::decimal128 const& key) const { - return compute(key.value()); + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); } /** From b5c6093dd3a56d6852e2fc4bc239d5d1322ba3a9 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 20 Nov 2024 15:51:10 -0800 Subject: [PATCH 2/7] minor fix --- cpp/src/hash/xxhash_64.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu index 89c90236c8b..baaaac9b8e9 100644 --- a/cpp/src/hash/xxhash_64.cu +++ b/cpp/src/hash/xxhash_64.cu @@ -51,7 +51,7 @@ struct XXHash_64 : public cuco::xxhash_64 { }; template <> -hash_value_type __device__ XXHash_64::operator()(bool const& key) const +hash_value_type __device__ inline XXHash_64::operator()(bool const& key) const { return this->compute_hash(reinterpret_cast(&key), sizeof(key)); } From 2b54f08fe90ebf63ff7554640bfe52a414bfe6bd Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 20 Nov 2024 16:11:21 -0800 Subject: [PATCH 3/7] Add missing pragma once to murmur128 hash header --- cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh b/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh index c986a908706..5e88b905023 100644 --- a/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh +++ b/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#pragma once + #include #include From 675083463ed3e72ae82079be0d25737260905aa7 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 20 Nov 2024 16:25:33 -0800 Subject: [PATCH 4/7] Expose detail::xxhash_64 as requested --- cpp/include/cudf/hashing/detail/xxhash_64.cuh | 96 +++++++++++++++++++ cpp/src/hash/xxhash_64.cu | 68 +------------ 2 files changed, 97 insertions(+), 67 deletions(-) create mode 100644 cpp/include/cudf/hashing/detail/xxhash_64.cuh diff --git a/cpp/include/cudf/hashing/detail/xxhash_64.cuh b/cpp/include/cudf/hashing/detail/xxhash_64.cuh new file mode 100644 index 00000000000..546bc509209 --- /dev/null +++ b/cpp/include/cudf/hashing/detail/xxhash_64.cuh @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf::hashing::detail { + +template +struct XXHash_64 : public cuco::xxhash_64 { + using result_type = typename cuco::xxhash_64::result_type; + + __device__ result_type operator()(Key const& key) const + { + return cuco::xxhash_64::operator()(key); + } + + template + __device__ result_type compute_hash(cuda::std::byte const* bytes, Extent size) const + { + return cuco::xxhash_64::compute_hash(bytes, size); + } +}; + +template <> +XXHash_64::result_type __device__ inline XXHash_64::operator()(bool const& key) const +{ + return this->compute_hash(reinterpret_cast(&key), sizeof(key)); +} + +template <> +XXHash_64::result_type __device__ inline XXHash_64::operator()(float const& key) const +{ + return cuco::xxhash_64::operator()(normalize_nans(key)); +} + +template <> +XXHash_64::result_type __device__ inline XXHash_64::operator()( + double const& key) const +{ + return cuco::xxhash_64::operator()(normalize_nans(key)); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(cudf::string_view const& key) const +{ + return this->compute_hash(reinterpret_cast(key.data()), key.size_bytes()); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(numeric::decimal32 const& key) const +{ + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(numeric::decimal64 const& key) const +{ + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(numeric::decimal128 const& key) const +{ + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); +} + +} // namespace cudf::hashing::detail diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu index baaaac9b8e9..2f9dfc9b3cc 100644 --- a/cpp/src/hash/xxhash_64.cu +++ b/cpp/src/hash/xxhash_64.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include #include namespace cudf { @@ -36,72 +36,6 @@ namespace { using hash_value_type = uint64_t; -template -struct XXHash_64 : public cuco::xxhash_64 { - __device__ hash_value_type operator()(Key const& key) const - { - return cuco::xxhash_64::operator()(key); - } - - template - __device__ hash_value_type compute_hash(cuda::std::byte const* bytes, Extent size) const - { - return cuco::xxhash_64::compute_hash(bytes, size); - } -}; - -template <> -hash_value_type __device__ inline XXHash_64::operator()(bool const& key) const -{ - return this->compute_hash(reinterpret_cast(&key), sizeof(key)); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()(float const& key) const -{ - return cuco::xxhash_64::operator()(normalize_nans(key)); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()(double const& key) const -{ - return cuco::xxhash_64::operator()(normalize_nans(key)); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - cudf::string_view const& key) const -{ - return this->compute_hash(reinterpret_cast(key.data()), key.size_bytes()); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - numeric::decimal32 const& key) const -{ - auto const val = key.value(); - auto const len = sizeof(val); - return this->compute_hash(reinterpret_cast(&val), len); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - numeric::decimal64 const& key) const -{ - auto const val = key.value(); - auto const len = sizeof(val); - return this->compute_hash(reinterpret_cast(&val), len); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - numeric::decimal128 const& key) const -{ - auto const val = key.value(); - auto const len = sizeof(val); - return this->compute_hash(reinterpret_cast(&val), len); -} - /** * @brief Computes the hash value of a row in the given table. * From b4fbd5dd94af4b18336d1f17d22f86393df11452 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 20 Nov 2024 19:39:31 -0800 Subject: [PATCH 5/7] Update cpp/include/cudf/hashing/detail/xxhash_64.cuh Co-authored-by: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> --- cpp/include/cudf/hashing/detail/xxhash_64.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/include/cudf/hashing/detail/xxhash_64.cuh b/cpp/include/cudf/hashing/detail/xxhash_64.cuh index 546bc509209..1bf78b74d40 100644 --- a/cpp/include/cudf/hashing/detail/xxhash_64.cuh +++ b/cpp/include/cudf/hashing/detail/xxhash_64.cuh @@ -16,6 +16,8 @@ #pragma once +#include "hash_functions.cuh" + #include #include From da03d51a9b0c02f9442d45f44c07074d8d9f39c4 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 22 Nov 2024 10:11:19 -0800 Subject: [PATCH 6/7] Remove unused header --- cpp/src/hash/xxhash_64.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu index 2f9dfc9b3cc..bdbe13b1ffb 100644 --- a/cpp/src/hash/xxhash_64.cu +++ b/cpp/src/hash/xxhash_64.cu @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include From 7cbefc1230646612e86c834d7ab8053646461344 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 22 Nov 2024 10:31:32 -0800 Subject: [PATCH 7/7] Add missing header for numerics --- cpp/include/cudf/hashing/detail/xxhash_64.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cudf/hashing/detail/xxhash_64.cuh b/cpp/include/cudf/hashing/detail/xxhash_64.cuh index 1bf78b74d40..b00e8297ac9 100644 --- a/cpp/include/cudf/hashing/detail/xxhash_64.cuh +++ b/cpp/include/cudf/hashing/detail/xxhash_64.cuh @@ -18,6 +18,7 @@ #include "hash_functions.cuh" +#include #include #include