diff --git a/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh b/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh index c986a908706..5e88b905023 100644 --- a/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh +++ b/cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#pragma once + #include #include diff --git a/cpp/include/cudf/hashing/detail/xxhash_64.cuh b/cpp/include/cudf/hashing/detail/xxhash_64.cuh new file mode 100644 index 00000000000..b00e8297ac9 --- /dev/null +++ b/cpp/include/cudf/hashing/detail/xxhash_64.cuh @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "hash_functions.cuh" + +#include +#include +#include + +#include +#include + +namespace cudf::hashing::detail { + +template +struct XXHash_64 : public cuco::xxhash_64 { + using result_type = typename cuco::xxhash_64::result_type; + + __device__ result_type operator()(Key const& key) const + { + return cuco::xxhash_64::operator()(key); + } + + template + __device__ result_type compute_hash(cuda::std::byte const* bytes, Extent size) const + { + return cuco::xxhash_64::compute_hash(bytes, size); + } +}; + +template <> +XXHash_64::result_type __device__ inline XXHash_64::operator()(bool const& key) const +{ + return this->compute_hash(reinterpret_cast(&key), sizeof(key)); +} + +template <> +XXHash_64::result_type __device__ inline XXHash_64::operator()(float const& key) const +{ + return cuco::xxhash_64::operator()(normalize_nans(key)); +} + +template <> +XXHash_64::result_type __device__ inline XXHash_64::operator()( + double const& key) const +{ + return cuco::xxhash_64::operator()(normalize_nans(key)); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(cudf::string_view const& key) const +{ + return this->compute_hash(reinterpret_cast(key.data()), key.size_bytes()); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(numeric::decimal32 const& key) const +{ + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(numeric::decimal64 const& key) const +{ + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); +} + +template <> +XXHash_64::result_type + __device__ inline XXHash_64::operator()(numeric::decimal128 const& key) const +{ + auto const val = key.value(); + auto const len = sizeof(val); + return this->compute_hash(reinterpret_cast(&val), len); +} + +} // namespace cudf::hashing::detail diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu index fad8383210b..bdbe13b1ffb 100644 --- a/cpp/src/hash/xxhash_64.cu +++ b/cpp/src/hash/xxhash_64.cu @@ -16,8 +16,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -35,207 +35,6 @@ namespace { using hash_value_type = uint64_t; -template -struct XXHash_64 { - using result_type = hash_value_type; - - constexpr XXHash_64() = default; - constexpr XXHash_64(hash_value_type seed) : m_seed(seed) {} - - __device__ inline uint32_t getblock32(std::byte const* data, std::size_t offset) const - { - // Read a 4-byte value from the data pointer as individual bytes for safe - // unaligned access (very likely for string types). - auto block = reinterpret_cast(data + offset); - return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); - } - - __device__ inline uint64_t getblock64(std::byte const* data, std::size_t offset) const - { - uint64_t result = getblock32(data, offset + 4); - result = result << 32; - return result | getblock32(data, offset); - } - - result_type __device__ inline operator()(Key const& key) const { return compute(key); } - - template - result_type __device__ inline compute(T const& key) const - { - auto data = device_span(reinterpret_cast(&key), sizeof(T)); - return compute_bytes(data); - } - - result_type __device__ inline compute_remaining_bytes(device_span& in, - std::size_t offset, - result_type h64) const - { - // remaining data can be processed in 8-byte chunks - if ((in.size() % 32) >= 8) { - for (; offset <= in.size() - 8; offset += 8) { - uint64_t k1 = getblock64(in.data(), offset) * prime2; - - k1 = rotate_bits_left(k1, 31) * prime1; - h64 ^= k1; - h64 = rotate_bits_left(h64, 27) * prime1 + prime4; - } - } - - // remaining data can be processed in 4-byte chunks - if ((in.size() % 8) >= 4) { - for (; offset <= in.size() - 4; offset += 4) { - h64 ^= (getblock32(in.data(), offset) & 0xfffffffful) * prime1; - h64 = rotate_bits_left(h64, 23) * prime2 + prime3; - } - } - - // and the rest - if (in.size() % 4) { - while (offset < in.size()) { - h64 ^= (std::to_integer(in[offset]) & 0xff) * prime5; - h64 = rotate_bits_left(h64, 11) * prime1; - ++offset; - } - } - return h64; - } - - result_type __device__ compute_bytes(device_span& in) const - { - uint64_t offset = 0; - uint64_t h64; - // data can be processed in 32-byte chunks - if (in.size() >= 32) { - auto limit = in.size() - 32; - uint64_t v1 = m_seed + prime1 + prime2; - uint64_t v2 = m_seed + prime2; - uint64_t v3 = m_seed; - uint64_t v4 = m_seed - prime1; - - do { - // pipeline 4*8byte computations - v1 += getblock64(in.data(), offset) * prime2; - v1 = rotate_bits_left(v1, 31); - v1 *= prime1; - offset += 8; - v2 += getblock64(in.data(), offset) * prime2; - v2 = rotate_bits_left(v2, 31); - v2 *= prime1; - offset += 8; - v3 += getblock64(in.data(), offset) * prime2; - v3 = rotate_bits_left(v3, 31); - v3 *= prime1; - offset += 8; - v4 += getblock64(in.data(), offset) * prime2; - v4 = rotate_bits_left(v4, 31); - v4 *= prime1; - offset += 8; - } while (offset <= limit); - - h64 = rotate_bits_left(v1, 1) + rotate_bits_left(v2, 7) + rotate_bits_left(v3, 12) + - rotate_bits_left(v4, 18); - - v1 *= prime2; - v1 = rotate_bits_left(v1, 31); - v1 *= prime1; - h64 ^= v1; - h64 = h64 * prime1 + prime4; - - v2 *= prime2; - v2 = rotate_bits_left(v2, 31); - v2 *= prime1; - h64 ^= v2; - h64 = h64 * prime1 + prime4; - - v3 *= prime2; - v3 = rotate_bits_left(v3, 31); - v3 *= prime1; - h64 ^= v3; - h64 = h64 * prime1 + prime4; - - v4 *= prime2; - v4 = rotate_bits_left(v4, 31); - v4 *= prime1; - h64 ^= v4; - h64 = h64 * prime1 + prime4; - } else { - h64 = m_seed + prime5; - } - - h64 += in.size(); - - h64 = compute_remaining_bytes(in, offset, h64); - - return finalize(h64); - } - - constexpr __host__ __device__ std::uint64_t finalize(std::uint64_t h) const noexcept - { - h ^= h >> 33; - h *= prime2; - h ^= h >> 29; - h *= prime3; - h ^= h >> 32; - return h; - } - - private: - hash_value_type m_seed{}; - static constexpr uint64_t prime1 = 0x9e3779b185ebca87ul; - static constexpr uint64_t prime2 = 0xc2b2ae3d27d4eb4ful; - static constexpr uint64_t prime3 = 0x165667b19e3779f9ul; - static constexpr uint64_t prime4 = 0x85ebca77c2b2ae63ul; - static constexpr uint64_t prime5 = 0x27d4eb2f165667c5ul; -}; - -template <> -hash_value_type __device__ inline XXHash_64::operator()(bool const& key) const -{ - return compute(static_cast(key)); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()(float const& key) const -{ - return compute(normalize_nans(key)); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()(double const& key) const -{ - return compute(normalize_nans(key)); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - cudf::string_view const& key) const -{ - auto const len = key.size_bytes(); - auto data = device_span(reinterpret_cast(key.data()), len); - return compute_bytes(data); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - numeric::decimal32 const& key) const -{ - return compute(key.value()); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - numeric::decimal64 const& key) const -{ - return compute(key.value()); -} - -template <> -hash_value_type __device__ inline XXHash_64::operator()( - numeric::decimal128 const& key) const -{ - return compute(key.value()); -} - /** * @brief Computes the hash value of a row in the given table. *