Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update MurmurHash3_x86_32 to use the cuco equivalent implementation #17429

Merged
merged 15 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ if(CUDF_BUILD_TESTUTIL)
)

target_link_libraries(
cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
$<TARGET_NAME_IF_EXISTS:conda_env>
)

Expand Down
6 changes: 1 addition & 5 deletions cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
function(find_and_configure_cucollections)
include(${rapids-cmake-dir}/cpm/cuco.cmake)

if(BUILD_SHARED_LIBS)
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports)
else()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endif()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endfunction()

find_and_configure_cucollections()
116 changes: 21 additions & 95 deletions cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION.
* Copyright (c) 2017-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,157 +24,83 @@
#include <cudf/structs/struct_view.hpp>
#include <cudf/types.hpp>

#include <cstddef>
#include <cuco/hash_functions.cuh>
#include <cuda/std/cstddef>

namespace cudf::hashing::detail {

// MurmurHash3_x86_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_x86_32 {
using result_type = hash_value_type;

constexpr MurmurHash3_x86_32() = default;
constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {}

[[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
__host__ __device__ constexpr MurmurHash3_x86_32(uint32_t seed = cudf::DEFAULT_HASH_SEED)
: _impl{seed}
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}

[[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data,
cudf::size_type offset) const
{
// Read a 4-byte value from the data pointer as individual bytes for safe
// unaligned access (very likely for string types).
auto const block = reinterpret_cast<uint8_t const*>(data + offset);
return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
}
__device__ constexpr result_type operator()(Key const& key) const { return this->_impl(key); }

[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
__device__ constexpr result_type compute_bytes(cuda::std::byte const* bytes,
std::uint64_t size) const
{
return compute(normalize_nans_and_zeros(key));
return this->_impl.compute_hash(bytes, size);
}

private:
template <typename T>
result_type __device__ inline compute(T const& key) const
__device__ constexpr result_type compute(T const& key) const
{
return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(&key), sizeof(T));
}

result_type __device__ inline compute_remaining_bytes(std::byte const* data,
cudf::size_type len,
cudf::size_type tail_offset,
result_type h) const
{
// Process remaining bytes that do not fill a four-byte chunk.
uint32_t k1 = 0;
switch (len % 4) {
case 3: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 2]) << 16; [[fallthrough]];
case 2: k1 ^= std::to_integer<uint8_t>(data[tail_offset + 1]) << 8; [[fallthrough]];
case 1:
k1 ^= std::to_integer<uint8_t>(data[tail_offset]);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
};
return h;
}

result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const
{
constexpr cudf::size_type BLOCK_SIZE = 4;
cudf::size_type const nblocks = len / BLOCK_SIZE;
cudf::size_type const tail_offset = nblocks * BLOCK_SIZE;
result_type h = m_seed;

// Process all four-byte chunks.
for (cudf::size_type i = 0; i < nblocks; i++) {
uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
k1 *= c1;
k1 = rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
h = rotate_bits_left(h, rot_c2);
h = h * 5 + c3;
}

h = compute_remaining_bytes(data, len, tail_offset, h);

// Finalize hash.
h ^= len;
h = fmix32(h);
return h;
}

private:
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
static constexpr uint32_t c1 = 0xcc9e2d51;
static constexpr uint32_t c2 = 0x1b873593;
static constexpr uint32_t c3 = 0xe6546b64;
static constexpr uint32_t rot_c1 = 15;
static constexpr uint32_t rot_c2 = 13;
cuco::murmurhash3_32<Key> _impl;
};

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
{
return compute(static_cast<uint8_t>(key));
return this->compute(static_cast<uint8_t>(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
{
return compute(normalize_nans_and_zeros(key));
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return compute_bytes(data, len);
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
key.size_bytes());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return compute(key.value());
return this->compute(key.value());
}

template <>
Expand Down
9 changes: 7 additions & 2 deletions cpp/include/cudf/hashing/detail/xxhash_64.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

#pragma once

#include "hash_functions.cuh"

#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/hashing.hpp>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

Expand All @@ -31,6 +31,11 @@ template <typename Key>
struct XXHash_64 : public cuco::xxhash_64<Key> {
using result_type = typename cuco::xxhash_64<Key>::result_type;

__host__ __device__ constexpr XXHash_64(uint64_t seed = cudf::DEFAULT_HASH_SEED)
: cuco::xxhash_64<Key>{seed}
{
}

__device__ result_type operator()(Key const& key) const
{
return cuco::xxhash_64<Key>::operator()(key);
Expand Down
Loading