Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor hash functions and hash_combine #10379

Merged
merged 10 commits into from
Mar 8, 2022
35 changes: 28 additions & 7 deletions cpp/include/cudf/detail/hashing.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -61,20 +61,41 @@ std::unique_ptr<column> serial_murmur_hash3_32(
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
* @brief Combines two hashed values into a single hashed value.
* @brief Combines two hash values into a single hash value.
*
* Adapted from Boost hash_combine function, modified for 64-bit
* Taken from the Boost hash_combine function.
* https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
*
* @param lhs The first hashed value
* @param rhs The second hashed value
* @param lhs The first hash value
* @param rhs The second hash value
* @return Combined hash value
*/
constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs)
{
  // Mixing term of the Boost formula: rhs, the magic constant, and two shifted
  // copies of lhs, summed with unsigned wrap-around.
  uint32_t const mix = rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
  // Fold the mixing term back into lhs.
  return lhs ^ mix;
}

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
* @brief Combines two hash values into a single hash value.
*
* Adapted from Boost hash_combine function and modified for 64-bit.
* https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
*
* @param lhs The first hash value
* @param rhs The second hash value
* @return Combined hash value
*/
constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
{
  // Same mixing step as the 32-bit Boost formula, with the constant widened for 64-bit.
  // Written as a single expression: the former `lhs ^= ...; return lhs;` pair computed
  // the identical value and made a second return statement unreachable.
  return lhs ^ (rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2));
}

} // namespace detail
} // namespace cudf

Expand Down
124 changes: 33 additions & 91 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ struct MurmurHash3_32 {
// Default constructor. It does not assign m_seed itself; the value comes from the
// member's own declaration, which is not visible in this excerpt.
MurmurHash3_32() = default;
// Constructs a hasher whose m_seed member is initialized from `seed`.
constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}

/**
 * @brief Rotates a 32-bit value to the left.
 *
 * @param x The value to rotate
 * @param r The number of bit positions to rotate by
 * @return `x` rotated left by `r` bits
 */
[[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
{
  return __funnelshift_l(x, x, r);  // Equivalent to (x << r) | (x >> (32 - r))
}

[[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const
Expand All @@ -114,32 +114,6 @@ struct MurmurHash3_32 {
return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
}

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
* @brief Combines two hash values into a new single hash value. Called
* repeatedly to create a hash value from several variables.
* Taken from the Boost hash_combine function
* https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
*
* @param lhs The first hash value to combine
* @param rhs The second hash value to combine
*
* @returns A hash value that intelligently combines the lhs and rhs hash values
*/
constexpr result_type hash_combine(result_type lhs, result_type rhs) const
{
result_type combined{lhs};

combined ^= rhs + 0x9e3779b9 + (combined << 6) + (combined >> 2);

return combined;
}

// TODO Do we need this operator() and/or compute? Probably not both.
[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
{
Expand Down Expand Up @@ -218,28 +192,25 @@ hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& k
return this->compute(static_cast<uint8_t>(key));
}

/**
 * @brief Specialization of MurmurHash3_32 operator for `float`.
 *
 * Forwards the key to `compute_floating_point`.
 */
template <>
hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
{
  return this->compute_floating_point(key);
}

/**
 * @brief Specialization of MurmurHash3_32 operator for `double`.
 *
 * Forwards the key to `compute_floating_point`.
 */
template <>
hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
{
  return this->compute_floating_point(key);
}

/**
 * @brief Specialization of MurmurHash3_32 operator for strings.
 *
 * Hashes the string by passing its data pointer (viewed as bytes) and its size in
 * bytes to `compute_bytes`.
 */
template <>
hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
  cudf::string_view const& key) const
{
  auto const data = reinterpret_cast<std::byte const*>(key.data());
  auto const len  = key.size_bytes();
  return this->compute_bytes(data, len);
}

template <>
Expand Down Expand Up @@ -286,9 +257,9 @@ struct SparkMurmurHash3_32 {
// Default constructor. It does not assign m_seed itself; the value comes from the
// member's own declaration, which is not visible in this excerpt.
SparkMurmurHash3_32() = default;
// Constructs a hasher whose m_seed member is initialized from `seed`.
constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}

/**
 * @brief Rotates a 32-bit value to the left.
 *
 * @param x The value to rotate
 * @param r The number of bit positions to rotate by
 * @return `x` rotated left by `r` bits
 */
[[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
{
  return __funnelshift_l(x, x, r);  // Equivalent to (x << r) | (x >> (32 - r))
}

__device__ inline uint32_t fmix32(uint32_t h) const
Expand Down Expand Up @@ -408,6 +379,27 @@ hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
return this->compute<uint32_t>(key);
}

/**
 * @brief Specialization of SparkMurmurHash3_32 operator for `float`.
 *
 * Forwards the key to `compute_floating_point` and returns its result.
 */
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
{
  hash_value_type const hashed = this->compute_floating_point(key);
  return hashed;
}

/**
 * @brief Specialization of SparkMurmurHash3_32 operator for `double`.
 *
 * Forwards the key to `compute_floating_point` and returns its result.
 */
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
{
  hash_value_type const hashed = this->compute_floating_point(key);
  return hashed;
}

/**
 * @brief Specialization of SparkMurmurHash3_32 operator for strings.
 *
 * Hashes the string by passing its data pointer (viewed as bytes) and its size in
 * bytes to `compute_bytes`.
 *
 * @param key The string to hash
 * @return The value produced by `compute_bytes`
 */
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
  cudf::string_view const& key) const
{
  auto const data = reinterpret_cast<std::byte const*>(key.data());
  auto const len  = key.size_bytes();
  return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
Expand Down Expand Up @@ -480,30 +472,6 @@ hash_value_type __device__ inline SparkMurmurHash3_32<cudf::struct_view>::operat
return 0;
}

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

/**
* @brief This hash function simply returns the value that is asked to be hashed,
* reinterpreted as the result_type of the functor.
Expand All @@ -514,32 +482,6 @@ struct IdentityHash {
// Default constructor. It does not assign m_seed itself; the value comes from the
// member's own declaration, which is not visible in this excerpt.
IdentityHash() = default;
// Constructs a hasher whose m_seed member is initialized from `seed`.
constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
* @brief Combines two hash values into a new single hash value. Called
* repeatedly to create a hash value from several variables.
* Taken from the Boost hash_combine function
* https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
*
* @param lhs The first hash value to combine
* @param rhs The second hash value to combine
*
* @returns A hash value that intelligently combines the lhs and rhs hash values
*/
constexpr result_type hash_combine(result_type lhs, result_type rhs) const
{
result_type combined{lhs};

combined ^= rhs + 0x9e3779b9 + (combined << 6) + (combined >> 2);

return combined;
}

template <typename return_type = result_type>
constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
Expand Down
23 changes: 11 additions & 12 deletions cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/hashing.hpp>
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/sorting.hpp>
Expand Down Expand Up @@ -503,18 +504,14 @@ class row_hasher {

__device__ auto operator()(size_type row_index) const
{
auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) {
return hash_function<hash_value_type>{}.hash_combine(lhs, rhs);
};

// Hash the first column w/ the seed
auto const initial_hash =
hash_combiner(hash_value_type{0},
type_dispatcher<dispatch_storage_type>(
_table.column(0).type(),
element_hasher_with_seed<hash_function, Nullate>{_has_nulls, _seed},
_table.column(0),
row_index));
auto const initial_hash = cudf::detail::hash_combine(
hash_value_type{0},
type_dispatcher<dispatch_storage_type>(
_table.column(0).type(),
element_hasher_with_seed<hash_function, Nullate>{_has_nulls, _seed},
_table.column(0),
row_index));

// Hashes an element in a column
auto hasher = [=](size_type column_index) {
Expand All @@ -533,7 +530,9 @@ class row_hasher {
thrust::make_counting_iterator(_table.num_columns()),
hasher,
initial_hash,
hash_combiner);
[](hash_value_type lhs, hash_value_type rhs) {
return cudf::detail::hash_combine(lhs, rhs);
});
}

private:
Expand Down