Skip to content

Commit

Permalink
Revert "Remove template from hashing class, template on operator() and compute instead."
Browse files Browse the repository at this point in the history

This reverts commit 466243b.
  • Loading branch information
bdice committed Mar 7, 2022
1 parent 0d9df03 commit c972ba4
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 80 deletions.
132 changes: 74 additions & 58 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_32 {
using result_type = hash_value_type;

Expand Down Expand Up @@ -114,8 +115,7 @@ struct MurmurHash3_32 {
}

// TODO Do we need this operator() and/or compute? Probably not both.
template <typename T>
[[nodiscard]] result_type __device__ inline operator()(T const& key) const
[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
{
return compute(key);
}
Expand Down Expand Up @@ -187,72 +187,82 @@ struct MurmurHash3_32 {
};

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const
hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
{
return this->compute(static_cast<uint8_t>(key));
}

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const
hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
return this->compute_floating_point(key);
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const
hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::string_view const& key) const
hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal32 const& key) const
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal64 const& key) const
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal128 const& key) const
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::list_view const&) const
hash_value_type __device__ inline MurmurHash3_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
{
cudf_assert(false && "List column hashing is not supported");
return 0;
}

template <>
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::struct_view const&) const
hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
{
cudf_assert(false && "Direct hashing of struct_view is not supported");
return 0;
}

template <typename Key>
struct SparkMurmurHash3_32 {
using result_type = hash_value_type;

SparkMurmurHash3_32() = default;
constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}

[[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
__device__ inline uint32_t rotl32(uint32_t x, int8_t r) const
{
return __funnelshift_l(x, x, r); // Equivalent to (x << r) | (x >> (32 - r))
return (x << r) | (x >> (32 - r));
}

__device__ inline uint32_t fmix32(uint32_t h) const
Expand All @@ -265,11 +275,7 @@ struct SparkMurmurHash3_32 {
return h;
}

template <typename T>
result_type __device__ inline operator()(T const& key) const
{
return compute(key);
}
result_type __device__ inline operator()(Key const& key) const { return compute(key); }

// compute wrapper for floating point types
template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
Expand Down Expand Up @@ -346,72 +352,52 @@ struct SparkMurmurHash3_32 {
};

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(bool const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<bool>::operator()(bool const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int8_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<int8_t>::operator()(int8_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int16_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<uint8_t>::operator()(uint8_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint8_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<int16_t>::operator()(int16_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint16_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
uint16_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return this->compute<uint64_t>(key.value());
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return this->compute<uint64_t>(key.value());
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
// Generates the Spark MurmurHash3 hash value, mimicking the conversion:
Expand Down Expand Up @@ -453,37 +439,66 @@ hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::list_view const&) const
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
{
cudf_assert(false && "List column hashing is not supported");
return 0;
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::struct_view const&) const
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
{
cudf_assert(false && "Direct hashing of struct_view is not supported");
return 0;
}

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

/**
* @brief This hash function simply returns the value that is asked to be hash
* reinterpreted as the result_type of the functor.
*/
template <typename Key>
struct IdentityHash {
using result_type = hash_value_type;
IdentityHash() = default;
constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}

template <typename T, std::enable_if_t<!std::is_arithmetic_v<T>>* = nullptr>
constexpr result_type operator()(T const& key) const
template <typename return_type = result_type>
constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
{
cudf_assert(false && "IdentityHash does not support this data type");
return 0;
}

template <typename T, std::enable_if_t<std::is_arithmetic_v<T>>* = nullptr>
constexpr result_type operator()(T const& key) const
template <typename return_type = result_type>
constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
{
return static_cast<result_type>(key);
}
Expand All @@ -492,4 +507,5 @@ struct IdentityHash {
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
};

using default_hash = MurmurHash3_32;
template <typename Key>
using default_hash = MurmurHash3_32<Key>;
10 changes: 5 additions & 5 deletions cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -430,14 +430,14 @@ class row_lexicographic_comparator {
* @tparam hash_function Hash functor to use for hashing elements.
* @tparam Nullate A cudf::nullate type describing how to check for nulls.
*/
template <typename hash_function, typename Nullate>
template <template <typename> class hash_function, typename Nullate>
class element_hasher {
public:
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
return hash_function{}(col.element<T>(row_index));
return hash_function<T>{}(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
Expand All @@ -450,7 +450,7 @@ class element_hasher {
Nullate has_nulls;
};

template <typename hash_function, typename Nullate>
template <template <typename> class hash_function, typename Nullate>
class element_hasher_with_seed {
public:
__device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Expand All @@ -467,7 +467,7 @@ class element_hasher_with_seed {
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
return hash_function{_seed}(col.element<T>(row_index));
return hash_function<T>{_seed}(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
Expand All @@ -489,7 +489,7 @@ class element_hasher_with_seed {
* @tparam hash_function Hash functor to use for hashing elements.
* @tparam Nullate A cudf::nullate type describing how to check for nulls.
*/
template <typename hash_function, typename Nullate>
template <template <typename> class hash_function, typename Nullate>
class row_hasher {
public:
row_hasher() = delete;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/hash/concurrent_unordered_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
*/
template <typename Key,
typename Element,
typename Hasher = default_hash,
typename Hasher = default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = default_allocator<thrust::pair<Key, Element>>>
class concurrent_unordered_map {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/hash/concurrent_unordered_multimap.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
* Copyright (c) 2017-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,7 +52,7 @@ template <typename Key,
typename size_type,
Key unused_key,
Element unused_element,
typename Hasher = default_hash,
typename Hasher = default_hash<Key>,
typename Equality = equal_to<Key>,
typename Allocator = managed_allocator<thrust::pair<Key, Element>>,
bool count_collisions = false>
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/hash/hashing.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ std::vector<column_view> to_leaf_columns(IterType iter_begin, IterType iter_end)

} // namespace

template <typename hash_function>
template <template <typename> class hash_function>
std::unique_ptr<column> serial_murmur_hash3_32(table_view const& input,
uint32_t seed,
rmm::cuda_stream_view stream,
Expand Down
Loading

0 comments on commit c972ba4

Please sign in to comment.