Skip to content

Commit

Permalink
Remove template from hashing class, template on operator() and compute instead.
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice committed Mar 1, 2022
1 parent 0651f38 commit 466243b
Show file tree
Hide file tree
Showing 10 changed files with 80 additions and 99 deletions.
131 changes: 58 additions & 73 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
template <typename Key>
struct MurmurHash3_32 {
using result_type = hash_value_type;

Expand Down Expand Up @@ -115,7 +114,8 @@ struct MurmurHash3_32 {
}

// TODO Do we need this operator() and/or compute? Probably not both.
[[nodiscard]] result_type __device__ inline operator()(Key const& key) const
template <typename T>
[[nodiscard]] result_type __device__ inline operator()(T const& key) const
{
return compute(key);
}
Expand Down Expand Up @@ -187,82 +187,72 @@ struct MurmurHash3_32 {
};

template <>
hash_value_type __device__ inline MurmurHash3_32<bool>::operator()(bool const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const
{
return this->compute(static_cast<uint8_t>(key));
}

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline MurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline MurmurHash3_32<float>::operator()(float const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline MurmurHash3_32<double>::operator()(double const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::string_view const& key) const
{
return this->compute_floating_point(key);
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal32 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal64 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal128 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::list_view const& key) const
{
cudf_assert(false && "List column hashing is not supported");
return 0;
}

template <>
hash_value_type __device__ inline MurmurHash3_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::struct_view const& key) const
{
cudf_assert(false && "Direct hashing of struct_view is not supported");
return 0;
}

template <typename Key>
struct SparkMurmurHash3_32 {
using result_type = hash_value_type;

SparkMurmurHash3_32() = default;
constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {}

__device__ inline uint32_t rotl32(uint32_t x, int8_t r) const
[[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const
{
return (x << r) | (x >> (32 - r));
return __funnelshift_l(x, x, r); // Equivalent to (x << r) | (x >> (32 - r))
}

__device__ inline uint32_t fmix32(uint32_t h) const
Expand All @@ -275,7 +265,11 @@ struct SparkMurmurHash3_32 {
return h;
}

result_type __device__ inline operator()(Key const& key) const { return compute(key); }
template <typename T>
result_type __device__ inline operator()(T const& key) const
{
return compute(key);
}

// compute wrapper for floating point types
template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
Expand Down Expand Up @@ -352,52 +346,72 @@ struct SparkMurmurHash3_32 {
};

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<bool>::operator()(bool const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(bool const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<int8_t>::operator()(int8_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int8_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<uint8_t>::operator()(uint8_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int16_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<int16_t>::operator()(int16_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint8_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<uint16_t>::operator()(
uint16_t const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint16_t const& key) const
{
return this->compute<uint32_t>(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal32>::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
numeric::decimal32 const& key) const
{
return this->compute<uint64_t>(key.value());
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal64>::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
numeric::decimal64 const& key) const
{
return this->compute<uint64_t>(key.value());
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
numeric::decimal128 const& key) const
{
// Generates the Spark MurmurHash3 hash value, mimicking the conversion:
Expand Down Expand Up @@ -439,66 +453,38 @@ hash_value_type __device__ inline SparkMurmurHash3_32<numeric::decimal128>::oper
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::list_view const& key) const
{
cudf_assert(false && "List column hashing is not supported");
return 0;
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::struct_view>::operator()(
hash_value_type __device__ inline SparkMurmurHash3_32::operator()(
cudf::struct_view const& key) const
{
cudf_assert(false && "Direct hashing of struct_view is not supported");
return 0;
}

/**
* @brief Specialization of SparkMurmurHash3_32 operator for strings.
*/
template <>
hash_value_type __device__ inline SparkMurmurHash3_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return this->compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(float const& key) const
{
return this->compute_floating_point(key);
}

template <>
hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(double const& key) const
{
return this->compute_floating_point(key);
}

/**
* @brief This hash function simply returns the value that is asked to be hashed,
* converted (via static_cast) to the result_type of the functor.
*/
template <typename Key>
struct IdentityHash {
using result_type = hash_value_type;
IdentityHash() = default;
constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}

template <typename return_type = result_type>
constexpr std::enable_if_t<!std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
template <typename T, std::enable_if_t<!std::is_arithmetic_v<T>>* = nullptr>
constexpr result_type operator()(T const& key) const
{
cudf_assert(false && "IdentityHash does not support this data type");
return 0;
}

template <typename return_type = result_type>
constexpr std::enable_if_t<std::is_arithmetic_v<Key>, return_type> operator()(
Key const& key) const
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>>* = nullptr>
constexpr result_type operator()(T const& key) const
{
return static_cast<result_type>(key);
}
Expand All @@ -507,5 +493,4 @@ struct IdentityHash {
uint32_t m_seed{cudf::DEFAULT_HASH_SEED};
};

template <typename Key>
using default_hash = MurmurHash3_32<Key>;
using default_hash = MurmurHash3_32;
10 changes: 5 additions & 5 deletions cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -430,14 +430,14 @@ class row_lexicographic_comparator {
* @tparam hash_function Hash functor to use for hashing elements.
* @tparam Nullate A cudf::nullate type describing how to check for nulls.
*/
template <template <typename> class hash_function, typename Nullate>
template <typename hash_function, typename Nullate>
class element_hasher {
public:
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
return hash_function<T>{}(col.element<T>(row_index));
return hash_function{}(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
Expand All @@ -450,7 +450,7 @@ class element_hasher {
Nullate has_nulls;
};

template <template <typename> class hash_function, typename Nullate>
template <typename hash_function, typename Nullate>
class element_hasher_with_seed {
public:
__device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Expand All @@ -467,7 +467,7 @@ class element_hasher_with_seed {
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
return hash_function<T>{_seed}(col.element<T>(row_index));
return hash_function{_seed}(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
Expand All @@ -489,7 +489,7 @@ class element_hasher_with_seed {
* @tparam hash_function Hash functor to use for hashing elements.
* @tparam Nullate A cudf::nullate type describing how to check for nulls.
*/
template <template <typename> class hash_function, typename Nullate>
template <typename hash_function, typename Nullate>
class row_hasher {
public:
row_hasher() = delete;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/hash/concurrent_unordered_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
*/
template <typename Key,
typename Element,
typename Hasher = default_hash<Key>,
typename Hasher = default_hash,
typename Equality = equal_to<Key>,
typename Allocator = default_allocator<thrust::pair<Key, Element>>>
class concurrent_unordered_map {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/hash/concurrent_unordered_multimap.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION.
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,7 +52,7 @@ template <typename Key,
typename size_type,
Key unused_key,
Element unused_element,
typename Hasher = default_hash<Key>,
typename Hasher = default_hash,
typename Equality = equal_to<Key>,
typename Allocator = managed_allocator<thrust::pair<Key, Element>>,
bool count_collisions = false>
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/hash/hashing.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ std::vector<column_view> to_leaf_columns(IterType iter_begin, IterType iter_end)

} // namespace

template <template <typename> class hash_function>
template <typename hash_function>
std::unique_ptr<column> serial_murmur_hash3_32(table_view const& input,
uint32_t seed,
rmm::cuda_stream_view stream,
Expand Down
Loading

0 comments on commit 466243b

Please sign in to comment.