diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 3faff257cdf..1a56410e846 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -83,6 +83,7 @@ void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destinatio // algorithms are optimized for their respective platforms. You can still // compile and run any of them on any platform, but your performance with the // non-native version will be less than optimal. +template struct MurmurHash3_32 { using result_type = hash_value_type; @@ -114,8 +115,7 @@ struct MurmurHash3_32 { } // TODO Do we need this operator() and/or compute? Probably not both. - template - [[nodiscard]] result_type __device__ inline operator()(T const& key) const + [[nodiscard]] result_type __device__ inline operator()(Key const& key) const { return compute(key); } @@ -187,72 +187,82 @@ struct MurmurHash3_32 { }; template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const { return this->compute(static_cast(key)); } +/** + * @brief Specialization of MurmurHash3_32 operator for strings. 
+ */ template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::string_view const& key) const { - return this->compute_floating_point(key); + auto const data = reinterpret_cast(key.data()); + auto const len = key.size_bytes(); + return this->compute_bytes(data, len); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const { return this->compute_floating_point(key); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::string_view const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const { - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return this->compute_bytes(data, len); + return this->compute_floating_point(key); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal32 const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + numeric::decimal32 const& key) const { return this->compute(key.value()); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal64 const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + numeric::decimal64 const& key) const { return this->compute(key.value()); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(numeric::decimal128 const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + numeric::decimal128 const& key) const { return this->compute(key.value()); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::list_view const&) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::list_view const& key) const { cudf_assert(false && "List column hashing is not 
supported"); return 0; } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(cudf::struct_view const&) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::struct_view const& key) const { cudf_assert(false && "Direct hashing of struct_view is not supported"); return 0; } +template struct SparkMurmurHash3_32 { using result_type = hash_value_type; SparkMurmurHash3_32() = default; constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {} - [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const + __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const { - return __funnelshift_l(x, x, r); // Equivalent to (x << r) | (x >> (32 - r)) + return (x << r) | (x >> (32 - r)); } __device__ inline uint32_t fmix32(uint32_t h) const @@ -265,11 +275,7 @@ struct SparkMurmurHash3_32 { return h; } - template - result_type __device__ inline operator()(T const& key) const - { - return compute(key); - } + result_type __device__ inline operator()(Key const& key) const { return compute(key); } // compute wrapper for floating point types template >* = nullptr> @@ -346,72 +352,52 @@ struct SparkMurmurHash3_32 { }; template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(bool const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(bool const& key) const { return this->compute(key); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int8_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int8_t const& key) const { return this->compute(key); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int16_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint8_t const& key) const { return this->compute(key); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint8_t const& key) const +hash_value_type __device__ 
inline SparkMurmurHash3_32::operator()(int16_t const& key) const { return this->compute(key); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint16_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + uint16_t const& key) const { return this->compute(key); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const -{ - return this->compute_floating_point(key); -} - -template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const -{ - return this->compute_floating_point(key); -} - -template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()( - cudf::string_view const& key) const -{ - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return this->compute_bytes(data, len); -} - -template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()( +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( numeric::decimal32 const& key) const { return this->compute(key.value()); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()( +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( numeric::decimal64 const& key) const { return this->compute(key.value()); } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()( +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( numeric::decimal128 const& key) const { // Generates the Spark MurmurHash3 hash value, mimicking the conversion: @@ -453,37 +439,66 @@ hash_value_type __device__ inline SparkMurmurHash3_32::operator()( } template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::list_view const&) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + cudf::list_view const& key) const { cudf_assert(false && "List column hashing is not supported"); return 0; } template <> 
-hash_value_type __device__ inline SparkMurmurHash3_32::operator()(cudf::struct_view const&) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + cudf::struct_view const& key) const { cudf_assert(false && "Direct hashing of struct_view is not supported"); return 0; } +/** + * @brief Specialization of SparkMurmurHash3_32 operator for strings. + */ +template <> +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + cudf::string_view const& key) const +{ + auto const data = reinterpret_cast(key.data()); + auto const len = key.size_bytes(); + return this->compute_bytes(data, len); +} + +template <> +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const +{ + return this->compute_floating_point(key); +} + +template <> +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const +{ + return this->compute_floating_point(key); +} + /** * @brief This hash function simply returns the value that is asked to be hashed * reinterpreted as the result_type of the functor. 
*/ +template struct IdentityHash { using result_type = hash_value_type; IdentityHash() = default; constexpr IdentityHash(uint32_t seed) : m_seed(seed) {} - template >* = nullptr> - constexpr result_type operator()(T const& key) const + template + constexpr std::enable_if_t, return_type> operator()( + Key const& key) const { cudf_assert(false && "IdentityHash does not support this data type"); return 0; } - template >* = nullptr> - constexpr result_type operator()(T const& key) const + template + constexpr std::enable_if_t, return_type> operator()( + Key const& key) const { return static_cast(key); } @@ -492,4 +507,5 @@ struct IdentityHash { uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; }; -using default_hash = MurmurHash3_32; +template +using default_hash = MurmurHash3_32; diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 730078a2675..6ff4833cb14 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -430,14 +430,14 @@ class row_lexicographic_comparator { * @tparam hash_function Hash functor to use for hashing elements. * @tparam Nullate A cudf::nullate type describing how to check for nulls. */ -template +template