diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index e79107e32cf..7f3c05134e2 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include using hash_value_type = uint32_t; @@ -231,6 +232,9 @@ MD5ListHasher::operator()(column_device_view data_col, } struct MD5Hash { + MD5Hash() = default; + constexpr MD5Hash(uint32_t seed) : m_seed(seed) {} + void __device__ finalize(md5_intermediate_data* hash_state, char* result_location) const { auto const full_length = (static_cast(hash_state->message_length)) << 3; @@ -302,6 +306,9 @@ struct MD5Hash { { md5_process(col.element(row_index), hash_state); } + + private: + uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; }; template <> @@ -372,7 +379,7 @@ struct MurmurHash3_32 { using result_type = hash_value_type; MurmurHash3_32() = default; - CUDA_HOST_DEVICE_CALLABLE MurmurHash3_32(uint32_t seed) : m_seed(seed) {} + constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {} CUDA_DEVICE_CALLABLE uint32_t rotl32(uint32_t x, int8_t r) const { @@ -469,7 +476,7 @@ struct MurmurHash3_32 { } private: - uint32_t m_seed{0}; + uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; }; template <> @@ -564,7 +571,7 @@ struct SparkMurmurHash3_32 { using result_type = hash_value_type; SparkMurmurHash3_32() = default; - CUDA_HOST_DEVICE_CALLABLE SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {} + constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {} CUDA_DEVICE_CALLABLE uint32_t rotl32(uint32_t x, int8_t r) const { @@ -636,7 +643,7 @@ struct SparkMurmurHash3_32 { } private: - uint32_t m_seed{0}; + uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; }; template <> @@ -772,6 +779,8 @@ SparkMurmurHash3_32::operator()(double const& key) const template struct IdentityHash { using result_type = hash_value_type; + IdentityHash() = default; + constexpr IdentityHash(uint32_t seed) : m_seed(seed) {} /** * @brief Combines two hash values into a new single hash value. Called @@ -784,7 +793,7 @@ struct IdentityHash { * * @returns A hash value that intelligently combines the lhs and rhs hash values */ - CUDA_HOST_DEVICE_CALLABLE result_type hash_combine(result_type lhs, result_type rhs) const + constexpr result_type hash_combine(result_type lhs, result_type rhs) const { result_type combined{lhs}; @@ -794,19 +803,22 @@ struct IdentityHash { } template - CUDA_HOST_DEVICE_CALLABLE std::enable_if_t::value, return_type> - operator()(Key const& key) const + constexpr std::enable_if_t::value, return_type> operator()( + Key const& key) const { cudf_assert(false && "IdentityHash does not support this data type"); return 0; } template - CUDA_HOST_DEVICE_CALLABLE std::enable_if_t::value, return_type> - operator()(Key const& key) const + constexpr std::enable_if_t::value, return_type> operator()( + Key const& key) const { return static_cast(key); } + + private: + uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; }; template diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 3f95b8b417b..0fb5002a953 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -39,7 +39,7 @@ std::unique_ptr hash( table_view const& input, hash_id hash_function = hash_id::HASH_MURMUR3, std::vector const& initial_hash = {}, - uint32_t seed = 0, + uint32_t seed = DEFAULT_HASH_SEED, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index ddde26ec762..6b1ad7db08b 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -83,6 +83,9 @@ std::pair, std::vector> partition( * @param input The table to partition * @param columns_to_hash Indices of input columns to hash * @param num_partitions The number of partitions to use + * @param hash_function Optional hash id that chooses the hash function to use + * @param seed Optional seed value to the hash function + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory. * * @returns An output table and a vector of row offsets to each partition @@ -92,6 +95,7 @@ std::pair, std::vector> hash_partition( std::vector const& columns_to_hash, int num_partitions, hash_id hash_function = hash_id::HASH_MURMUR3, + uint32_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index decd2879f54..61d714c5538 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -428,6 +428,7 @@ template