From 600b872f3dc5ea9d072273f9eb89acaf9fdc476c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 8 Mar 2022 14:07:01 -0600 Subject: [PATCH] Refactor hash function templates and `hash_combine` (#10379) This PR refactors a few pieces of libcudf's hash functions: - Define the utility function `hash_combine` only once (with 32/64 bit overloads), rather than several times in the codebase - ~Remove class template parameter from `MurmurHash3_32` and related classes. This template parameter was redundant. We already use a template for the argument of the `compute` method, which is called by `operator()`, so I put the template parameter on `operator()` instead of the whole class. I think this removal of the template parameter could be considered API-breaking so I added the `breaking` label.~ I retracted this change after conversation with @jrhemstad. I'll look into a different way to do this soon, using a dispatch-to-invoke approach as in #8217. This addresses part of issue #10081. I have a few more things I'd like to try, but this felt like a nicely-scoped PR so I stopped here for the moment. I benchmarked the code before and after making these changes and saw a small but consistent decrease in runtime. The benchmarks in `HashBenchmark/{HASH_MURMUR3,HASH_SERIAL_MURMUR3,HASH_SPARK_MURMUR3}_{nulls,no_nulls}/*` all decreased or saw no change in runtime, with a geometric mean of 2.87% less time. The benchmarks in `Hashing/hash_partition/*` all decreased or saw no change in runtime, with a geometric mean of 2.37% less time. For both sets of benchmarks, the largest data sizes saw more significant decreases in runtime, with a best-improvement of 7.38% less time in `HashBenchmark/HASH_MURMUR3_nulls/16777216` (similar for other large data sizes) and a best-improvement of 10.54% less time in `Hashing/hash_partition/1048576/256/64` (similar for other large data sizes). Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Conor Hoekstra (https://github.com/codereport) URL: https://github.com/rapidsai/cudf/pull/10379 --- cpp/include/cudf/detail/hashing.hpp | 35 ++++- .../cudf/detail/utilities/hash_functions.cuh | 124 +++++------------- cpp/include/cudf/table/row_operators.cuh | 23 ++-- 3 files changed, 72 insertions(+), 110 deletions(-) diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 0fc807593fb..5854654b436 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,20 +61,41 @@ std::unique_ptr serial_murmur_hash3_32( * http://www.boost.org/LICENSE_1_0.txt) */ /** - * @brief Combines two hashed values into a single hashed value. + * @brief Combines two hash values into a single hash value. * - * Adapted from Boost hash_combine function, modified for 64-bit + * Taken from the Boost hash_combine function. * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html * - * @param lhs The first hashed value - * @param rhs The second hashed value + * @param lhs The first hash value + * @param rhs The second hash value + * @return Combined hash value + */ +constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs) +{ + return lhs ^ (rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2)); +} + +/* Copyright 2005-2014 Daniel James. + * + * Use, modification and distribution is subject to the Boost Software + * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +/** + * @brief Combines two hash values into a single hash value. + * + * Adapted from Boost hash_combine function and modified for 64-bit. + * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html + * + * @param lhs The first hash value + * @param rhs The second hash value * @return Combined hash value */ constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs) { - lhs ^= rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2); - return lhs; + return lhs ^ (rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2)); } + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 6cf1acd2f5a..7eefdc90f4b 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -90,9 +90,9 @@ struct MurmurHash3_32 { MurmurHash3_32() = default; constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {} - [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const + [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const { - return (x << r) | (x >> (32 - r)); + return __funnelshift_l(x, x, r); // Equivalent to (x << r) | (x >> (32 - r)) } [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const @@ -114,32 +114,6 @@ struct MurmurHash3_32 { return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); } - /* Copyright 2005-2014 Daniel James. - * - * Use, modification and distribution is subject to the Boost Software - * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - /** - * @brief Combines two hash values into a new single hash value. Called - * repeatedly to create a hash value from several variables. - * Taken from the Boost hash_combine function - * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html - * - * @param lhs The first hash value to combine - * @param rhs The second hash value to combine - * - * @returns A hash value that intelligently combines the lhs and rhs hash values - */ - constexpr result_type hash_combine(result_type lhs, result_type rhs) const - { - result_type combined{lhs}; - - combined ^= rhs + 0x9e3779b9 + (combined << 6) + (combined >> 2); - - return combined; - } - // TODO Do we need this operator() and/or compute? Probably not both. [[nodiscard]] result_type __device__ inline operator()(Key const& key) const { @@ -218,28 +192,25 @@ hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& k return this->compute(static_cast(key)); } -/** - * @brief Specialization of MurmurHash3_32 operator for strings. - */ template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - cudf::string_view const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const { - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return this->compute_bytes(data, len); + return this->compute_floating_point(key); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const { return this->compute_floating_point(key); } template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::string_view const& key) const { - return this->compute_floating_point(key); + auto const data = reinterpret_cast(key.data()); + auto const len = key.size_bytes(); + return this->compute_bytes(data, len); } template <> @@ -286,9 +257,9 @@ struct SparkMurmurHash3_32 { SparkMurmurHash3_32() = default; constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {} - __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const + [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, uint32_t r) const { - return (x << r) | (x >> (32 - r)); + return __funnelshift_l(x, x, r); // Equivalent to (x << r) | (x >> (32 - r)) } __device__ inline uint32_t fmix32(uint32_t h) const @@ -408,6 +379,27 @@ hash_value_type __device__ inline SparkMurmurHash3_32::operator()( return this->compute(key); } +template <> +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const +{ + return this->compute_floating_point(key); +} + +template <> +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const +{ + return this->compute_floating_point(key); +} + +template <> +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + cudf::string_view const& key) const +{ + auto const data = reinterpret_cast(key.data()); + auto const len = key.size_bytes(); + return this->compute_bytes(data, len); +} + template <> hash_value_type __device__ inline SparkMurmurHash3_32::operator()( numeric::decimal32 const& key) const @@ -480,30 +472,6 @@ hash_value_type __device__ inline SparkMurmurHash3_32::operat return 0; } -/** - * @brief Specialization of MurmurHash3_32 operator for strings. - */ -template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()( - cudf::string_view const& key) const -{ - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return this->compute_bytes(data, len); -} - -template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const -{ - return this->compute_floating_point(key); -} - -template <> -hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const -{ - return this->compute_floating_point(key); -} - /** * @brief This hash function simply returns the value that is asked to be hash * reinterpreted as the result_type of the functor. @@ -514,32 +482,6 @@ struct IdentityHash { IdentityHash() = default; constexpr IdentityHash(uint32_t seed) : m_seed(seed) {} - /* Copyright 2005-2014 Daniel James. - * - * Use, modification and distribution is subject to the Boost Software - * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - /** - * @brief Combines two hash values into a new single hash value. Called - * repeatedly to create a hash value from several variables. - * Taken from the Boost hash_combine function - * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html - * - * @param lhs The first hash value to combine - * @param rhs The second hash value to combine - * - * @returns A hash value that intelligently combines the lhs and rhs hash values - */ - constexpr result_type hash_combine(result_type lhs, result_type rhs) const - { - result_type combined{lhs}; - - combined ^= rhs + 0x9e3779b9 + (combined << 6) + (combined >> 2); - - return combined; - } - template constexpr std::enable_if_t, return_type> operator()( Key const& key) const diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 5572c98fa58..6ff4833cb14 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -503,18 +504,14 @@ class row_hasher { __device__ auto operator()(size_type row_index) const { - auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) { - return hash_function{}.hash_combine(lhs, rhs); - }; - // Hash the first column w/ the seed - auto const initial_hash = - hash_combiner(hash_value_type{0}, - type_dispatcher( - _table.column(0).type(), - element_hasher_with_seed{_has_nulls, _seed}, - _table.column(0), - row_index)); + auto const initial_hash = cudf::detail::hash_combine( + hash_value_type{0}, + type_dispatcher( + _table.column(0).type(), + element_hasher_with_seed{_has_nulls, _seed}, + _table.column(0), + row_index)); // Hashes an element in a column auto hasher = [=](size_type column_index) { @@ -533,7 +530,9 @@ class row_hasher { thrust::make_counting_iterator(_table.num_columns()), hasher, initial_hash, - hash_combiner); + [](hash_value_type lhs, hash_value_type rhs) { + return cudf::detail::hash_combine(lhs, rhs); + }); } private: