From 541c5bff2bc0e6f9cd055b9fdd0d82cfbae72805 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 19 Jul 2023 12:10:22 -0400 Subject: [PATCH] Add XXHash_64 hash function to cudf (#13612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add XXHash_64 hash function to libcudf ``` std::unique_ptr xxhash_64( table_view const& input, uint64_t seed, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) - Daniel Jünger (https://github.com/sleeepyjack) URL: https://github.com/rapidsai/cudf/pull/13612 --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/hashing.hpp | 27 +- cpp/include/cudf/hashing/detail/hashing.hpp | 5 + cpp/src/hash/xxhash_64.cu | 337 ++++++++++++++++++++ cpp/tests/CMakeLists.txt | 8 +- cpp/tests/hashing/xxhash_64_test.cpp | 177 ++++++++++ 6 files changed, 549 insertions(+), 6 deletions(-) create mode 100644 cpp/src/hash/xxhash_64.cu create mode 100644 cpp/tests/hashing/xxhash_64_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1f63650d3a0..61662668d45 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -347,6 +347,7 @@ add_library( src/hash/murmurhash3_x86_32.cu src/hash/murmurhash3_x64_128.cu src/hash/spark_murmurhash3_x86_32.cu + src/hash/xxhash_64.cu src/interop/dlpack.cpp src/interop/from_arrow.cu src/interop/to_arrow.cu diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 83a0432a182..72e32715ed4 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -70,10 +70,11 @@ std::unique_ptr hash( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +//! Hash APIs namespace hashing { /** - * @brief Computes the MurmurHash3 32-bit of each row in the given table + * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table * * This function computes the hash of each column using the `seed` for the first column * and the resulting hash as a seed for the next column and so on. @@ -93,7 +94,7 @@ std::unique_ptr murmurhash3_x86_32( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Computes the hash values of each row in the input set of columns + * @brief Computes the MurmurHash3 64-bit hash value of each row in the given table * * This function takes a 64-bit seed value and returns hash values using the * MurmurHash3_x64_128 algorithm. The hash produces in two uint64 values per row. @@ -112,7 +113,7 @@ std::unique_ptr murmurhash3_x64_128( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Computes the MurmurHash3 32-bit of each row in the given table + * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table * * This function computes the hash similar to MurmurHash3_x86_32 with special processing * to match Spark's implementation results. @@ -131,7 +132,7 @@ std::unique_ptr spark_murmurhash3_x86_32( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Computes the MD5 hash of each row in the given table + * @brief Computes the MD5 hash value of each row in the given table * * @param input The table of columns to hash * @param stream CUDA stream used for device memory operations and kernel launches @@ -144,6 +145,24 @@ std::unique_ptr md5( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Computes the XXHash_64 hash value of each row in the given table + * + * This function takes a 64-bit seed value and returns a column of type UINT64. + * + * @param input The table of columns to hash + * @param seed Optional seed value to use for the hash function + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * + * @returns A column where each row is the hash of a row from the input + */ +std::unique_ptr xxhash_64( + table_view const& input, + uint64_t seed = DEFAULT_HASH_SEED, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace hashing /** @} */ // end of group diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp index c8ed9ac2a0d..f08d0fbb849 100644 --- a/cpp/include/cudf/hashing/detail/hashing.hpp +++ b/cpp/include/cudf/hashing/detail/hashing.hpp @@ -46,6 +46,11 @@ std::unique_ptr md5(table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +std::unique_ptr xxhash_64(table_view const& input, + uint64_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); + /* Copyright 2005-2014 Daniel James. * * Use, modification and distribution is subject to the Boost Software diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu new file mode 100644 index 00000000000..e17bc134420 --- /dev/null +++ b/cpp/src/hash/xxhash_64.cu @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace hashing { +namespace detail { + +namespace { + +using hash_value_type = uint64_t; + +template +struct XXHash_64 { + using result_type = hash_value_type; + + constexpr XXHash_64() = default; + constexpr XXHash_64(hash_value_type seed) : m_seed(seed) {} + + __device__ inline uint32_t getblock32(std::byte const* data, std::size_t offset) const + { + // Read a 4-byte value from the data pointer as individual bytes for safe + // unaligned access (very likely for string types). + auto block = reinterpret_cast(data + offset); + return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); + } + + __device__ inline uint64_t getblock64(std::byte const* data, std::size_t offset) const + { + uint64_t result = getblock32(data, offset + 4); + result = result << 32; + return result | getblock32(data, offset); + } + + result_type __device__ inline operator()(Key const& key) const { return compute(key); } + + template + result_type __device__ inline compute(T const& key) const + { + auto data = device_span(reinterpret_cast(&key), sizeof(T)); + return compute_bytes(data); + } + + result_type __device__ inline compute_remaining_bytes(device_span& in, + std::size_t offset, + result_type h64) const + { + // remaining data can be processed in 8-byte chunks + if ((in.size() % 32) >= 8) { + for (; offset <= in.size() - 8; offset += 8) { + uint64_t k1 = getblock64(in.data(), offset) * prime2; + + k1 = rotate_bits_left(k1, 31) * prime1; + h64 ^= k1; + h64 = rotate_bits_left(h64, 27) * prime1 + prime4; + } + } + + // remaining data can be processed in 4-byte chunks + if ((in.size() % 8) >= 4) { + for (; offset <= in.size() - 4; offset += 4) { + h64 ^= (getblock32(in.data(), offset) & 0xfffffffful) * prime1; + h64 = rotate_bits_left(h64, 23) * prime2 + prime3; + } + } + + // and the rest + if (in.size() % 4) { + while (offset < in.size()) { + h64 ^= (std::to_integer(in[offset]) & 0xff) * prime5; + h64 = rotate_bits_left(h64, 11) * prime1; + ++offset; + } + } + return h64; + } + + result_type __device__ compute_bytes(device_span& in) const + { + uint64_t offset = 0; + uint64_t h64; + // data can be processed in 32-byte chunks + if (in.size() >= 32) { + auto limit = in.size() - 32; + uint64_t v1 = m_seed + prime1 + prime2; + uint64_t v2 = m_seed + prime2; + uint64_t v3 = m_seed; + uint64_t v4 = m_seed - prime1; + + do { + // pipeline 4*8byte computations + v1 += getblock64(in.data(), offset) * prime2; + v1 = rotate_bits_left(v1, 31); + v1 *= prime1; + offset += 8; + v2 += getblock64(in.data(), offset) * prime2; + v2 = rotate_bits_left(v2, 31); + v2 *= prime1; + offset += 8; + v3 += getblock64(in.data(), offset) * prime2; + v3 = rotate_bits_left(v3, 31); + v3 *= prime1; + offset += 8; + v4 += getblock64(in.data(), offset) * prime2; + v4 = rotate_bits_left(v4, 31); + v4 *= prime1; + offset += 8; + } while (offset <= limit); + + h64 = rotate_bits_left(v1, 1) + rotate_bits_left(v2, 7) + rotate_bits_left(v3, 12) + + rotate_bits_left(v4, 18); + + v1 *= prime2; + v1 = rotate_bits_left(v1, 31); + v1 *= prime1; + h64 ^= v1; + h64 = h64 * prime1 + prime4; + + v2 *= prime2; + v2 = rotate_bits_left(v2, 31); + v2 *= prime1; + h64 ^= v2; + h64 = h64 * prime1 + prime4; + + v3 *= prime2; + v3 = rotate_bits_left(v3, 31); + v3 *= prime1; + h64 ^= v3; + h64 = h64 * prime1 + prime4; + + v4 *= prime2; + v4 = rotate_bits_left(v4, 31); + v4 *= prime1; + h64 ^= v4; + h64 = h64 * prime1 + prime4; + } else { + h64 = m_seed + prime5; + } + + h64 += in.size(); + + h64 = compute_remaining_bytes(in, offset, h64); + + return finalize(h64); + } + + constexpr __host__ __device__ std::uint64_t finalize(std::uint64_t h) const noexcept + { + h ^= h >> 33; + h *= prime2; + h ^= h >> 29; + h *= prime3; + h ^= h >> 32; + return h; + } + + private: + hash_value_type m_seed{}; + static constexpr uint64_t prime1 = 0x9e3779b185ebca87ul; + static constexpr uint64_t prime2 = 0xc2b2ae3d27d4eb4ful; + static constexpr uint64_t prime3 = 0x165667b19e3779f9ul; + static constexpr uint64_t prime4 = 0x85ebca77c2b2ae63ul; + static constexpr uint64_t prime5 = 0x27d4eb2f165667c5ul; +}; + +template <> +hash_value_type __device__ inline XXHash_64::operator()(bool const& key) const +{ + return compute(static_cast(key)); +} + +template <> +hash_value_type __device__ inline XXHash_64::operator()(float const& key) const +{ + return compute(normalize_nans(key)); +} + +template <> +hash_value_type __device__ inline XXHash_64::operator()(double const& key) const +{ + return compute(normalize_nans(key)); +} + +template <> +hash_value_type __device__ inline XXHash_64::operator()( + cudf::string_view const& key) const +{ + auto const len = key.size_bytes(); + auto data = device_span(reinterpret_cast(key.data()), len); + return compute_bytes(data); +} + +template <> +hash_value_type __device__ inline XXHash_64::operator()( + numeric::decimal32 const& key) const +{ + return compute(key.value()); +} + +template <> +hash_value_type __device__ inline XXHash_64::operator()( + numeric::decimal64 const& key) const +{ + return compute(key.value()); +} + +template <> +hash_value_type __device__ inline XXHash_64::operator()( + numeric::decimal128 const& key) const +{ + return compute(key.value()); +} + +/** + * @brief Computes the hash value of a row in the given table. + * + * @tparam Nullate A cudf::nullate type describing whether to check for nulls. + */ +template +class device_row_hasher { + public: + device_row_hasher(Nullate nulls, table_device_view const& t, hash_value_type seed) + : _check_nulls(nulls), _table(t), _seed(seed) + { + } + + __device__ auto operator()(size_type row_index) const noexcept + { + return cudf::detail::accumulate( + _table.begin(), + _table.end(), + _seed, + [row_index, nulls = _check_nulls] __device__(auto hash, auto column) { + return cudf::type_dispatcher( + column.type(), element_hasher_adapter{}, column, row_index, nulls, hash); + }); + } + + /** + * @brief Computes the hash value of an element in the given column. + */ + class element_hasher_adapter { + public: + template ())> + __device__ hash_value_type operator()(column_device_view const& col, + size_type const row_index, + Nullate const _check_nulls, + hash_value_type const _seed) const noexcept + { + if (_check_nulls && col.is_null(row_index)) { + return std::numeric_limits::max(); + } + auto const hasher = XXHash_64{_seed}; + return hasher(col.element(row_index)); + } + + template ())> + __device__ hash_value_type operator()(column_device_view const&, + size_type const, + Nullate const, + hash_value_type const) const noexcept + { + CUDF_UNREACHABLE("Unsupported type for XXHash_64"); + } + }; + + Nullate const _check_nulls; + table_device_view const _table; + hash_value_type const _seed; +}; + +} // namespace + +std::unique_ptr xxhash_64(table_view const& input, + uint64_t seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto output = make_numeric_column(data_type(type_to_id()), + input.num_rows(), + mask_state::UNALLOCATED, + stream, + mr); + + // Return early if there's nothing to hash + if (input.num_columns() == 0 || input.num_rows() == 0) { return output; } + + bool const nullable = has_nulls(input); + auto const input_view = table_device_view::create(input, stream); + auto output_view = output->mutable_view(); + + // Compute the hash value for each row + thrust::tabulate(rmm::exec_policy(stream), + output_view.begin(), + output_view.end(), + device_row_hasher(nullable, *input_view, seed)); + + return output; +} + +} // namespace detail + +std::unique_ptr xxhash_64(table_view const& input, + uint64_t seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::xxhash_64(input, seed, stream, mr); +} + +} // namespace hashing +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 142d7790cf0..8a0942eec0d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -164,8 +164,12 @@ ConfigureTest(DATETIME_OPS_TEST datetime/datetime_ops_test.cpp) # ################################################################################################## # * hashing tests --------------------------------------------------------------------------------- ConfigureTest( - HASHING_TEST hashing/md5_test.cpp hashing/murmurhash3_x86_32_test.cpp - hashing/murmurhash3_x64_128_test.cpp hashing/spark_murmurhash3_x86_32_test.cpp + HASHING_TEST + hashing/md5_test.cpp + hashing/murmurhash3_x86_32_test.cpp + hashing/murmurhash3_x64_128_test.cpp + hashing/spark_murmurhash3_x86_32_test.cpp + hashing/xxhash_64_test.cpp ) # ################################################################################################## diff --git a/cpp/tests/hashing/xxhash_64_test.cpp b/cpp/tests/hashing/xxhash_64_test.cpp new file mode 100644 index 00000000000..5916c4c2fb9 --- /dev/null +++ b/cpp/tests/hashing/xxhash_64_test.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +using NumericTypesNoBools = + cudf::test::Concat; + +template +class XXHash_64_TestTyped : public cudf::test::BaseFixture {}; + +TYPED_TEST_SUITE(XXHash_64_TestTyped, NumericTypesNoBools); + +TYPED_TEST(XXHash_64_TestTyped, TestAllNumeric) +{ + using T = TypeParam; + auto col1 = cudf::test::fixed_width_column_wrapper{ + {-1, -1, 0, 2, 22, 0, 11, 12, 116, 32, 0, 42, 7, 62, 1, -22, 0, 0}, + {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0}}; + auto col2 = cudf::test::fixed_width_column_wrapper{ + {-1, -1, 0, 2, 22, 1, 11, 12, 116, 32, 0, 42, 7, 62, 1, -22, 1, -22}, + {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0}}; + + auto output1 = cudf::hashing::xxhash_64(cudf::table_view({col1})); + auto output2 = cudf::hashing::xxhash_64(cudf::table_view({col2})); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view()); + + constexpr uint64_t seed = 7; + + output1 = cudf::hashing::xxhash_64(cudf::table_view({col1}), seed); + output2 = cudf::hashing::xxhash_64(cudf::table_view({col2}), seed); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view()); +} + +class XXHash_64_Test : public cudf::test::BaseFixture {}; + +TEST_F(XXHash_64_Test, TestInteger) +{ + auto col1 = + cudf::test::fixed_width_column_wrapper{{-127, + -70000, + 0, + 200000, + 128, + std::numeric_limits::max(), + std::numeric_limits::min(), + std::numeric_limits::lowest()}}; + + auto const output = cudf::hashing::xxhash_64(cudf::table_view({col1})); + + // these were generated using the CPU compiled version of the cuco xxhash_64 source + // https://github.com/NVIDIA/cuCollections/blob/dev/include/cuco/detail/hash_functions/xxhash.cuh + auto expected = cudf::test::fixed_width_column_wrapper({4827426872506142937ul, + 13867166853951622683ul, + 4246796580750024372ul, + 17339819992360460003ul, + 7292178400482025765ul, + 2971168436322821236ul, + 9380524276503839603ul, + 9380524276503839603ul}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(output->view(), expected); +} + +TEST_F(XXHash_64_Test, TestDouble) +{ + auto col1 = + cudf::test::fixed_width_column_wrapper{{-127., + -70000.125, + 0.0, + 200000.5, + 128.5, + -0.0, + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::max(), + std::numeric_limits::min(), + std::numeric_limits::lowest()}}; + + auto const output = cudf::hashing::xxhash_64(cudf::table_view({col1})); + + // these were generated using the CPU compiled version of the cuco xxhash_64 source + // https://github.com/NVIDIA/cuCollections/blob/dev/include/cuco/detail/hash_functions/xxhash.cuh + auto expected = cudf::test::fixed_width_column_wrapper({16892115221677838993ul, + 1686446903308179321ul, + 3803688792395291579ul, + 18250447068822614389ul, + 3511911086082166358ul, + 4558309869707674848ul, + 18031741628920313605ul, + 16838308782748609196ul, + 3127544388062992779ul, + 1692401401506680154ul, + 13770442912356326755ul}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(output->view(), expected); +} + +TEST_F(XXHash_64_Test, StringType) +{ + // clang-format off + auto col1 = cudf::test::strings_column_wrapper( + {"The", + "quick", + "brown fox", + "jumps over the lazy dog.", + "I am Jack's complete lack of null value", + "A very long (greater than 128 bytes/characters) to test a very long string. " + "2nd half of the very long string to verify the long string hashing happening.", + "Some multi-byte characters here: ééé", + "ééé", + "ééé ééé", + "ééé ééé ééé ééé", + "", + "!@#$%^&*(())", + "0123456789", + "{}|:<>?,./;[]=-"}); + // clang-format on + + auto output = cudf::hashing::xxhash_64(cudf::table_view({col1})); + + // these were generated using the CPU compiled version of the cuco xxhash_64 source + // https://github.com/NVIDIA/cuCollections/blob/dev/include/cuco/detail/hash_functions/xxhash.cuh + // Also verified these with https://pypi.org/project/xxhash/ + // using xxhash.xxh64(bytes(s,'utf-8')).intdigest() + auto expected = cudf::test::fixed_width_column_wrapper({4686269239494003989ul, + 6715983472207430822ul, + 8148134898123095730ul, + 17291005374665645904ul, + 2631835514925512071ul, + 4181420602165187991ul, + 8749004388517322364ul, + 17701789113925815768ul, + 8612485687958712810ul, + 5148645515269989956ul, + 17241709254077376921ul, + 7379359170906687646ul, + 4566581271137380327ul, + 17962149534752128981ul}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(output->view(), expected); +} + +TEST_F(XXHash_64_Test, TestFixedPoint) +{ + auto const col1 = cudf::test::fixed_point_column_wrapper( + {0, 100, -100, -999999999, 999999999}, numeric::scale_type{-3}); + auto const output = cudf::hashing::xxhash_64(cudf::table_view({col1})); + + // these were generated using the CPU compiled version of the cuco xxhash_64 source + // https://github.com/NVIDIA/cuCollections/blob/dev/include/cuco/detail/hash_functions/xxhash.cuh + // and passing the 'value' of each input (without the scale) as the decimal-type + auto expected = cudf::test::fixed_width_column_wrapper({4246796580750024372ul, + 5959467639951725378ul, + 4122185689695768261ul, + 3249245648192442585ul, + 8009575895491381648ul}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(output->view(), expected); +}