Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XXHash_64 hash function to cudf #13612

Merged
merged 48 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
05868c9
Add XXHash_64 hash function to cudf
davidwendt Jun 23, 2023
a4dd39b
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 23, 2023
f39bba0
fix typo in constant
davidwendt Jun 26, 2023
53603fc
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 26, 2023
f9436d3
fix rotate function
davidwendt Jun 27, 2023
f318e72
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 27, 2023
de00dc4
add string test; convert primes to hex; fix getblock logic
davidwendt Jun 27, 2023
660357e
fully-qualify calls to detail functions
davidwendt Jun 28, 2023
25202b6
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 29, 2023
a5a0d4b
fix bug in xxhash finalize step
davidwendt Jun 29, 2023
c9c0624
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 29, 2023
df5b8db
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 29, 2023
e4abab3
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jun 30, 2023
aaafd8e
add gtests for integer, double, fixed-point
davidwendt Jul 1, 2023
ac16d14
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 1, 2023
e3b6839
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 5, 2023
9dc57e4
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 6, 2023
3a5dd7d
fix merge conflicts
davidwendt Jul 10, 2023
3b4dbb8
rename hash64 to xxhash64
davidwendt Jul 10, 2023
41c64b4
local conflict fix
davidwendt Jul 10, 2023
7d09f76
resolve final conflict
davidwendt Jul 10, 2023
c55175e
fix merge conflicts
davidwendt Jul 11, 2023
1ce895d
fix cmake format style violation
davidwendt Jul 11, 2023
65b6bad
forgot to include the style fix
davidwendt Jul 11, 2023
a541a3e
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 12, 2023
9e2ab2c
undo unintentional unchanges
davidwendt Jul 12, 2023
6c5176b
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 12, 2023
cdac656
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 12, 2023
f4f98e0
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 13, 2023
fd7bbd4
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 13, 2023
2e89f98
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 14, 2023
cd4acbb
fix merge conflicts
davidwendt Jul 16, 2023
5fc46b7
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 17, 2023
a586b5b
change names of the gtests
davidwendt Jul 17, 2023
2585657
fix hash_functions.cuh references
davidwendt Jul 17, 2023
6ecf2a5
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 17, 2023
e1e7b8d
rename test source file
davidwendt Jul 17, 2023
9a64914
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 17, 2023
0624314
Merge branch 'fea-xxhash64' of github.com:davidwendt/cudf into fea-xx…
davidwendt Jul 17, 2023
cdec016
rename xxhash64 to xxhash_64
davidwendt Jul 17, 2023
b8f7b72
Merge branch 'fea-xxhash64' of github.com:davidwendt/cudf into fea-xx…
davidwendt Jul 18, 2023
3586abe
fix merge conflicts
davidwendt Jul 18, 2023
74cda04
fix cmake style violation
davidwendt Jul 18, 2023
e2b9197
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 18, 2023
d47d624
add some const decls
davidwendt Jul 18, 2023
606b736
fix doxygen wording for the hash APIs
davidwendt Jul 18, 2023
a5e3838
use device-span
davidwendt Jul 19, 2023
f1f39e9
Merge branch 'branch-23.08' into fea-xxhash64
davidwendt Jul 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ add_library(
src/hash/murmurhash3_x86_32.cu
src/hash/murmurhash3_x64_128.cu
src/hash/spark_murmurhash3_x86_32.cu
src/hash/xxhash_64.cu
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/to_arrow.cu
Expand Down
19 changes: 19 additions & 0 deletions cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ std::unique_ptr<column> hash(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

//! Hash APIs
namespace hashing {

/**
Expand Down Expand Up @@ -144,6 +145,24 @@ std::unique_ptr<column> md5(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the hash value of each row in the input set of columns
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
*
* This function takes a 64-bit seed value and returns a column of type UINT64.
*
* @param input The table of columns to hash
* @param seed Optional seed value to use for the hash function
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
*
* @returns A column where each row is the hash of a row from the input
*/
std::unique_ptr<column> xxhash_64(
table_view const& input,
uint64_t seed = DEFAULT_HASH_SEED,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace hashing

/** @} */ // end of group
Expand Down
5 changes: 5 additions & 0 deletions cpp/include/cudf/hashing/detail/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ std::unique_ptr<column> md5(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

std::unique_ptr<column> xxhash_64(table_view const& input,
uint64_t seed,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource* mr);

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
Expand Down
336 changes: 336 additions & 0 deletions cpp/src/hash/xxhash_64.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,336 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/hashing/detail/hashing.hpp>
#include <cudf/table/table_device_view.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/tabulate.h>

namespace cudf {
namespace hashing {
namespace detail {

namespace {

using hash_value_type = uint64_t;

template <typename Key>
struct XXHash_64 {
using result_type = hash_value_type;
davidwendt marked this conversation as resolved.
Show resolved Hide resolved

constexpr XXHash_64() = default;
constexpr XXHash_64(hash_value_type seed) : m_seed(seed) {}

__device__ inline uint32_t getblock32(std::byte const* data, cudf::size_type offset) const
{
// Read a 4-byte value from the data pointer as individual bytes for safe
// unaligned access (very likely for string types).
auto block = reinterpret_cast<uint8_t const*>(data + offset);
return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24);
Comment on lines +48 to +49

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will always emit 4x pipelined LDG.E.U8. I wonder if we should add an extra path that performs a single LDG.E.32 in case the pointer is aligned correctly.
Dumb question: When can the start of a string be not aligned to 4 bytes?

Instead of loading and shifting the result, a common pattern is to use a memcpy for this:

uint32_t ret;
memcpy(&ret, block, sizeof(uint32_t));
return ret;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A string is almost never aligned to 4 bytes. A string is rarely allocated individually but usually part of a larger contiguous block of memory.
The plan is to move these block functions into a separate utilities header where I think we could optimize based on type.
Reference #13706

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A string is rarely allocated individually but usually part of a larger contiguous block of memory.

Good point! Let's leave this as-is for now then.

}

__device__ inline uint64_t getblock64(std::byte const* data, cudf::size_type offset) const
{
uint64_t result = getblock32(data, offset + 4);
result = result << 32;
return result | getblock32(data, offset);
}

result_type __device__ inline operator()(Key const& key) const { return compute(key); }

template <typename T>
result_type __device__ inline compute(T const& key) const
{
return compute_bytes(reinterpret_cast<std::byte const*>(&key), sizeof(T));
}

result_type __device__ inline compute_remaining_bytes(std::byte const* data,
cudf::size_type nbytes,
cudf::size_type offset,
result_type h64) const
{
// remaining data can be processed in 8-byte chunks
if ((nbytes % 32) >= 8) {
for (; offset <= nbytes - 8; offset += 8) {
uint64_t k1 = getblock64(data, offset) * prime2;

k1 = rotate_bits_left(k1, 31) * prime1;
h64 ^= k1;
h64 = rotate_bits_left(h64, 27) * prime1 + prime4;
}
}

// remaining data can be processed in 4-byte chunks
if ((nbytes % 8) >= 4) {
for (; offset <= nbytes - 4; offset += 4) {
h64 ^= (getblock32(data, offset) & 0xfffffffful) * prime1;
h64 = rotate_bits_left(h64, 23) * prime2 + prime3;
}
}

// and the rest
if (nbytes % 4) {
while (offset < nbytes) {
h64 ^= (static_cast<uint8_t>(data[offset]) & 0xff) * prime5;
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
h64 = rotate_bits_left(h64, 11) * prime1;
++offset;
}
}
return h64;
}

result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const nbytes) const
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
{
uint64_t offset = 0;
uint64_t h64;
// data can be processed in 32-byte chunks
if (nbytes >= 32) {
auto limit = nbytes - 32;
uint64_t v1 = m_seed + prime1 + prime2;
uint64_t v2 = m_seed + prime2;
uint64_t v3 = m_seed;
uint64_t v4 = m_seed - prime1;

do {
// pipeline 4*8byte computations
v1 += getblock64(data, offset) * prime2;
v1 = rotate_bits_left(v1, 31);
v1 *= prime1;
offset += 8;
v2 += getblock64(data, offset) * prime2;
v2 = rotate_bits_left(v2, 31);
v2 *= prime1;
offset += 8;
v3 += getblock64(data, offset) * prime2;
v3 = rotate_bits_left(v3, 31);
v3 *= prime1;
offset += 8;
v4 += getblock64(data, offset) * prime2;
v4 = rotate_bits_left(v4, 31);
v4 *= prime1;
offset += 8;
} while (offset <= limit);

h64 = rotate_bits_left(v1, 1) + rotate_bits_left(v2, 7) + rotate_bits_left(v3, 12) +
rotate_bits_left(v4, 18);

v1 *= prime2;
v1 = rotate_bits_left(v1, 31);
v1 *= prime1;
h64 ^= v1;
h64 = h64 * prime1 + prime4;

v2 *= prime2;
v2 = rotate_bits_left(v2, 31);
v2 *= prime1;
h64 ^= v2;
h64 = h64 * prime1 + prime4;

v3 *= prime2;
v3 = rotate_bits_left(v3, 31);
v3 *= prime1;
h64 ^= v3;
h64 = h64 * prime1 + prime4;

v4 *= prime2;
v4 = rotate_bits_left(v4, 31);
v4 *= prime1;
h64 ^= v4;
h64 = h64 * prime1 + prime4;
} else {
h64 = m_seed + prime5;
}

h64 += nbytes;

h64 = compute_remaining_bytes(data, nbytes, offset, h64);

return finalize(h64);
}

constexpr __host__ __device__ std::uint64_t finalize(std::uint64_t h) const noexcept
{
h ^= h >> 33;
h *= prime2;
h ^= h >> 29;
h *= prime3;
h ^= h >> 32;
return h;
}

private:
hash_value_type m_seed{};
static constexpr uint64_t prime1 = 0x9e3779b185ebca87ul;
static constexpr uint64_t prime2 = 0xc2b2ae3d27d4eb4ful;
static constexpr uint64_t prime3 = 0x165667b19e3779f9ul;
static constexpr uint64_t prime4 = 0x85ebca77c2b2ae63ul;
static constexpr uint64_t prime5 = 0x27d4eb2f165667c5ul;
};

template <>
hash_value_type __device__ inline XXHash_64<bool>::operator()(bool const& key) const
{
return compute(static_cast<uint8_t>(key));
}

template <>
hash_value_type __device__ inline XXHash_64<float>::operator()(float const& key) const
{
return compute(normalize_nans(key));
}

template <>
hash_value_type __device__ inline XXHash_64<double>::operator()(double const& key) const
{
return compute(normalize_nans(key));
}

template <>
hash_value_type __device__ inline XXHash_64<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
auto const data = reinterpret_cast<std::byte const*>(key.data());
auto const len = key.size_bytes();
return compute_bytes(data, len);
}

template <>
hash_value_type __device__ inline XXHash_64<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return compute(key.value());
}

template <>
hash_value_type __device__ inline XXHash_64<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return compute(key.value());
}

template <>
hash_value_type __device__ inline XXHash_64<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return compute(key.value());
}

/**
* @brief Computes the hash value of a row in the given table.
*
* @tparam Nullate A cudf::nullate type describing whether to check for nulls.
*/
template <typename Nullate>
class device_row_hasher {
public:
device_row_hasher(Nullate nulls, table_device_view const& t, hash_value_type seed)
: _check_nulls(nulls), _table(t), _seed(seed)
{
}

__device__ auto operator()(size_type row_index) const noexcept
{
return cudf::detail::accumulate(
_table.begin(),
_table.end(),
_seed,
[row_index, nulls = _check_nulls] __device__(auto hash, auto column) {
return cudf::type_dispatcher(
column.type(), element_hasher_adapter{}, column, row_index, nulls, hash);
});
}

/**
* @brief Computes the hash value of an element in the given column.
*/
class element_hasher_adapter {
public:
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(column_device_view const& col,
size_type row_index,
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
Nullate const _check_nulls,
hash_value_type const _seed) const noexcept
{
if (_check_nulls && col.is_null(row_index)) {
return std::numeric_limits<hash_value_type>::max();
}
auto const hasher = XXHash_64<T>{_seed};
return hasher(col.element<T>(row_index));
}

template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(column_device_view const&,
size_type,
Nullate const,
hash_value_type const) const noexcept
{
CUDF_UNREACHABLE("Unsupported type for XXHash_64");
}
};

Nullate const _check_nulls;
table_device_view const _table;
hash_value_type const _seed;
};

} // namespace

std::unique_ptr<column> xxhash_64(table_view const& input,
uint64_t seed,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto output = make_numeric_column(data_type(type_to_id<hash_value_type>()),
input.num_rows(),
mask_state::UNALLOCATED,
stream,
mr);

// Return early if there's nothing to hash
if (input.num_columns() == 0 || input.num_rows() == 0) { return output; }

bool const nullable = has_nulls(input);
auto const input_view = table_device_view::create(input, stream);
auto output_view = output->mutable_view();

// Compute the hash value for each row
thrust::tabulate(rmm::exec_policy(stream),
output_view.begin<hash_value_type>(),
output_view.end<hash_value_type>(),
device_row_hasher(nullable, *input_view, seed));

return output;
}

} // namespace detail

std::unique_ptr<column> xxhash_64(table_view const& input,
uint64_t seed,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::xxhash_64(input, seed, stream, mr);
}

} // namespace hashing
} // namespace cudf
Loading