From 8860baf33fb8f24f08380cda39027a88f0b9b1e0 Mon Sep 17 00:00:00 2001 From: rwlee Date: Mon, 4 Jan 2021 13:22:25 -0800 Subject: [PATCH 01/20] Spark Murmur3 hash functionality(#7024) Resolves #6863 Expands existing murmur3 hashing functionality to match Spark's murmur3 hashing algorithm by modifying tail processing for unaligned bytes and processing booleans as 32-bit integers rather than single bytes. Authors: - Ryan Lee - rwlee Approvers: - Jake Hemstad - null - Robert (Bobby) Evans - GALI PREM SAGAR URL: https://github.com/rapidsai/cudf/pull/7024 --- cpp/include/cudf/detail/hashing.hpp | 1 + .../cudf/detail/utilities/hash_functions.cuh | 234 ++++++++++++++---- cpp/include/cudf/types.hpp | 9 +- cpp/src/hash/hashing.cu | 10 +- cpp/tests/hashing/hash_test.cpp | 65 ++++- .../java/ai/rapids/cudf/ColumnVector.java | 47 +++- .../main/java/ai/rapids/cudf/HashType.java | 3 +- .../java/ai/rapids/cudf/ColumnVectorTest.java | 137 +++++++--- python/cudf/cudf/_lib/cpp/types.pxd | 1 + 9 files changed, 419 insertions(+), 88 deletions(-) diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index 18f97f00f21..06f523c2320 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -46,6 +46,7 @@ std::unique_ptr md5_hash( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +template