From c99a37fb804282565204fd1544d145177620c0a3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 23 Dec 2021 17:40:31 -0600 Subject: [PATCH 01/15] Remove deprecated method Series.hash_encode. (#9942) This PR removes the deprecated method `Series.hash_encode`. Resolves #9475. Follows up on #9457, #9381. This PR also removes libcudf code paths used solely for this Python method. Users may replace code like `series.hash_encode(stop, use_name=False)` with `series.hash_values(method="murmur3") % stop`. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) - Conor Hoekstra (https://github.com/codereport) URL: https://github.com/rapidsai/cudf/pull/9942 --- cpp/include/cudf/detail/hashing.hpp | 14 ++--- cpp/include/cudf/hashing.hpp | 9 +-- cpp/include/cudf/table/row_operators.cuh | 48 --------------- cpp/src/hash/hashing.cu | 6 +- cpp/src/hash/murmur_hash.cu | 25 ++------ cpp/tests/hashing/hash_test.cpp | 50 +++++++-------- docs/cudf/source/api_docs/series.rst | 2 - python/cudf/cudf/_lib/cpp/hash.pxd | 3 +- python/cudf/cudf/_lib/hash.pyx | 4 +- python/cudf/cudf/core/frame.py | 4 +- python/cudf/cudf/core/series.py | 77 ------------------------ python/cudf/cudf/tests/test_dataframe.py | 38 ------------ 12 files changed, 46 insertions(+), 234 deletions(-) diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index bd5c8a42a51..0fc807593fb 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -32,17 +32,15 @@ namespace detail { */ std::unique_ptr hash( table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - cudf::host_span initial_hash = {}, - uint32_t seed = 0, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + hash_id hash_function = hash_id::HASH_MURMUR3, + uint32_t seed = 0, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr murmur_hash3_32( table_view const& input, - cudf::host_span initial_hash = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr md5_hash( table_view const& input, diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 6b281c3f7f4..cce05042917 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -31,8 +31,6 @@ namespace cudf { * * @param input The table of columns to hash. * @param hash_function The hash function enum to use. - * @param initial_hash Optional host_span of initial hash values for each column. - * If this span is empty then each element will be hashed as-is. * @param seed Optional seed value to use for the hash function. * @param mr Device memory resource used to allocate the returned column's device memory. * @@ -40,10 +38,9 @@ namespace cudf { */ std::unique_ptr hash( table_view const& input, - hash_id hash_function = hash_id::HASH_MURMUR3, - cudf::host_span initial_hash = {}, - uint32_t seed = DEFAULT_HASH_SEED, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + hash_id hash_function = hash_id::HASH_MURMUR3, + uint32_t seed = DEFAULT_HASH_SEED, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 0f3ca073380..32ddd1ef49a 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -539,52 +539,4 @@ class row_hasher { uint32_t _seed{DEFAULT_HASH_SEED}; }; -/** - * @brief Computes the hash value of a row in the given table, combined with an - * initial hash value for each column. - * - * @tparam hash_function Hash functor to use for hashing elements. - * @tparam Nullate A cudf::nullate type describing how to check for nulls. - */ -template