Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove deprecated hash() and spark_murmurhash3_x86_32() #15375

Merged
merged 17 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,6 @@ add_library(
src/groupby/sort/group_replace_nulls.cu
src/groupby/sort/group_sum_scan.cu
src/groupby/sort/sort_helper.cu
src/hash/hashing.cu
src/hash/md5_hash.cu
src/hash/murmurhash3_x86_32.cu
src/hash/murmurhash3_x64_128.cu
Expand All @@ -353,7 +352,6 @@ add_library(
src/hash/sha256_hash.cu
src/hash/sha384_hash.cu
src/hash/sha512_hash.cu
src/hash/spark_murmurhash3_x86_32.cu
src/hash/xxhash_64.cu
src/interop/dlpack.cpp
src/interop/from_arrow.cu
Expand Down
52 changes: 0 additions & 52 deletions cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,42 +34,11 @@ namespace cudf {
*/
using hash_value_type = uint32_t;

/**
* @brief Identifies the hash function to be used
*/
enum class hash_id {
HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed
HASH_MURMUR3, ///< Murmur3 hash function
HASH_SPARK_MURMUR3, ///< Spark Murmur3 hash function
HASH_MD5 ///< MD5 hash function
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
};

/**
 * @brief Seed value used by hash functions when the caller does not supply one
 */
static constexpr uint32_t DEFAULT_HASH_SEED{0};

/**
* @brief Computes the hash value of each row in the input set of columns.
*
* @deprecated Since 23.08
*
* @param input The table of columns to hash
* @param hash_function The hash function enum to use (defaults to hash_id::HASH_MURMUR3)
* @param seed Optional seed value to use for the hash function (defaults to DEFAULT_HASH_SEED)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
*
* @returns A column where each row is the hash of a row from the input
*/
[[deprecated]] std::unique_ptr<column> hash(
table_view const& input,
hash_id hash_function = hash_id::HASH_MURMUR3,
uint32_t seed = DEFAULT_HASH_SEED,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

//! Hash APIs
namespace hashing {

Expand Down Expand Up @@ -112,27 +81,6 @@ std::unique_ptr<table> murmurhash3_x64_128(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the MurmurHash3 32-bit hash value of each row in the given table
*
* @deprecated Since 24.04
*
* This function computes the hash similar to MurmurHash3_x86_32 with special processing
* to match Spark's implementation results.
*
* @param input The table of columns to hash
* @param seed Optional seed value to use for the hash function (defaults to DEFAULT_HASH_SEED)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
*
* @returns A column where each row is the hash of a row from the input
*/
[[deprecated]] std::unique_ptr<column> spark_murmurhash3_x86_32(
table_view const& input,
uint32_t seed = DEFAULT_HASH_SEED,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the MD5 hash value of each row in the given table
*
Expand Down
5 changes: 0 additions & 5 deletions cpp/include/cudf/hashing/detail/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@ std::unique_ptr<table> murmurhash3_x64_128(table_view const& input,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource* mr);

// Declaration of spark_murmurhash3_x86_32 in the detail header: it takes the same
// parameter types as the public declaration in cudf/hashing.hpp (table, seed, stream,
// memory resource) but provides no default arguments, so callers pass all four.
// NOTE(review): the stream parameter is unnamed in this declaration.
std::unique_ptr<column> spark_murmurhash3_x86_32(table_view const& input,
uint32_t seed,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource* mr);

// Declaration of md5 in the detail header: takes the input table, a CUDA stream view and
// a device memory resource pointer (no default arguments) and returns an owning pointer
// to a column.
// NOTE(review): presumably the implementation behind the public MD5 hashing API — confirm.
std::unique_ptr<column> md5(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
Expand Down
10 changes: 9 additions & 1 deletion cpp/include/cudf/partitioning.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,6 +33,14 @@ namespace cudf {
* @brief Column partitioning APIs
*/

/**
 * @brief Selects the hash function applied when hash partitioning
 */
enum class hash_id {
  HASH_IDENTITY = 0,  ///< Identity hash: the key to be hashed is returned unchanged
  HASH_MURMUR3  = 1   ///< Murmur3 hash function
};

/**
* @brief Partitions rows of `t` according to the mapping specified by
* `partition_map`.
Expand Down
53 changes: 0 additions & 53 deletions cpp/src/hash/hashing.cu

This file was deleted.

Loading
Loading