diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 51d58383de4..0f287ad74a7 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, NVIDIA CORPORATION. + * Copyright (c) 2017-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index cce05042917..e4e94074fb8 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include -#include namespace cudf { /** @@ -26,6 +25,22 @@ namespace cudf { * @file */ +/** + * @brief Identifies the hash function to be used + */ +enum class hash_id { + HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed + HASH_MURMUR3, ///< Murmur3 hash function + HASH_MD5, ///< MD5 hash function + HASH_SERIAL_MURMUR3, ///< Serial Murmur3 hash function + HASH_SPARK_MURMUR3 ///< Spark Murmur3 hash function +}; + +/** + * @brief The default seed value for hash functions + */ +static constexpr uint32_t DEFAULT_HASH_SEED = 0; + /** * @brief Computes the hash value of each row in the input set of columns. * diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 6b1ad7db08b..3ffd9a87d39 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once -#include +#include #include diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 6222b2e680e..76e2589a5a9 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -326,21 +326,5 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh */ std::size_t size_of(data_type t); -/** - * @brief Identifies the hash function to be used - */ -enum class hash_id { - HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed - HASH_MURMUR3, ///< Murmur3 hash function - HASH_MD5, ///< MD5 hash function - HASH_SERIAL_MURMUR3, ///< Serial Murmur3 hash function - HASH_SPARK_MURMUR3 ///< Spark Murmur3 hash function -}; - -/** - * @brief The default seed value for hash functions - */ -static constexpr uint32_t DEFAULT_HASH_SEED = 0; - /** @} */ } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/hash.pxd b/python/cudf/cudf/_lib/cpp/hash.pxd index fd9992152a6..41d10b7b6da 100644 --- a/python/cudf/cudf/_lib/cpp/hash.pxd +++ b/python/cudf/cudf/_lib/cpp/hash.pxd @@ -1,18 +1,25 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libc.stdint cimport uint32_t from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector -cimport cudf._lib.cpp.types as libcudf_types from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil: + + ctypedef enum hash_id "cudf::hash_id": + HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY" + HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3" + HASH_MD5 "cudf::hash_id::HASH_MD5" + HASH_SERIAL_MURMUR3 "cudf::hash_id::HASH_SERIAL_MURMUR3" + HASH_SPARK_MURMUR3 "cudf::hash_id::HASH_SPARK_MURMUR3" + cdef unique_ptr[column] hash "cudf::hash" ( const table_view& input, - const libcudf_types.hash_id hash_function, + const hash_id hash_function, const uint32_t seed ) except + diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd index 23727a20ec2..b1a257feedf 100644 --- a/python/cudf/cudf/_lib/cpp/types.pxd +++ b/python/cudf/cudf/_lib/cpp/types.pxd @@ -81,13 +81,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: DECIMAL64 "cudf::type_id::DECIMAL64" DECIMAL128 "cudf::type_id::DECIMAL128" - ctypedef enum hash_id "cudf::hash_id": - HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY" - HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3" - HASH_MD5 "cudf::hash_id::HASH_MD5" - HASH_SERIAL_MURMUR3 "cudf::hash_id::HASH_SERIAL_MURMUR3" - HASH_SPARK_MURMUR3 "cudf::hash_id::HASH_SPARK_MURMUR3" - cdef cppclass data_type: data_type() except + data_type(const data_type&) except + diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index adc48159aac..301f571f5fb 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libc.stdint cimport uint32_t from libcpp cimport bool @@ -10,7 +10,7 @@ from libcpp.vector cimport vector cimport cudf._lib.cpp.types as libcudf_types from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.hash cimport hash as cpp_hash +from cudf._lib.cpp.hash cimport hash as cpp_hash, hash_id as cpp_hash_id from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view @@ -58,11 +58,11 @@ def hash(source_table, str method, int seed=0): cdef table_view c_source_view = table_view_from_table( source_table, ignore_index=True) cdef unique_ptr[column] c_result - cdef libcudf_types.hash_id c_hash_function + cdef cpp_hash_id c_hash_function if method == "murmur3": - c_hash_function = libcudf_types.hash_id.HASH_MURMUR3 + c_hash_function = cpp_hash_id.HASH_MURMUR3 elif method == "md5": - c_hash_function = libcudf_types.hash_id.HASH_MD5 + c_hash_function = cpp_hash_id.HASH_MD5 else: raise ValueError(f"Unsupported hash function: {method}") with nogil: