Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move hash type declarations to hashing.hpp #10320

Merged
merged 1 commit into from
Feb 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION.
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,7 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/hashing.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

Expand Down
19 changes: 17 additions & 2 deletions cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,7 +17,6 @@

#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

namespace cudf {
/**
Expand All @@ -26,6 +25,22 @@ namespace cudf {
* @file
*/

/**
 * @brief Selects which hash function an operation should apply
 */
enum class hash_id {
  HASH_IDENTITY       = 0,  ///< Identity hash: the key to be hashed is returned unchanged
  HASH_MURMUR3        = 1,  ///< Murmur3 hash
  HASH_MD5            = 2,  ///< MD5 hash
  HASH_SERIAL_MURMUR3 = 3,  ///< Serial Murmur3 hash
  HASH_SPARK_MURMUR3  = 4   ///< Spark Murmur3 hash
};

/**
 * @brief Default seed value for hash functions (zero)
 */
static constexpr uint32_t DEFAULT_HASH_SEED{0};

/**
* @brief Computes the hash value of each row in the input set of columns.
*
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/partitioning.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,7 @@

#pragma once

#include <cudf/types.hpp>
#include <cudf/hashing.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand Down
18 changes: 1 addition & 17 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
* Copyright (c) 2018-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -326,21 +326,5 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh
*/
std::size_t size_of(data_type t);

/**
 * @brief Selects which hash function an operation should apply
 */
enum class hash_id {
  HASH_IDENTITY       = 0,  ///< Identity hash: the key to be hashed is returned unchanged
  HASH_MURMUR3        = 1,  ///< Murmur3 hash
  HASH_MD5            = 2,  ///< MD5 hash
  HASH_SERIAL_MURMUR3 = 3,  ///< Serial Murmur3 hash
  HASH_SPARK_MURMUR3  = 4   ///< Spark Murmur3 hash
};

/**
 * @brief Default seed value for hash functions (zero)
 */
static constexpr uint32_t DEFAULT_HASH_SEED{0};

/** @} */
} // namespace cudf
13 changes: 10 additions & 3 deletions python/cudf/cudf/_lib/cpp/hash.pxd
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libc.stdint cimport uint32_t
from libcpp.memory cimport unique_ptr
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view


# Cython declarations for names taken from the C++ header "cudf/hashing.hpp"
# in namespace "cudf". The whole block is marked nogil.
cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil:

# Mirror of cudf::hash_id; each quoted string is the C++ name Cython emits
# for the corresponding enumerator.
ctypedef enum hash_id "cudf::hash_id":
HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY"
HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3"
HASH_MD5 "cudf::hash_id::HASH_MD5"
HASH_SERIAL_MURMUR3 "cudf::hash_id::HASH_SERIAL_MURMUR3"
HASH_SPARK_MURMUR3 "cudf::hash_id::HASH_SPARK_MURMUR3"

# Binding for cudf::hash, exposed to Cython under the name `hash`. It takes a
# table view, a hash_id selecting the hash function, and a 32-bit seed, and
# returns an owning pointer to a column. `except +` lets C++ exceptions
# surface as Python exceptions.
cdef unique_ptr[column] hash "cudf::hash" (
const table_view& input,
# NOTE(review): two `hash_function` parameter lines follow, one typed
# libcudf_types.hash_id and one typed hash_id. Presumably these are the old
# and new sides of a diff; confirm that only the `hash_id` form remains.
const libcudf_types.hash_id hash_function,
const hash_id hash_function,
const uint32_t seed
) except +
7 changes: 0 additions & 7 deletions python/cudf/cudf/_lib/cpp/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
DECIMAL64 "cudf::type_id::DECIMAL64"
DECIMAL128 "cudf::type_id::DECIMAL128"

# Cython mirror of the C++ enum cudf::hash_id; each quoted string is the C++
# name Cython emits for the corresponding enumerator.
# NOTE(review): an identical declaration also appears under
# `cdef extern from "cudf/hashing.hpp"` in this view. Presumably this copy is
# the removed side of a diff; confirm.
ctypedef enum hash_id "cudf::hash_id":
HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY"
HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3"
HASH_MD5 "cudf::hash_id::HASH_MD5"
HASH_SERIAL_MURMUR3 "cudf::hash_id::HASH_SERIAL_MURMUR3"
HASH_SPARK_MURMUR3 "cudf::hash_id::HASH_SPARK_MURMUR3"

cdef cppclass data_type:
data_type() except +
data_type(const data_type&) except +
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/hash.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libc.stdint cimport uint32_t
from libcpp cimport bool
Expand All @@ -10,7 +10,7 @@ from libcpp.vector cimport vector
cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.hash cimport hash as cpp_hash
from cudf._lib.cpp.hash cimport hash as cpp_hash, hash_id as cpp_hash_id
from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
Expand Down Expand Up @@ -58,11 +58,11 @@ def hash(source_table, str method, int seed=0):
cdef table_view c_source_view = table_view_from_table(
source_table, ignore_index=True)
cdef unique_ptr[column] c_result
cdef libcudf_types.hash_id c_hash_function
cdef cpp_hash_id c_hash_function
if method == "murmur3":
c_hash_function = libcudf_types.hash_id.HASH_MURMUR3
c_hash_function = cpp_hash_id.HASH_MURMUR3
elif method == "md5":
c_hash_function = libcudf_types.hash_id.HASH_MD5
c_hash_function = cpp_hash_id.HASH_MD5
else:
raise ValueError(f"Unsupported hash function: {method}")
with nogil:
Expand Down