Skip to content

Commit

Permalink
Move hash type declarations to hashing.hpp (#10320)
Browse files Browse the repository at this point in the history
Move the `hash_id` and `DEFAULT_HASH_SEED` declarations from `types.hpp` to `hashing.hpp`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller
  - Bradley Dice (https://github.com/bdice)

URL: #10320
  • Loading branch information
davidwendt authored Feb 18, 2022
1 parent b28bad6 commit ec614ac
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 36 deletions.
1 change: 1 addition & 0 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/hashing.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

Expand Down
19 changes: 17 additions & 2 deletions cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,7 +17,6 @@

#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

namespace cudf {
/**
Expand All @@ -26,6 +25,22 @@ namespace cudf {
* @file
*/

/**
* @brief Identifies the hash function to be used
*
* Only `HASH_IDENTITY` carries an explicit value (0); the remaining enumerators
* take the consecutive values 1 through 4 in declaration order, so reordering
* or inserting enumerators changes their underlying values.
*
* The Cython declaration in `python/cudf/cudf/_lib/cpp/hash.pxd` lists the same
* enumerators by their fully qualified names and must be kept in sync with
* this enum.
*/
enum class hash_id {
HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed
HASH_MURMUR3, ///< Murmur3 hash function
HASH_MD5, ///< MD5 hash function
HASH_SERIAL_MURMUR3, ///< Serial Murmur3 hash function
HASH_SPARK_MURMUR3 ///< Spark Murmur3 hash function
};

/**
* @brief The default seed value for hash functions
*
* Declared `static constexpr` at namespace scope in a header, so the constant
* has internal linkage and each translation unit that includes this header
* gets its own copy.
*/
static constexpr uint32_t DEFAULT_HASH_SEED = 0;

/**
* @brief Computes the hash value of each row in the input set of columns.
*
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/partitioning.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,7 @@

#pragma once

#include <cudf/types.hpp>
#include <cudf/hashing.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand Down
18 changes: 1 addition & 17 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
* Copyright (c) 2018-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -326,21 +326,5 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh
*/
std::size_t size_of(data_type t);

/**
* @brief Identifies the hash function to be used
*
* Only `HASH_IDENTITY` carries an explicit value (0); the remaining enumerators
* take the consecutive values 1 through 4 in declaration order, so reordering
* or inserting enumerators changes their underlying values.
*/
enum class hash_id {
HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed
HASH_MURMUR3, ///< Murmur3 hash function
HASH_MD5, ///< MD5 hash function
HASH_SERIAL_MURMUR3, ///< Serial Murmur3 hash function
HASH_SPARK_MURMUR3 ///< Spark Murmur3 hash function
};

/**
* @brief The default seed value for hash functions
*
* Declared `static constexpr` at namespace scope in a header, so the constant
* has internal linkage and each translation unit that includes this header
* gets its own copy.
*/
static constexpr uint32_t DEFAULT_HASH_SEED = 0;

/** @} */
} // namespace cudf
13 changes: 10 additions & 3 deletions python/cudf/cudf/_lib/cpp/hash.pxd
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libc.stdint cimport uint32_t
from libcpp.memory cimport unique_ptr
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view


# Cython declarations for the C++ symbols taken from cudf/hashing.hpp
# (namespace cudf). The block is declared nogil.
cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil:

# Binds the C++ scoped enum cudf::hash_id; every member is mapped to its
# fully qualified C++ name.
ctypedef enum hash_id "cudf::hash_id":
HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY"
HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3"
HASH_MD5 "cudf::hash_id::HASH_MD5"
HASH_SERIAL_MURMUR3 "cudf::hash_id::HASH_SERIAL_MURMUR3"
HASH_SPARK_MURMUR3 "cudf::hash_id::HASH_SPARK_MURMUR3"

# Binding for cudf::hash, returning an owning pointer to a column.
# `except +` makes Cython translate C++ exceptions into Python exceptions.
# NOTE(review): the two `hash_function` lines below look like the
# removed/added pair of a diff hunk (libcudf_types.hash_id -> hash_id);
# presumably only the unqualified `hash_id` form is meant to remain - confirm.
cdef unique_ptr[column] hash "cudf::hash" (
const table_view& input,
const libcudf_types.hash_id hash_function,
const hash_id hash_function,
const uint32_t seed
) except +
7 changes: 0 additions & 7 deletions python/cudf/cudf/_lib/cpp/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
DECIMAL64 "cudf::type_id::DECIMAL64"
DECIMAL128 "cudf::type_id::DECIMAL128"

# Binds the C++ scoped enum cudf::hash_id; every member is mapped to its
# fully qualified C++ name.
ctypedef enum hash_id "cudf::hash_id":
HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY"
HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3"
HASH_MD5 "cudf::hash_id::HASH_MD5"
HASH_SERIAL_MURMUR3 "cudf::hash_id::HASH_SERIAL_MURMUR3"
HASH_SPARK_MURMUR3 "cudf::hash_id::HASH_SPARK_MURMUR3"

cdef cppclass data_type:
data_type() except +
data_type(const data_type&) except +
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/hash.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libc.stdint cimport uint32_t
from libcpp cimport bool
Expand All @@ -10,7 +10,7 @@ from libcpp.vector cimport vector
cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.hash cimport hash as cpp_hash
from cudf._lib.cpp.hash cimport hash as cpp_hash, hash_id as cpp_hash_id
from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
Expand Down Expand Up @@ -58,11 +58,11 @@ def hash(source_table, str method, int seed=0):
cdef table_view c_source_view = table_view_from_table(
source_table, ignore_index=True)
cdef unique_ptr[column] c_result
cdef libcudf_types.hash_id c_hash_function
cdef cpp_hash_id c_hash_function
if method == "murmur3":
c_hash_function = libcudf_types.hash_id.HASH_MURMUR3
c_hash_function = cpp_hash_id.HASH_MURMUR3
elif method == "md5":
c_hash_function = libcudf_types.hash_id.HASH_MD5
c_hash_function = cpp_hash_id.HASH_MD5
else:
raise ValueError(f"Unsupported hash function: {method}")
with nogil:
Expand Down

0 comments on commit ec614ac

Please sign in to comment.