Skip to content

Commit

Permalink
refactor that details proposed code
Browse files Browse the repository at this point in the history
  • Loading branch information
rwlee committed Jul 7, 2020
1 parent 0b7ad98 commit b626432
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 1 deletion.
15 changes: 15 additions & 0 deletions cpp/include/cudf/detail/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,24 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> hash(table_view const& input,
// Selects which hash function is applied; Murmur3 is used when the argument is omitted.
hash_id hash_function = hash_id::HASH_MURMUR3,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

/**
 * @brief Declares the identity-hash entry point.
 *
 * NOTE(review): presumably the implementation behind `hash_id::HASH_IDENTITY`; no definition
 * or active call site is visible in this change -- confirm before relying on it.
 *
 * @param input  Table to hash.
 * @param mr     Device memory resource; defaults to `rmm::mr::get_default_resource()`.
 * @param stream CUDA stream; defaults to stream 0.
 * @returns Owning pointer to a column (presumably the hashed output -- TODO confirm).
 */
std::unique_ptr<column> identity_hash(table_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

/**
 * @brief Declares the 32-bit Murmur3 hashing entry point.
 *
 * `detail::hash` forwards to this function when `hash_id::HASH_MURMUR3` is selected.
 *
 * @param input        Table to hash.
 * @param initial_hash Optional initial hash values; empty by default
 *                     (presumably per-column seeds -- TODO confirm against the definition).
 * @param mr           Device memory resource; defaults to `rmm::mr::get_default_resource()`.
 * @param stream       CUDA stream; defaults to stream 0.
 * @returns Owning pointer to the column produced by the hash.
 */
std::unique_ptr<column> murmur_hash3_32(table_view const& input,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

/**
 * @brief Declares the MD5 hashing entry point.
 *
 * NOTE(review): presumably the implementation behind `hash_id::HASH_MD5`; no definition
 * or active call site is visible in this change -- confirm before relying on it.
 *
 * @param input  Table to hash.
 * @param mr     Device memory resource; defaults to `rmm::mr::get_default_resource()`.
 * @param stream CUDA stream; defaults to stream 0.
 * @returns Owning pointer to a column (presumably the hashed output -- TODO confirm).
 */
std::unique_ptr<column> md5_hash(table_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);


} // namespace detail
} // namespace cudf
1 change: 1 addition & 0 deletions cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ namespace cudf {
* @returns A column where each row is the hash of a column from the input
*/
std::unique_ptr<column> hash(table_view const& input,
// Selects which hash function is applied; Murmur3 is used when the argument is omitted.
hash_id hash_function = hash_id::HASH_MURMUR3,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

Expand Down
9 changes: 9 additions & 0 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,5 +269,14 @@ inline bool operator==(data_type const& lhs, data_type const& rhs) { return lhs.
*/
std::size_t size_of(data_type t);

/**
 * @brief Selects which hash function a hashing call should apply.
 */
enum class hash_id {
  HASH_IDENTITY = 0,  ///< Identity hash: the key to be hashed is returned unchanged
  HASH_MURMUR3  = 1,  ///< Murmur3 hash
  HASH_MD5      = 2   ///< MD5 hash
};

/** @} */
} // namespace cudf
22 changes: 21 additions & 1 deletion cpp/src/hash/hashing.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cudf/partitioning.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/types.hpp>

#include <thrust/tabulate.h>

Expand Down Expand Up @@ -634,9 +635,27 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
}

/**
 * @brief Dispatches to the hashing implementation selected by `hash_function`.
 *
 * Only `hash_id::HASH_MURMUR3` is wired up at present. The identity and MD5 cases are still
 * commented out, so those ids (and any other value) reach the default branch and produce a
 * null pointer.
 *
 * @param input         Table to hash.
 * @param hash_function Identifier of the hash function to apply.
 * @param initial_hash  Initial hash values forwarded to the Murmur3 implementation.
 * @param mr            Device memory resource forwarded to the implementation.
 * @param stream        CUDA stream forwarded to the implementation.
 * @returns The column produced by the selected hash function, or `nullptr` when the requested
 *          hash function is not supported.
 */
std::unique_ptr<column> hash(table_view const& input,
                             hash_id hash_function,
                             std::vector<uint32_t> const& initial_hash,
                             rmm::mr::device_memory_resource* mr,
                             cudaStream_t stream)
{
  switch (hash_function) {
    // case hash_id::HASH_IDENTITY:
    //   return identity_hash(input);
    case hash_id::HASH_MURMUR3: return murmur_hash3_32(input, initial_hash, mr, stream);
    // case hash_id::HASH_MD5:
    //   return md5_hash(input, mr, stream);
    default:
      // NOTE(review): unsupported ids silently yield an empty pointer; consider failing loudly
      // once the project's error-reporting facility is confirmed to be in scope here.
      return nullptr;
  }
}

std::unique_ptr<column> murmur_hash3_32(table_view const& input,
std::vector<uint32_t> const& initial_hash,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream)
{
// TODO this should be UINT32
auto output = make_numeric_column(
Expand Down Expand Up @@ -688,11 +707,12 @@ std::unique_ptr<column> hash(table_view const& input,
} // namespace detail

/**
 * @brief Public entry point: hashes `input` with the hash function chosen by `hash_function`.
 *
 * Forwards every argument to `detail::hash`, leaving the stream argument at its default.
 *
 * @param input         Table to hash.
 * @param hash_function Identifier of the hash function to apply.
 * @param initial_hash  Initial hash values forwarded to the implementation.
 * @param mr            Device memory resource forwarded to the implementation.
 * @returns Whatever `detail::hash` returns for the selected hash function.
 */
std::unique_ptr<column> hash(table_view const& input,
                             hash_id hash_function,
                             std::vector<uint32_t> const& initial_hash,
                             rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  // `hash_function` must be passed through: detail::hash takes it as its second parameter.
  return detail::hash(input, hash_function, initial_hash, mr);
}

} // namespace cudf

0 comments on commit b626432

Please sign in to comment.