Skip to content

Commit

Permalink
Switch hash constant handling
Browse files Browse the repository at this point in the history
  • Loading branch information
rwlee committed Aug 11, 2020
1 parent 6aaa7b8 commit 3b68a9d
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 155 deletions.
1 change: 0 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,6 @@ add_library(cudf
src/stream_compaction/drop_duplicates.cu
src/datetime/datetime_ops.cu
src/hash/hashing.cu
src/hash/hash_constants.cu
src/partitioning/partitioning.cu
src/quantiles/quantile.cu
src/quantiles/quantiles.cu
Expand Down
15 changes: 3 additions & 12 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,6 @@ void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destina
}

struct MD5Hash {
__device__ MD5Hash(md5_hash_constants_type const* hash_constants,
md5_shift_constants_type const* shift_constants)
: d_hash_constants(hash_constants), d_shift_constants(shift_constants)
{
}

/**
* @brief Core MD5 algorithm implementation. Processes a single 512-bit chunk,
* updating the hash value so far. Does not zero out the buffer contents.
Expand Down Expand Up @@ -92,11 +86,11 @@ struct MD5Hash {

uint32_t buffer_element_as_int;
std::memcpy(&buffer_element_as_int, hash_state->buffer + g * 4, 4);
F = F + A + d_hash_constants[j] + buffer_element_as_int;
F = F + A + md5_hash_constants[j] + buffer_element_as_int;
A = D;
D = C;
C = B;
B = B + __funnelshift_l(F, F, d_shift_constants[((j / 16) * 4) + (j % 4)]);
B = B + __funnelshift_l(F, F, md5_shift_constants[((j / 16) * 4) + (j % 4)]);
}

hash_state->hash_value[0] += A;
Expand All @@ -106,6 +100,7 @@ struct MD5Hash {

hash_state->buffer_length = 0;
}

/**
* @brief Core MD5 element processing function
*/
Expand Down Expand Up @@ -217,10 +212,6 @@ struct MD5Hash {
{
process(col.element<T>(row_index), hash_state);
}

private:
md5_hash_constants_type const* d_hash_constants;
md5_shift_constants_type const* d_shift_constants;
};

template <>
Expand Down
5 changes: 0 additions & 5 deletions cpp/include/cudf/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,5 @@ std::unique_ptr<column> hash(table_view const& input,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**
 * @brief Declaration of a table-hashing entry point that returns a column.
 *
 * NOTE(review): the name suggests a 32-bit MurmurHash3 computed over the rows of
 * `input`, but the definition is not visible here -- confirm against the implementation.
 *
 * @param input Table view handed to the hash.
 * @param initial_hash Optional vector of 32-bit values; defaults to empty.
 * Presumably per-column seed values -- TODO confirm.
 * @param mr Device memory resource; defaults to `rmm::mr::get_default_resource()`.
 * Presumably used to allocate the returned column -- TODO confirm.
 * @return Owning pointer to the resulting column.
 */
std::unique_ptr<column> murmur_hash3_32(
table_view const& input,
std::vector<uint32_t> const& initial_hash = {},
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/** @} */ // end of group
} // namespace cudf
89 changes: 0 additions & 89 deletions cpp/src/hash/hash_constants.cu

This file was deleted.

51 changes: 28 additions & 23 deletions cpp/src/hash/hash_constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,32 +28,37 @@ struct md5_intermediate_data {
// Type for the shift constants table.
using md5_shift_constants_type = uint32_t;

/**
* @brief Returns pointer to device memory that contains the static
* md5 shift constants table. On first call, this will copy the table into
* device memory and is guaranteed to be thread-safe.
*
* This table is used in the MD5 hash to lookup the number of bits
* to rotate left during each hash iteration.
*
* @return Device memory pointer to the MD5 shift constants table.
*/
const md5_shift_constants_type* get_md5_shift_constants();
/**
 * @brief MD5 left-rotate amounts, stored in device constant memory.
 *
 * The table holds 16 entries arranged as four groups of four. The MD5 core loop
 * looks an entry up as `md5_shift_constants[((j / 16) * 4) + (j % 4)]`, so each
 * group serves one block of 16 consecutive iterations `j`, and the entry within
 * the group is selected by `j % 4`. The value is the bit count passed to
 * `__funnelshift_l`.
 *
 * The variable is `static` because it is defined in a header: without internal
 * linkage every translation unit that includes this header would emit its own
 * externally visible definition of the same symbol, which can fail at link time.
 */
static __device__ __constant__ md5_shift_constants_type md5_shift_constants[16] = {
  7, 12, 17, 22,  // iterations j in [0, 16)
  5, 9,  14, 20,  // iterations j in [16, 32)
  4, 11, 16, 23,  // iterations j in [32, 48)
  6, 10, 15, 21,  // iterations j in [48, 64)
};

// Type for the hash constants table.
using md5_hash_constants_type = uint32_t;

/**
* @brief Returns pointer to device memory that contains the static
* md5 hash constants table. On first call, this will copy the table into
* device memory and is guaranteed to be thread-safe.
*
* This table is used in the MD5 hash to lookup values added to
* the hash during each hash iteration.
*
* @return Device memory pointer to the MD5 hash constants table.
*/
const md5_hash_constants_type* get_md5_hash_constants();

/**
 * @brief MD5 additive constants, stored in device constant memory.
 *
 * The table holds 64 entries, one per iteration of the MD5 core loop. The loop
 * reads `md5_hash_constants[j]` and adds it into the running value on iteration `j`.
 *
 * The variable is `static` because it is defined in a header: without internal
 * linkage every translation unit that includes this header would emit its own
 * externally visible definition of the same symbol, which can fail at link time.
 */
static __device__ __constant__ md5_hash_constants_type md5_hash_constants[64] = {
  // iterations j in [0, 16)
  0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
  0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
  // iterations j in [16, 32)
  0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
  0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
  // iterations j in [32, 48)
  0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
  0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
  // iterations j in [48, 64)
  0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
  0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
};
} // namespace detail
} // namespace cudf
43 changes: 18 additions & 25 deletions cpp/src/hash/hashing.cu
Original file line number Diff line number Diff line change
Expand Up @@ -684,32 +684,25 @@ std::unique_ptr<column> md5_hash(table_view const& input,
bool const nullable = has_nulls(input);
auto const device_input = table_device_view::create(input, stream);

// Fetch hash constants
md5_shift_constants_type const* shift_constants = get_md5_shift_constants();
md5_hash_constants_type const* hash_constants = get_md5_hash_constants();

// Hash each row, hashing each element sequentially left to right
thrust::for_each(rmm::exec_policy(stream)->on(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.num_rows()),
[d_chars,
device_input = *device_input,
hash_constants = hash_constants,
shift_constants = shift_constants,
has_nulls = nullable] __device__(auto row_index) {
md5_intermediate_data hash_state;
MD5Hash hasher = MD5Hash(hash_constants, shift_constants);
for (int col_index = 0; col_index < device_input.num_columns(); col_index++) {
if (device_input.column(col_index).is_valid(row_index)) {
cudf::type_dispatcher(device_input.column(col_index).type(),
hasher,
device_input.column(col_index),
row_index,
&hash_state);
}
}
hasher.finalize(&hash_state, d_chars + (row_index * 32));
});
thrust::for_each(
rmm::exec_policy(stream)->on(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.num_rows()),
[d_chars, device_input = *device_input, has_nulls = nullable] __device__(auto row_index) {
md5_intermediate_data hash_state;
MD5Hash hasher = MD5Hash{};
for (int col_index = 0; col_index < device_input.num_columns(); col_index++) {
if (device_input.column(col_index).is_valid(row_index)) {
cudf::type_dispatcher(device_input.column(col_index).type(),
hasher,
device_input.column(col_index),
row_index,
&hash_state);
}
}
hasher.finalize(&hash_state, d_chars + (row_index * 32));
});

return make_strings_column(input.num_rows(),
std::move(offsets_column),
Expand Down

0 comments on commit 3b68a9d

Please sign in to comment.