Skip to content

Commit

Permalink
Add bitmask as hash join member
Browse files: browse the repository at this point in the history
  • Loading branch information
PointKernel committed Apr 29, 2022
1 parent 92e5527 commit 7b2f5f6
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 8 deletions.
8 changes: 5 additions & 3 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

Expand Down Expand Up @@ -68,9 +69,10 @@ struct hash_join {
hash_join& operator=(hash_join&&) = delete;

private:
bool const _is_empty; ///< true if `_hash_table` is empty
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
bool const _is_empty; ///< true if `_hash_table` is empty
rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
cudf::structs::detail::flattened_table
_flattened_build_table; ///< flattened data structures for `_build`
map_type _hash_table; ///< hash table built on `_build`
Expand Down
7 changes: 6 additions & 1 deletion cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream)
: _is_empty{build.num_rows() == 0},
_composite_bitmask{cudf::detail::bitmask_and(build, stream).first},
_nulls_equal{compare_nulls},
_hash_table{compute_hash_table_size(build.num_rows()),
std::numeric_limits<hash_value_type>::max(),
Expand All @@ -302,7 +303,11 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,

if (_is_empty) { return; }

cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream);
cudf::detail::build_join_hash_table(_build,
_hash_table,
_nulls_equal,
static_cast<bitmask_type const*>(_composite_bitmask.data()),
stream);
}

template <typename Hasher>
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/join/join_common_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,15 @@ get_trivial_left_join_indices(
* @param build Table of columns used to build join hash.
* @param hash_table Build hash table.
* @param nulls_equal Flag to denote nulls are equal or not.
* @param bitmask Bitmask to denote whether a row is valid.
* @param stream CUDA stream used for device memory operations and kernel launches.
*
*/
template <typename MultimapType>
void build_join_hash_table(cudf::table_view const& build,
MultimapType& hash_table,
null_equality const nulls_equal,
[[maybe_unused]] bitmask_type const* bitmask,
rmm::cuda_stream_view stream)
{
auto build_table_ptr = cudf::table_device_view::create(build, stream);
Expand All @@ -168,8 +170,7 @@ void build_join_hash_table(cudf::table_view const& build,
hash_table.insert(iter, iter + build_table_num_rows, stream.value());
} else {
thrust::counting_iterator<size_type> stencil(0);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())};
row_is_valid pred{bitmask};

// insert valid rows
hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value());
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/join/mixed_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ mixed_join(
// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
// won't be able to support AST conditions for those types anyway.
build_join_hash_table(build, hash_table, compare_nulls, stream);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
build_join_hash_table(
build, hash_table, compare_nulls, static_cast<bitmask_type const*>(row_bitmask.data()), stream);
auto hash_table_view = hash_table.get_device_view();

auto left_conditional_view = table_device_view::create(left_conditional, stream);
Expand Down Expand Up @@ -381,7 +383,9 @@ compute_mixed_join_output_size(table_view const& left_equality,
// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
// won't be able to support AST conditions for those types anyway.
build_join_hash_table(build, hash_table, compare_nulls, stream);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
build_join_hash_table(
build, hash_table, compare_nulls, static_cast<bitmask_type const*>(row_bitmask.data()), stream);
auto hash_table_view = hash_table.get_device_view();

auto left_conditional_view = table_device_view::create(left_conditional, stream);
Expand Down

0 comments on commit 7b2f5f6

Please sign in to comment.