Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add row bitmask as a detail::hash_join member #10248

Merged
merged 18 commits into from
May 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/include/cudf/detail/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

Expand Down Expand Up @@ -68,9 +69,10 @@ struct hash_join {
hash_join& operator=(hash_join&&) = delete;

private:
bool const _is_empty; ///< true if `_hash_table` is empty
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
bool const _is_empty; ///< true if `_hash_table` is empty
rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid
cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal
cudf::table_view _build; ///< input table to build the hash map
cudf::structs::detail::flattened_table
_flattened_build_table; ///< flattened data structures for `_build`
map_type _hash_table; ///< hash table built on `_build`
Expand Down
7 changes: 6 additions & 1 deletion cpp/src/join/hash_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream)
: _is_empty{build.num_rows() == 0},
_composite_bitmask{cudf::detail::bitmask_and(build, stream).first},
_nulls_equal{compare_nulls},
_hash_table{compute_hash_table_size(build.num_rows()),
std::numeric_limits<hash_value_type>::max(),
Expand All @@ -302,7 +303,11 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,

if (_is_empty) { return; }

cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream);
cudf::detail::build_join_hash_table(_build,
_hash_table,
_nulls_equal,
static_cast<bitmask_type const*>(_composite_bitmask.data()),
stream);
}

template <typename Hasher>
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/join/join_common_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,15 @@ get_trivial_left_join_indices(
* @param build Table of columns used to build join hash.
* @param hash_table Build hash table.
* @param nulls_equal Flag to denote nulls are equal or not.
* @param bitmask Pointer to the bitmask denoting whether each row of `build` is valid; it is read only on the path that inserts just the valid rows.
* @param stream CUDA stream used for device memory operations and kernel launches.
*
*/
template <typename MultimapType>
void build_join_hash_table(cudf::table_view const& build,
MultimapType& hash_table,
null_equality const nulls_equal,
[[maybe_unused]] bitmask_type const* bitmask,
rmm::cuda_stream_view stream)
{
auto build_table_ptr = cudf::table_device_view::create(build, stream);
Expand All @@ -168,8 +170,7 @@ void build_join_hash_table(cudf::table_view const& build,
hash_table.insert(iter, iter + build_table_num_rows, stream.value());
} else {
thrust::counting_iterator<size_type> stencil(0);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())};
row_is_valid pred{bitmask};

// insert valid rows
hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value());
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/join/mixed_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ mixed_join(
// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
// won't be able to support AST conditions for those types anyway.
build_join_hash_table(build, hash_table, compare_nulls, stream);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
build_join_hash_table(
build, hash_table, compare_nulls, static_cast<bitmask_type const*>(row_bitmask.data()), stream);
auto hash_table_view = hash_table.get_device_view();

auto left_conditional_view = table_device_view::create(left_conditional, stream);
Expand Down Expand Up @@ -381,7 +383,9 @@ compute_mixed_join_output_size(table_view const& left_equality,
// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
// won't be able to support AST conditions for those types anyway.
build_join_hash_table(build, hash_table, compare_nulls, stream);
auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
build_join_hash_table(
build, hash_table, compare_nulls, static_cast<bitmask_type const*>(row_bitmask.data()), stream);
auto hash_table_view = hash_table.get_device_view();

auto left_conditional_view = table_device_view::create(left_conditional, stream);
Expand Down