Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not add nulls to the hash table when null_equality::NOT_EQUAL is passed to left_semi_join and left_anti_join #8277

Merged
merged 7 commits into from
May 24, 2021
Merged
136 changes: 130 additions & 6 deletions cpp/benchmarks/join/join_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ template <typename key_type, typename payload_type>
class Join : public cudf::benchmark {
};

template <typename key_type, typename payload_type, bool Nullable>
static void BM_join(benchmark::State &state)
template <typename key_type, typename payload_type, bool Nullable, typename Join>
static void BM_join(Join JoinFunc, benchmark::State& state)
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
{
const cudf::size_type build_table_size{(cudf::size_type)state.range(0)};
const cudf::size_type probe_table_size{(cudf::size_type)state.range(1)};
Expand Down Expand Up @@ -105,20 +105,69 @@ static void BM_join(benchmark::State &state)
for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);

auto result = cudf::inner_join(
auto result = JoinFunc(
probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL);
}
}

#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \
(::benchmark::State & st) { BM_join<key_type, payload_type, nullable>(st); }
// Defines benchmark `name` on the `Join` fixture for the given key/payload types.
// The body wraps cudf::inner_join in a lambda and passes it, together with the
// benchmark state, to BM_join; `nullable` is forwarded as BM_join's `Nullable`
// template argument.
#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable)                 \
  BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type)                     \
  (::benchmark::State & st)                                                           \
  {                                                                                   \
    auto const inner_join_fn = [](cudf::table_view const& lhs,                        \
                                  cudf::table_view const& rhs,                        \
                                  std::vector<cudf::size_type> const& lhs_on,         \
                                  std::vector<cudf::size_type> const& rhs_on,         \
                                  cudf::null_equality nulls_equal) {                  \
      return cudf::inner_join(lhs, rhs, lhs_on, rhs_on, nulls_equal);                 \
    };                                                                                \
    BM_join<key_type, payload_type, nullable>(inner_join_fn, st);                     \
  }

// Instantiate the inner-join benchmark for 32-bit and 64-bit key/payload types.
// The final argument is forwarded as BM_join's `Nullable` template argument.
JOIN_BENCHMARK_DEFINE(join_32bit, int32_t, int32_t, false);
JOIN_BENCHMARK_DEFINE(join_64bit, int64_t, int64_t, false);
JOIN_BENCHMARK_DEFINE(join_32bit_nulls, int32_t, int32_t, true);
JOIN_BENCHMARK_DEFINE(join_64bit_nulls, int64_t, int64_t, true);

// Defines benchmark `name` on the `Join` fixture for the given key/payload types.
// The body wraps cudf::left_anti_join in a lambda and passes it, together with the
// benchmark state, to BM_join; `nullable` is forwarded as BM_join's `Nullable`
// template argument.
#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable)       \
  BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type)                     \
  (::benchmark::State & st)                                                           \
  {                                                                                   \
    auto const anti_join_fn = [](cudf::table_view const& lhs,                         \
                                 cudf::table_view const& rhs,                         \
                                 std::vector<cudf::size_type> const& lhs_on,          \
                                 std::vector<cudf::size_type> const& rhs_on,          \
                                 cudf::null_equality nulls_equal) {                   \
      return cudf::left_anti_join(lhs, rhs, lhs_on, rhs_on, nulls_equal);             \
    };                                                                                \
    BM_join<key_type, payload_type, nullable>(anti_join_fn, st);                      \
  }

// Instantiate the left anti-join benchmark for 32-bit and 64-bit key/payload types.
// The final argument is forwarded as BM_join's `Nullable` template argument.
LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit, int32_t, int32_t, false);
LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit, int64_t, int64_t, false);
LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit_nulls, int32_t, int32_t, true);
LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit_nulls, int64_t, int64_t, true);

// Defines benchmark `name` on the `Join` fixture for the given key/payload types.
// The body wraps cudf::left_semi_join in a lambda and passes it, together with the
// benchmark state, to BM_join; `nullable` is forwarded as BM_join's `Nullable`
// template argument.
#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable)       \
  BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type)                     \
  (::benchmark::State & st)                                                           \
  {                                                                                   \
    auto const semi_join_fn = [](cudf::table_view const& lhs,                         \
                                 cudf::table_view const& rhs,                         \
                                 std::vector<cudf::size_type> const& lhs_on,          \
                                 std::vector<cudf::size_type> const& rhs_on,          \
                                 cudf::null_equality nulls_equal) {                   \
      return cudf::left_semi_join(lhs, rhs, lhs_on, rhs_on, nulls_equal);             \
    };                                                                                \
    BM_join<key_type, payload_type, nullable>(semi_join_fn, st);                      \
  }

// Instantiate the left semi-join benchmark for 32-bit and 64-bit key/payload types.
// The final argument is forwarded as BM_join's `Nullable` template argument.
LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit, int32_t, int32_t, false);
LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit, int64_t, int64_t, false);
LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit_nulls, int32_t, int32_t, true);
LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit_nulls, int64_t, int64_t, true);

// join -----------------------------------------------------------------------
BENCHMARK_REGISTER_F(Join, join_32bit)
->Unit(benchmark::kMillisecond)
->Args({100'000, 100'000})
Expand Down Expand Up @@ -154,3 +203,78 @@ BENCHMARK_REGISTER_F(Join, join_64bit_nulls)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();

// left anti-join -------------------------------------------------------------
// Registers the left anti-join benchmarks. BM_join reads each Args({a, b}) pair as
// {build_table_size, probe_table_size} through state.range(0) and state.range(1).
// Results are reported in milliseconds using manually recorded time.
// NOTE(review): the 64-bit variants run fewer, smaller configurations than the
// 32-bit ones - presumably to bound device memory use; confirm.
BENCHMARK_REGISTER_F(Join, left_anti_join_32bit)
->Unit(benchmark::kMillisecond)
->Args({100'000, 100'000})
->Args({100'000, 400'000})
->Args({100'000, 1'000'000})
->Args({10'000'000, 10'000'000})
->Args({10'000'000, 40'000'000})
->Args({10'000'000, 100'000'000})
->Args({100'000'000, 100'000'000})
->Args({80'000'000, 240'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, left_anti_join_64bit)
->Unit(benchmark::kMillisecond)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, left_anti_join_32bit_nulls)
->Unit(benchmark::kMillisecond)
->Args({100'000, 100'000})
->Args({100'000, 400'000})
->Args({100'000, 1'000'000})
->Args({10'000'000, 10'000'000})
->Args({10'000'000, 40'000'000})
->Args({10'000'000, 100'000'000})
->Args({100'000'000, 100'000'000})
->Args({80'000'000, 240'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, left_anti_join_64bit_nulls)
->Unit(benchmark::kMillisecond)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();

// left semi-join -------------------------------------------------------------
// Registers the left semi-join benchmarks. BM_join reads each Args({a, b}) pair as
// {build_table_size, probe_table_size} through state.range(0) and state.range(1).
// Results are reported in milliseconds using manually recorded time.
// NOTE(review): the 64-bit variants run fewer, smaller configurations than the
// 32-bit ones - presumably to bound device memory use; confirm.
BENCHMARK_REGISTER_F(Join, left_semi_join_32bit)
->Unit(benchmark::kMillisecond)
->Args({100'000, 100'000})
->Args({100'000, 400'000})
->Args({100'000, 1'000'000})
->Args({10'000'000, 10'000'000})
->Args({10'000'000, 40'000'000})
->Args({10'000'000, 100'000'000})
->Args({100'000'000, 100'000'000})
->Args({80'000'000, 240'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, left_semi_join_64bit)
->Unit(benchmark::kMillisecond)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, left_semi_join_32bit_nulls)
->Unit(benchmark::kMillisecond)
->Args({100'000, 100'000})
->Args({100'000, 400'000})
->Args({100'000, 1'000'000})
->Args({10'000'000, 10'000'000})
->Args({10'000'000, 40'000'000})
->Args({10'000'000, 100'000'000})
->Args({100'000'000, 100'000'000})
->Args({80'000'000, 240'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, left_semi_join_64bit_nulls)
->Unit(benchmark::kMillisecond)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();

4 changes: 2 additions & 2 deletions cpp/include/cudf/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,13 +424,13 @@ std::unique_ptr<rmm::device_uvector<size_type>> left_anti_join(
* TableB: {{1, 2, 3}, {1, 2, 5}}
* left_on: {0}
* right_on: {1}
* Result: {{0}, {1}}
* Result: {{0}}
*
* TableA: {{0, 1, 2}, {1, 2, 5}}
* TableB: {{1, 2, 3}}
* left_on: {0}
* right_on: {0}
* Result: { {0} {1} }
* Result: { {0}, {1} }
* @endcode
*
* @throw cudf::logic_error if number of elements in `left_on` or `right_on`
Expand Down
22 changes: 16 additions & 6 deletions cpp/src/join/semi_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,22 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
equality_build);
auto hash_table = *hash_table_ptr;

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
right_num_rows,
[hash_table] __device__(size_type idx) mutable {
hash_table.insert(thrust::make_pair(idx, true));
});
// if compare_nulls == NOT_EQUAL, we can simply ignore any rows that are
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we also adjust the hash table size based on the number of valid rows? Not sure how important this is for join operations, just a thought.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something like this seems to work. I have to do a count_unset_bits() on the aggregate bitmask though. This also feels mildly spooky to change, as I'm trying to tread lightly in this unfamiliar module :)

auto const row_bitmask = (compare_nulls == null_equality::EQUAL)
                           ? rmm::device_buffer{}
                           : cudf::detail::bitmask_and(right_flattened_keys, stream);
size_type right_row_ignore_count = compare_nulls == null_equality::EQUAL ? 0 : 
  count_unset_bits(static_cast<const bitmask_type*>(row_bitmask.data()), 0, right_num_rows);
...
size_t const hash_table_size = compute_hash_table_size(right_num_rows - right_row_ignore_count);

// NULL in any key column, as they will never compare equal.
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
auto const row_bitmask = (compare_nulls == null_equality::EQUAL)
? rmm::device_buffer{0, stream}
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
: cudf::detail::bitmask_and(right_flattened_keys, stream);
// skip rows that are null here.
thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
right_num_rows,
[hash_table, row_bitmask = static_cast<bitmask_type const*>(row_bitmask.data())] __device__(
size_type idx) mutable {
robertmaynard marked this conversation as resolved.
Show resolved Hide resolved
if (!row_bitmask || cudf::bit_is_set(row_bitmask, idx)) {
harrism marked this conversation as resolved.
Show resolved Hide resolved
hash_table.insert(thrust::make_pair(idx, true));
}
});

//
// Now we have a hash table, we need to iterate over the rows of the left table
Expand Down