-
Notifications
You must be signed in to change notification settings - Fork 915
/
mixed_join_semi.cu
569 lines (512 loc) · 25.5 KB
/
mixed_join_semi.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cudf/ast/detail/expression_parser.hpp>
#include <cudf/ast/expressions.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/join.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <join/hash_join.cuh>
#include <join/join_common_utils.cuh>
#include <join/join_common_utils.hpp>
#include <join/mixed_join_kernels_semi.cuh>
#include <rmm/cuda_stream_view.hpp>
#include <optional>
#include <utility>
namespace cudf {
namespace detail {
namespace {
/**
* @brief Device functor to create a pair of hash value and index for a given row.
*/
struct make_pair_function_semi {
__device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept
{
// The value is irrelevant since we only ever use the hash map to check for
// membership of a particular row index.
return cuco::make_pair(static_cast<hash_value_type>(i), 0);
}
};
/**
* @brief Equality comparator that composes two row_equality comparators.
*/
class double_row_equality {
public:
double_row_equality(row_equality equality_comparator, row_equality conditional_comparator)
: _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator}
{
}
__device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
{
return _equality_comparator(lhs_row_index, rhs_row_index) &&
_conditional_comparator(lhs_row_index, rhs_row_index);
}
private:
row_equality _equality_comparator;
row_equality _conditional_comparator;
};
} // namespace
std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi(
table_view const& left_equality,
table_view const& right_equality,
table_view const& left_conditional,
table_view const& right_conditional,
ast::expression const& binary_predicate,
null_equality compare_nulls,
join_kind join_type,
std::optional<std::pair<std::size_t, device_span<size_type const>>> output_size_data,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) &&
(join_type != join_kind::FULL_JOIN),
"Inner, left, and full joins should use mixed_join.");
CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(),
"The left conditional and equality tables must have the same number of rows.");
CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(),
"The right conditional and equality tables must have the same number of rows.");
auto const right_num_rows{right_conditional.num_rows()};
auto const left_num_rows{left_conditional.num_rows()};
auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows);
// The "outer" table is the larger of the two tables. The kernels are
// launched with one thread per row of the outer table, which also means that
// it is the probe table for the hash
auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows};
// We can immediately filter out cases where the right table is empty. In
// some cases, we return all the rows of the left table with a corresponding
// null index for the right table; in others, we return an empty output.
if (right_num_rows == 0) {
switch (join_type) {
// Anti and semi return all the row indices from left
// with a corresponding NULL from the right.
case join_kind::LEFT_ANTI_JOIN:
return get_trivial_left_join_indices(left_conditional, stream).first;
// Inner and left semi joins return empty output because no matches can exist.
case join_kind::LEFT_SEMI_JOIN:
return std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
default: CUDF_FAIL("Invalid join kind."); break;
}
} else if (left_num_rows == 0) {
switch (join_type) {
// Anti and semi joins both return empty sets.
case join_kind::LEFT_ANTI_JOIN:
case join_kind::LEFT_SEMI_JOIN:
return std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
default: CUDF_FAIL("Invalid join kind."); break;
}
}
// If evaluating the expression may produce null outputs we create a nullable
// output column and follow the null-supporting expression evaluation code
// path.
auto const has_nulls =
cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) ||
binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream);
auto const parser = ast::detail::expression_parser{
binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr};
CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8,
"The expression must produce a boolean output.");
// TODO: The non-conditional join impls start with a dictionary matching,
// figure out what that is and what it's needed for (and if conditional joins
// need to do the same).
auto& probe = swap_tables ? right_equality : left_equality;
auto& build = swap_tables ? left_equality : right_equality;
auto probe_view = table_device_view::create(probe, stream);
auto build_view = table_device_view::create(build, stream);
auto left_conditional_view = table_device_view::create(left_conditional, stream);
auto right_conditional_view = table_device_view::create(right_conditional, stream);
auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view;
row_equality equality_probe{
cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls};
semi_map_type hash_table{compute_hash_table_size(build.num_rows()),
std::numeric_limits<hash_value_type>::max(),
cudf::detail::JoinNoneValue,
detail::hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};
// Create hash table containing all keys found in right table
// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
// won't be able to support AST conditions for those types anyway.
auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)};
row_hash const hash_build{build_nulls, *build_view};
// Since we may see multiple rows that are identical in the equality tables
// but differ in the conditional tables, the equality comparator used for
// insertion must account for both sets of tables. An alternative solution
// would be to use a multimap, but that solution would store duplicates where
// equality and conditional rows are equal, so this approach is preferable.
// One way to make this solution even more efficient would be to only include
// the columns of the conditional table that are used by the expression, but
// that requires additional plumbing through the AST machinery and is out of
// scope for now.
row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls};
row_equality equality_build_conditional{
build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls};
double_row_equality equality_build{equality_build_equality, equality_build_conditional};
make_pair_function_semi pair_func_build{};
auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build);
// skip rows that are null here.
if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) {
hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value());
} else {
thrust::counting_iterator<cudf::size_type> stencil(0);
auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream);
row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())};
// insert valid rows
hash_table.insert_if(
iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value());
}
auto hash_table_view = hash_table.get_device_view();
// For inner joins we support optimizing the join by launching one thread for
// whichever table is larger rather than always using the left table.
detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE);
auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block;
join_kind const kernel_join_type =
join_type == join_kind::FULL_JOIN ? join_kind::LEFT_JOIN : join_type;
// If the join size data was not provided as an input, compute it here.
std::size_t join_size;
// Using an optional because we only need to allocate a new vector if one was
// not passed as input, and rmm::device_uvector is not default constructible
std::optional<rmm::device_uvector<size_type>> matches_per_row{};
device_span<size_type const> matches_per_row_span{};
if (output_size_data.has_value()) {
join_size = output_size_data->first;
matches_per_row_span = output_size_data->second;
} else {
// Allocate storage for the counter used to get the size of the join output
rmm::device_scalar<std::size_t> size(0, stream, mr);
matches_per_row =
rmm::device_uvector<size_type>{static_cast<std::size_t>(outer_num_rows), stream, mr};
// Note that the view goes out of scope after this else statement, but the
// data owned by matches_per_row stays alive so the data pointer is valid.
auto mutable_matches_per_row_span = cudf::device_span<size_type>{
matches_per_row->begin(), static_cast<std::size_t>(outer_num_rows)};
matches_per_row_span = cudf::device_span<size_type const>{
matches_per_row->begin(), static_cast<std::size_t>(outer_num_rows)};
if (has_nulls) {
compute_mixed_join_output_size_semi<DEFAULT_JOIN_BLOCK_SIZE, true>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
equality_probe,
kernel_join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
mutable_matches_per_row_span);
} else {
compute_mixed_join_output_size_semi<DEFAULT_JOIN_BLOCK_SIZE, false>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
equality_probe,
kernel_join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
mutable_matches_per_row_span);
}
join_size = size.value(stream);
}
if (join_size == 0) { return std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr); }
// Given the number of matches per row, we need to compute the offsets for insertion.
auto join_result_offsets =
rmm::device_uvector<size_type>{static_cast<std::size_t>(outer_num_rows), stream, mr};
thrust::exclusive_scan(rmm::exec_policy{stream},
matches_per_row_span.begin(),
matches_per_row_span.end(),
join_result_offsets.begin());
auto left_indices = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
auto const& join_output_l = left_indices->data();
if (has_nulls) {
mixed_join_semi<DEFAULT_JOIN_BLOCK_SIZE, true>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
equality_probe,
kernel_join_type,
hash_table_view,
join_output_l,
parser.device_expression_data,
join_result_offsets.data(),
swap_tables);
} else {
mixed_join_semi<DEFAULT_JOIN_BLOCK_SIZE, false>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
equality_probe,
kernel_join_type,
hash_table_view,
join_output_l,
parser.device_expression_data,
join_result_offsets.data(),
swap_tables);
}
return left_indices;
}
std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>>
compute_mixed_join_output_size_semi(table_view const& left_equality,
table_view const& right_equality,
table_view const& left_conditional,
table_view const& right_conditional,
ast::expression const& binary_predicate,
null_equality compare_nulls,
join_kind join_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(
(join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) &&
(join_type != join_kind::FULL_JOIN),
"Inner, left, and full join size estimation should use compute_mixed_join_output_size.");
CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(),
"The left conditional and equality tables must have the same number of rows.");
CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(),
"The right conditional and equality tables must have the same number of rows.");
auto const right_num_rows{right_conditional.num_rows()};
auto const left_num_rows{left_conditional.num_rows()};
auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows);
// The "outer" table is the larger of the two tables. The kernels are
// launched with one thread per row of the outer table, which also means that
// it is the probe table for the hash
auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows};
auto matches_per_row = std::make_unique<rmm::device_uvector<size_type>>(
static_cast<std::size_t>(outer_num_rows), stream, mr);
auto matches_per_row_span = cudf::device_span<size_type>{
matches_per_row->begin(), static_cast<std::size_t>(outer_num_rows)};
// We can immediately filter out cases where one table is empty. In
// some cases, we return all the rows of the other table with a corresponding
// null index for the empty table; in others, we return an empty output.
if (right_num_rows == 0) {
switch (join_type) {
// Left, left anti, and full all return all the row indices from left
// with a corresponding NULL from the right.
case join_kind::LEFT_ANTI_JOIN: {
thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1);
return {left_num_rows, std::move(matches_per_row)};
}
// Inner and left semi joins return empty output because no matches can exist.
case join_kind::LEFT_SEMI_JOIN: return {0, std::move(matches_per_row)};
default: CUDF_FAIL("Invalid join kind."); break;
}
} else if (left_num_rows == 0) {
switch (join_type) {
// Left, left anti, left semi, and inner joins all return empty sets.
case join_kind::LEFT_ANTI_JOIN:
case join_kind::LEFT_SEMI_JOIN: {
thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0);
return {0, std::move(matches_per_row)};
}
default: CUDF_FAIL("Invalid join kind."); break;
}
}
// If evaluating the expression may produce null outputs we create a nullable
// output column and follow the null-supporting expression evaluation code
// path.
auto const has_nulls =
cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) ||
binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream);
auto const parser = ast::detail::expression_parser{
binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr};
CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8,
"The expression must produce a boolean output.");
// TODO: The non-conditional join impls start with a dictionary matching,
// figure out what that is and what it's needed for (and if conditional joins
// need to do the same).
auto& probe = swap_tables ? right_equality : left_equality;
auto& build = swap_tables ? left_equality : right_equality;
auto probe_view = table_device_view::create(probe, stream);
auto build_view = table_device_view::create(build, stream);
auto left_conditional_view = table_device_view::create(left_conditional, stream);
auto right_conditional_view = table_device_view::create(right_conditional, stream);
auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view;
row_equality equality_probe{
cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls};
semi_map_type hash_table{compute_hash_table_size(build.num_rows()),
std::numeric_limits<hash_value_type>::max(),
cudf::detail::JoinNoneValue,
detail::hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};
// Create hash table containing all keys found in right table
// TODO: To add support for nested columns we will need to flatten in many
// places. However, this probably isn't worth adding any time soon since we
// won't be able to support AST conditions for those types anyway.
auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)};
row_hash const hash_build{build_nulls, *build_view};
// Since we may see multiple rows that are identical in the equality tables
// but differ in the conditional tables, the equality comparator used for
// insertion must account for both sets of tables. An alternative solution
// would be to use a multimap, but that solution would store duplicates where
// equality and conditional rows are equal, so this approach is preferable.
// One way to make this solution even more efficient would be to only include
// the columns of the conditional table that are used by the expression, but
// that requires additional plumbing through the AST machinery and is out of
// scope for now.
row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls};
row_equality equality_build_conditional{
build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls};
double_row_equality equality_build{equality_build_equality, equality_build_conditional};
make_pair_function_semi pair_func_build{};
auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build);
// skip rows that are null here.
if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) {
hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value());
} else {
thrust::counting_iterator<cudf::size_type> stencil(0);
auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream);
row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())};
// insert valid rows
hash_table.insert_if(
iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value());
}
auto hash_table_view = hash_table.get_device_view();
// For inner joins we support optimizing the join by launching one thread for
// whichever table is larger rather than always using the left table.
detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE);
auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block;
// Allocate storage for the counter used to get the size of the join output
rmm::device_scalar<std::size_t> size(0, stream, mr);
// Determine number of output rows without actually building the output to simply
// find what the size of the output will be.
if (has_nulls) {
compute_mixed_join_output_size_semi<DEFAULT_JOIN_BLOCK_SIZE, true>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
equality_probe,
join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
matches_per_row_span);
} else {
compute_mixed_join_output_size_semi<DEFAULT_JOIN_BLOCK_SIZE, false>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
equality_probe,
join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
matches_per_row_span);
}
return {size.value(stream), std::move(matches_per_row)};
}
} // namespace detail
std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_left_semi_join_size(
table_view const& left_equality,
table_view const& right_equality,
table_view const& left_conditional,
table_view const& right_conditional,
ast::expression const& binary_predicate,
null_equality compare_nulls,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::compute_mixed_join_output_size_semi(left_equality,
right_equality,
left_conditional,
right_conditional,
binary_predicate,
compare_nulls,
detail::join_kind::LEFT_SEMI_JOIN,
rmm::cuda_stream_default,
mr);
}
std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_semi_join(
table_view const& left_equality,
table_view const& right_equality,
table_view const& left_conditional,
table_view const& right_conditional,
ast::expression const& binary_predicate,
null_equality compare_nulls,
std::optional<std::pair<std::size_t, device_span<size_type const>>> output_size_data,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::mixed_join_semi(left_equality,
right_equality,
left_conditional,
right_conditional,
binary_predicate,
compare_nulls,
detail::join_kind::LEFT_SEMI_JOIN,
output_size_data,
rmm::cuda_stream_default,
mr);
}
std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_left_anti_join_size(
table_view const& left_equality,
table_view const& right_equality,
table_view const& left_conditional,
table_view const& right_conditional,
ast::expression const& binary_predicate,
null_equality compare_nulls,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::compute_mixed_join_output_size_semi(left_equality,
right_equality,
left_conditional,
right_conditional,
binary_predicate,
compare_nulls,
detail::join_kind::LEFT_ANTI_JOIN,
rmm::cuda_stream_default,
mr);
}
std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_anti_join(
table_view const& left_equality,
table_view const& right_equality,
table_view const& left_conditional,
table_view const& right_conditional,
ast::expression const& binary_predicate,
null_equality compare_nulls,
std::optional<std::pair<std::size_t, device_span<size_type const>>> output_size_data,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::mixed_join_semi(left_equality,
right_equality,
left_conditional,
right_conditional,
binary_predicate,
compare_nulls,
detail::join_kind::LEFT_ANTI_JOIN,
output_size_data,
rmm::cuda_stream_default,
mr);
}
} // namespace cudf