diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernels.cu index 5638f0ddd38..efaea841e45 100644 --- a/cpp/src/join/mixed_join_kernels.cu +++ b/cpp/src/join/mixed_join_kernels.cu @@ -35,18 +35,19 @@ namespace detail { namespace cg = cooperative_groups; template -__global__ void mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables) +__launch_bounds__(block_size) __global__ + void mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) { // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index c8cfc9998f0..63a69554245 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -32,17 +32,18 @@ namespace detail { namespace cg = cooperative_groups; template -__global__ void mixed_join_semi(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::semi_map_type::device_view hash_table_view, - size_type* join_output_l, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables) +__launch_bounds__(block_size) __global__ + void mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) { // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernels.cu index 1a08b8792c2..22c71bfc33a 100644 --- a/cpp/src/join/mixed_join_size_kernels.cu +++ b/cpp/src/join/mixed_join_size_kernels.cu @@ -35,7 +35,7 @@ namespace detail { namespace cg = cooperative_groups; template -__global__ void compute_mixed_join_output_size( +__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, table_device_view probe, diff --git a/cpp/src/join/mixed_join_size_kernels_semi.cu b/cpp/src/join/mixed_join_size_kernels_semi.cu index 2c077a698f8..f6b9fb85bbb 100644 --- a/cpp/src/join/mixed_join_size_kernels_semi.cu +++ b/cpp/src/join/mixed_join_size_kernels_semi.cu @@ -32,7 +32,7 @@ namespace detail { namespace cg = cooperative_groups; template -__global__ void compute_mixed_join_output_size_semi( +__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size_semi( table_device_view left_table, table_device_view right_table, table_device_view probe,