Skip to content

Commit

Permalink
Use cudf::thread_index_type in concatenate.cu. (#13906)
Browse files (browse the repository at this point in the history)
This PR uses `cudf::thread_index_type` in `concatenate.cu` to avoid the risk of integer overflow in thread-index arithmetic.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Divye Gala (https://github.com/divyegala)
  - Yunsong Wang (https://github.com/PointKernel)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: #13906
  • Loading branch information
bdice authored Sep 2, 2023
1 parent 2b7294b commit bbbb143
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,14 @@ __global__ void concatenate_masks_kernel(column_device_view const* views,
size_type number_of_mask_bits,
size_type* out_valid_count)
{
size_type mask_index = threadIdx.x + blockIdx.x * blockDim.x;

auto active_mask = __ballot_sync(0xFFFF'FFFFu, mask_index < number_of_mask_bits);
auto tidx = cudf::detail::grid_1d::global_thread_id();
auto const stride = cudf::detail::grid_1d::grid_stride();
auto active_mask = __ballot_sync(0xFFFF'FFFFu, tidx < number_of_mask_bits);

size_type warp_valid_count = 0;

while (mask_index < number_of_mask_bits) {
while (tidx < number_of_mask_bits) {
auto const mask_index = static_cast<cudf::size_type>(tidx);
size_type const source_view_index =
thrust::upper_bound(
thrust::seq, output_offsets, output_offsets + number_of_views, mask_index) -
Expand All @@ -141,8 +142,8 @@ __global__ void concatenate_masks_kernel(column_device_view const* views,
warp_valid_count += __popc(new_word);
}

mask_index += blockDim.x * gridDim.x;
active_mask = __ballot_sync(active_mask, mask_index < number_of_mask_bits);
tidx += stride;
active_mask = __ballot_sync(active_mask, tidx < number_of_mask_bits);
}

using detail::single_lane_block_sum_reduce;
Expand Down Expand Up @@ -195,7 +196,8 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views,
auto const output_size = output_view.size();
auto* output_data = output_view.data<T>();

int64_t output_index = threadIdx.x + blockIdx.x * blockDim.x;
auto output_index = cudf::detail::grid_1d::global_thread_id();
auto const stride = cudf::detail::grid_1d::grid_stride();
size_type warp_valid_count = 0;

unsigned active_mask;
Expand Down Expand Up @@ -224,7 +226,7 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views,
warp_valid_count += __popc(new_word);
}

output_index += blockDim.x * gridDim.x;
output_index += stride;
if (Nullable) { active_mask = __ballot_sync(active_mask, output_index < output_size); }
}

Expand Down

0 comments on commit bbbb143

Please sign in to comment.