Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cudf::thread_index_type in concatenate.cu. #13906

Merged
5 commits merged on Sep 2, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
@@ -118,13 +118,14 @@ __global__ void concatenate_masks_kernel(column_device_view const* views,
size_type number_of_mask_bits,
size_type* out_valid_count)
{
size_type mask_index = threadIdx.x + blockIdx.x * blockDim.x;
cudf::thread_index_type tidx = threadIdx.x + blockIdx.x * blockDim.x;

auto active_mask = __ballot_sync(0xFFFF'FFFFu, mask_index < number_of_mask_bits);
auto active_mask = __ballot_sync(0xFFFF'FFFFu, tidx < number_of_mask_bits);

size_type warp_valid_count = 0;

while (mask_index < number_of_mask_bits) {
while (tidx < number_of_mask_bits) {
auto const mask_index = static_cast<cudf::size_type>(tidx);
size_type const source_view_index =
thrust::upper_bound(
thrust::seq, output_offsets, output_offsets + number_of_views, mask_index) -
@@ -141,8 +142,8 @@ __global__ void concatenate_masks_kernel(column_device_view const* views,
warp_valid_count += __popc(new_word);
}

mask_index += blockDim.x * gridDim.x;
active_mask = __ballot_sync(active_mask, mask_index < number_of_mask_bits);
tidx += blockDim.x * gridDim.x;
active_mask = __ballot_sync(active_mask, tidx < number_of_mask_bits);
}

using detail::single_lane_block_sum_reduce;
@@ -195,8 +196,8 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views,
auto const output_size = output_view.size();
auto* output_data = output_view.data<T>();

int64_t output_index = threadIdx.x + blockIdx.x * blockDim.x;
size_type warp_valid_count = 0;
cudf::thread_index_type output_index = threadIdx.x + blockIdx.x * blockDim.x;
size_type warp_valid_count = 0;

unsigned active_mask;
if (Nullable) { active_mask = __ballot_sync(0xFFFF'FFFFu, output_index < output_size); }
Expand Down