From d07f581c7669eb9cd17a5e1a0b2e7d07d52b1ad6 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 11 Dec 2020 15:41:07 -0500 Subject: [PATCH] Address PR comments --- CHANGELOG.md | 2 -- .../cudf/column/column_device_view.cuh | 23 +++++++++++++++ cpp/src/bitmask/null_mask.cu | 28 ++----------------- cpp/src/io/orc/stripe_enc.cu | 14 +++------- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 344e2a2a609..2064f0ea04d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,8 +18,6 @@ ## Bug Fixes -- PR #6889 Fix nullmask offset handling in parquet and orc writer - - PR #6922 Fix N/A detection for empty fields in CSV reader - PR #6912 Fix rmm_mode=managed parameter for gtests - PR #6945 Fix groupby agg/apply behaviour when no key columns are provided diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 0f9bcfd5cd9..a41edea6f6c 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -795,6 +795,29 @@ __device__ inline numeric::decimal64 const column_device_view::element + word_index(source_begin_bit + + destination_word_index * detail::size_in_bits())) { + next_word = source[source_word_index + 1]; + } + return __funnelshift_r(curr_word, next_word, source_begin_bit); +} + /** * @brief value accessor of column without null bitmask * A unary functor returns scalar value at `id`. diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 67a2b25cf2f..6a6e3f3f9f0 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -224,28 +224,6 @@ __global__ void count_set_bits_kernel(bitmask_type const *bitmask, if (threadIdx.x == 0) { atomicAdd(global_count, block_count); } } -/** - * @brief Convenience function to get offset word from a bitmask - * - * @see copy_offset_bitmask - * @see offset_bitmask_and - */ -__device__ bitmask_type get_mask_offset_word(bitmask_type const *__restrict__ source, - size_type destination_word_index, - size_type source_begin_bit, - size_type source_end_bit) -{ - size_type source_word_index = destination_word_index + word_index(source_begin_bit); - bitmask_type curr_word = source[source_word_index]; - bitmask_type next_word = 0; - if (word_index(source_end_bit) > - word_index(source_begin_bit + - destination_word_index * detail::size_in_bits())) { - next_word = source[source_word_index + 1]; - } - return __funnelshift_r(curr_word, next_word, source_begin_bit); -} - /** * For each range `[first_bit_indices[i], last_bit_indices[i])` * (where 0 <= i < `num_ranges`), count the number of bits set outside the range @@ -332,8 +310,8 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; destination_word_index < number_of_mask_words; destination_word_index += blockDim.x * gridDim.x) { - destination[destination_word_index] = - get_mask_offset_word(source, destination_word_index, source_begin_bit, source_end_bit); + destination[destination_word_index] = detail::get_mask_offset_word( + source, destination_word_index, source_begin_bit, source_end_bit); } } @@ -360,7 +338,7 @@ __global__ void offset_bitmask_and(bitmask_type *__restrict__ destination, destination_word_index += blockDim.x * gridDim.x) { bitmask_type destination_word = ~bitmask_type{0}; // All bits 1 for (size_type i = 0; i < num_sources; i++) { - destination_word &= get_mask_offset_word( + destination_word &= detail::get_mask_offset_word( source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size); } diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 49085314762..4fd5b765d11 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include "orc_common.h" @@ -715,16 +717,8 @@ __global__ void __launch_bounds__(block_size) size_type current_valid_offset = row + s->chunk.column_offset; size_type next_valid_offset = current_valid_offset + min(32, s->chunk.valid_rows); - size_type current_byte_index = current_valid_offset / 32; - size_type next_byte_index = next_valid_offset / 32; - - bitmask_type current_mask_word = s->chunk.valid_map_base[current_byte_index]; - bitmask_type next_mask_word = 0; - if (next_byte_index != current_byte_index) { - next_mask_word = s->chunk.valid_map_base[next_byte_index]; - } - bitmask_type mask = - __funnelshift_r(current_mask_word, next_mask_word, current_valid_offset); + bitmask_type mask = cudf::detail::get_mask_offset_word( + s->chunk.valid_map_base, 0, current_valid_offset, next_valid_offset); valid = 0xff & mask; } else { valid = 0xff;