From b8baed53e8e9ed9560973a118d7a8bdc09485bae Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Mon, 19 Apr 2021 10:46:11 -0500 Subject: [PATCH] Fix ORC reader issue with bytestream reader (#7988) Resolves an issue with the ORC reader. There were two problems: first, a check was missing to keep the stream index within the number of streams that need to be accessed; second, the position used to calculate the buffer length was wrong, because it was assigned a non-zero value even for a stream whose length is zero. Authors: - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) Approvers: - Ayush Dattagupta (https://github.com/ayushdg) - Paul Taylor (https://github.com/trxcllnt) - Devavret Makkar (https://github.com/devavret) URL: https://github.com/rapidsai/cudf/pull/7988 --- cpp/src/io/orc/stripe_data.cu | 4 ++-- cpp/src/io/orc/stripe_init.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index cd031af0dfb..4abccb5bf25 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -149,9 +149,9 @@ static __device__ void bytestream_init(volatile orc_bytestream_s *bs, const uint8_t *base, uint32_t len) { - uint32_t pos = static_cast(7 & reinterpret_cast(base)); + uint32_t pos = (len > 0) ? static_cast(7 & reinterpret_cast(base)) : 0; bs->base = base - pos; - bs->pos = (len > 0) ? 
pos : 0; + bs->pos = pos; bs->len = (len + pos + 7) & ~7; bs->fill_pos = 0; bs->fill_count = min(bs->len, bytestream_buffer_size) >> 3; diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 61917403b41..42cb15a56b7 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -40,7 +40,7 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat int strm_id = blockIdx.x * 4 + (threadIdx.x / 32); int lane_id = threadIdx.x % 32; - if (lane_id == 0) { s->info = strm_info[strm_id]; } + if (strm_id < num_streams && lane_id == 0) { s->info = strm_info[strm_id]; } __syncthreads(); if (strm_id < num_streams) {