diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f060a164be..892979631fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,6 +95,7 @@ - PR #3256 Fix orc writer crash with multiple string columns - PR #3211 Fix breaking change caused by rapidsai/rmm#167 - PR #3265 Fix dangling pointer in `is_sorted` +- PR #3267 ORC writer: fix incorrect ByteRLE encoding of long literal runs # cuDF 0.10.0 (16 Oct 2019) diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b1ab59d6822..72f38b74152 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -228,7 +228,7 @@ static __device__ uint32_t ByteRLE(orcenc_state_s *s, const uint8_t *inbuf, uint if (t < literal_run) { uint32_t run_id = t >> 7; - uint32_t run = (run_id == num_runs - 1) ? literal_run & 0x7f : 0x80; + uint32_t run = min(literal_run - run_id * 128, 128); if (!(t & 0x7f)) dst[run_id + t] = 0x100 - run; dst[run_id + t + 1] = (cid == CI_PRESENT) ? __brev(v0) >> 24 : v0; @@ -254,11 +254,10 @@ static __device__ uint32_t ByteRLE(orcenc_state_s *s, const uint8_t *inbuf, uint inpos += 130; repeat_run -= 130; } - if (!flush) + if (!flush && repeat_run == numvals) { // Wait for more data in case we can continue the run later - if (repeat_run == numvals && !flush) - break; + break; } if (repeat_run >= 3) {