From 1f98a3a0a88bb136b0223cfd6730f7f83592e6ce Mon Sep 17 00:00:00 2001
From: Olivier Lapicque
Date: Mon, 6 May 2019 16:14:59 -0700
Subject: [PATCH 1/3] Fix potentially uninitialized tail rows when skiprow!=0

---
 cpp/src/io/orc/stripe_data.cu | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
index f5cf5c6ac1b..e83cc1e92c4 100644
--- a/cpp/src/io/orc/stripe_data.cu
+++ b/cpp/src/io/orc/stripe_data.cu
@@ -1376,8 +1376,16 @@ static __device__ void DecodeRowPositions(orcdec_state_s *s, size_t first_row, i
 {
     if (t == 0)
     {
-        s->u.rowdec.nz_count = min(min(s->chunk.skip_count, s->top.data.max_vals), NTHREADS);
-        s->chunk.skip_count -= s->u.rowdec.nz_count;
+        if (s->chunk.skip_count != 0)
+        {
+            s->u.rowdec.nz_count = min(min(s->chunk.skip_count, s->top.data.max_vals), NTHREADS);
+            s->chunk.skip_count -= s->u.rowdec.nz_count;
+            s->top.data.nrows = s->u.rowdec.nz_count;
+        }
+        else
+        {
+            s->u.rowdec.nz_count = 0;
+        }
     }
     __syncthreads();
     if (t < s->u.rowdec.nz_count)
@@ -1576,7 +1584,7 @@ gpuDecodeOrcColumnData(ColumnDesc *chunks, DictionaryEntry *global_dictionary, i
     __syncthreads();
     if (t == 0)
     {
-        s->top.data.cur_row = s->chunk.start_row;
+        s->top.data.cur_row = min(s->chunk.start_row, max((int32_t)(first_row - s->chunk.skip_count), 0));
         s->top.data.end_row = s->chunk.start_row + s->chunk.num_rows;
         s->top.data.buffered_count = 0;
         if (s->top.data.end_row > first_row + max_num_rows)

From 89426d04e899c59ef71fff3487a83d696570d390 Mon Sep 17 00:00:00 2001
From: Olivier Lapicque
Date: Mon, 6 May 2019 16:20:42 -0700
Subject: [PATCH 2/3] Update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e9515c390de..85c836a518e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -135,6 +135,7 @@
 - PR #1618 ORC reader: fix assert & data output when nrows/skiprows isn't aligned to stripe boundaries
 - PR #1631 Fix failure of TYPES_TEST on some gcc-7 based systems.
 - PR #1641 CSV Reader: Fix skip_blank_lines behavior with Windows line terminators (\r\n)
+- PR #1648 ORC reader: fix non-deterministic output when skiprows is non-zero
 
 
 # cuDF 0.6.1 (25 Mar 2019)

From 17df0c16a13adb815b00c95ceae83ad2ac398b72 Mon Sep 17 00:00:00 2001
From: Olivier Lapicque
Date: Mon, 6 May 2019 16:53:45 -0700
Subject: [PATCH 3/3] Multi-stripe starting row fix

---
 cpp/src/io/orc/stripe_data.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
index e83cc1e92c4..6375338a907 100644
--- a/cpp/src/io/orc/stripe_data.cu
+++ b/cpp/src/io/orc/stripe_data.cu
@@ -1584,7 +1584,7 @@ gpuDecodeOrcColumnData(ColumnDesc *chunks, DictionaryEntry *global_dictionary, i
     __syncthreads();
     if (t == 0)
     {
-        s->top.data.cur_row = min(s->chunk.start_row, max((int32_t)(first_row - s->chunk.skip_count), 0));
+        s->top.data.cur_row = max(s->chunk.start_row, max((int32_t)(first_row - s->chunk.skip_count), 0));
         s->top.data.end_row = s->chunk.start_row + s->chunk.num_rows;
         s->top.data.buffered_count = 0;
         if (s->top.data.end_row > first_row + max_num_rows)