From a08ec0e1b0609716e72d81b266eeb54670ed2e33 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)"
 <42624703+rgsl888prabhu@users.noreply.github.com>
Date: Mon, 15 Feb 2021 16:39:42 -0600
Subject: [PATCH] Fix skiprows issue with ORC Reader (#7359)

closes #7343

The validity bits in streams are placed msb to lsb in a byte, [True, False, True. False. True, True, True, False] -> 10101110.
So, when it is being analyzed as 32 bit chunk, we can't apply mask directly, which caused this issue. `__brev(__byte_perm(bits, 0, 0x0123)) ` takes care of that issue and rearranges the bits as per the expectation.

Authors:
  - Ram (Ramakrishna Prabhu) (@rgsl888prabhu)

Approvers:
  - GALI PREM SAGAR (@galipremsagar)
  - Vukasin Milovanovic (@vuule)

URL: https://github.com/rapidsai/cudf/pull/7359
---
 cpp/src/io/orc/stripe_data.cu      |  6 ++++--
 python/cudf/cudf/tests/test_orc.py | 25 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
index 6f326fc0576..1af5e088c22 100644
--- a/cpp/src/io/orc/stripe_data.cu
+++ b/cpp/src/io/orc/stripe_data.cu
@@ -1209,8 +1209,10 @@ __global__ void __launch_bounds__(block_size)
         uint32_t skippedrows = min(static_cast<uint32_t>(first_row - row_in), nrows);
         uint32_t skip_count  = 0;
         for (uint32_t i = t * 32; i < skippedrows; i += 32 * 32) {
-          uint32_t bits = s->vals.u32[i >> 5];
-          if (i + 32 > skippedrows) { bits &= (1 << (skippedrows - i)) - 1; }
+          // Need to arrange the bytes to apply mask properly.
+          uint32_t bits = (i + 32 <= skippedrows) ? s->vals.u32[i >> 5]
+                                                  : (__byte_perm(s->vals.u32[i >> 5], 0, 0x0123) &
+                                                     (0xffffffffu << (0x20 - skippedrows + i)));
           skip_count += __popc(bits);
         }
         skip_count = warp_reduce(temp_storage.wr_storage[t / 32]).Sum(skip_count);
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 62696303ecb..ed91e909f25 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -8,6 +8,7 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.orc
+import pyorc
 import pytest
 
 import cudf
@@ -318,6 +319,30 @@ def test_orc_read_rows(datadir, skiprows, num_rows):
     np.testing.assert_allclose(pdf, gdf)
 
 
+def test_orc_read_skiprows(tmpdir):
+    buff = BytesIO()
+    df = pd.DataFrame(
+        {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]},
+        dtype=pd.BooleanDtype(),
+    )
+    writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean()))
+    tuples = list(
+        map(
+            lambda x: (None,) if x[0] is pd.NA else x,
+            list(df.itertuples(index=False, name=None)),
+        )
+    )
+    writer.writerows(tuples)
+    writer.close()
+
+    skiprows = 10
+
+    expected = cudf.read_orc(buff)[skiprows::].reset_index(drop=True)
+    got = cudf.read_orc(buff, skiprows=skiprows)
+
+    assert_eq(expected, got)
+
+
 def test_orc_reader_uncompressed_block(datadir):
     path = datadir / "uncompressed_snappy.orc"
     try: