From 411564b4234bf7ff95d2f1834fb458ad66906581 Mon Sep 17 00:00:00 2001
From: Mike Wilson <knobby@burntsheep.com>
Date: Wed, 4 May 2022 04:50:22 +0000
Subject: [PATCH 1/3] simplifying skiprows test

---
 python/cudf/cudf/tests/test_orc.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index c28358f5fa0..460557370ad 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -301,27 +301,26 @@ def test_orc_read_rows(datadir, skiprows, num_rows):
     assert_eq(pdf, gdf)
 
 
-def test_orc_read_skiprows(tmpdir):
+def test_orc_read_skiprows():
     buff = BytesIO()
-    df = pd.DataFrame(
-        {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]},
-        dtype=pd.BooleanDtype(),
-    )
+    data = [
+        True,
+        None,
+        True,
+        False,
+        None,
+        True,
+        True,
+        False,
+    ]
     writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean()))
-    tuples = list(
-        map(
-            lambda x: (None,) if x[0] is pd.NA else (bool(x[0]),),
-            list(df.itertuples(index=False, name=None)),
-        )
-    )
-    writer.writerows(tuples)
+    writer.writerows([(d,) for d in data])
     writer.close()
 
-    skiprows = 10
+    skiprows = 3
 
-    expected = cudf.read_orc(buff)[skiprows::].reset_index(drop=True)
+    expected = cudf.read_orc(buff)[skiprows:].reset_index(drop=True)
     got = cudf.read_orc(buff, skiprows=skiprows)
-
     assert_eq(expected, got)
 
 

From 3fe49546f80e5710e7886ec0589b5c0c4a85470a Mon Sep 17 00:00:00 2001
From: Mike Wilson <knobby@burntsheep.com>
Date: Thu, 5 May 2022 00:54:35 +0000
Subject: [PATCH 2/3] updating from review comments

---
 python/cudf/cudf/tests/test_orc.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 460557370ad..2b5f0972afe 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -317,7 +317,9 @@ def test_orc_read_skiprows():
     writer.writerows([(d,) for d in data])
     writer.close()
 
-    skiprows = 3
+    # Use skiprows=10: the boolean-specific bug fix this test covers
+    # did not reproduce with other sizes of data.
+    skiprows = 10
 
     expected = cudf.read_orc(buff)[skiprows:].reset_index(drop=True)
     got = cudf.read_orc(buff, skiprows=skiprows)

From e9425f685e7873f758758ccad01bc6827b521e31 Mon Sep 17 00:00:00 2001
From: Mike Wilson <hyperbolic2346@users.noreply.github.com>
Date: Wed, 4 May 2022 22:05:51 -0400
Subject: [PATCH 3/3] Updating test data to match original test

---
 python/cudf/cudf/tests/test_orc.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 2b5f0972afe..e94888fc770 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -305,13 +305,21 @@ def test_orc_read_skiprows():
     buff = BytesIO()
     data = [
         True,
-        None,
+        False,
         True,
         False,
         None,
         True,
         True,
+        True,
+        False,
+        None,
         False,
+        False,
+        True,
+        True,
+        True,
+        True,
     ]
     writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean()))
     writer.writerows([(d,) for d in data])