From 411564b4234bf7ff95d2f1834fb458ad66906581 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Wed, 4 May 2022 04:50:22 +0000 Subject: [PATCH 1/3] simplifying skiprows test --- python/cudf/cudf/tests/test_orc.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index c28358f5fa0..460557370ad 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -301,27 +301,26 @@ def test_orc_read_rows(datadir, skiprows, num_rows): assert_eq(pdf, gdf) -def test_orc_read_skiprows(tmpdir): +def test_orc_read_skiprows(): buff = BytesIO() - df = pd.DataFrame( - {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]}, - dtype=pd.BooleanDtype(), - ) + data = [ + True, + None, + True, + False, + None, + True, + True, + False, + ] writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean())) - tuples = list( - map( - lambda x: (None,) if x[0] is pd.NA else (bool(x[0]),), - list(df.itertuples(index=False, name=None)), - ) - ) - writer.writerows(tuples) + writer.writerows([(d,) for d in data]) writer.close() - skiprows = 10 + skiprows = 3 - expected = cudf.read_orc(buff)[skiprows::].reset_index(drop=True) + expected = cudf.read_orc(buff)[skiprows:].reset_index(drop=True) got = cudf.read_orc(buff, skiprows=skiprows) - assert_eq(expected, got) From 3fe49546f80e5710e7886ec0589b5c0c4a85470a Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Thu, 5 May 2022 00:54:35 +0000 Subject: [PATCH 2/3] updating from review comments --- python/cudf/cudf/tests/test_orc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 460557370ad..2b5f0972afe 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -317,7 +317,9 @@ def test_orc_read_skiprows(): writer.writerows([(d,) for d in data]) writer.close() - skiprows = 3 + # testing 10 skiprows due to a boolean specific bug fix that didn't + # repro for other sizes of data + skiprows = 10 expected = cudf.read_orc(buff)[skiprows:].reset_index(drop=True) got = cudf.read_orc(buff, skiprows=skiprows) From e9425f685e7873f758758ccad01bc6827b521e31 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Wed, 4 May 2022 22:05:51 -0400 Subject: [PATCH 3/3] Updating test data to match original test --- python/cudf/cudf/tests/test_orc.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 2b5f0972afe..e94888fc770 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -305,13 +305,21 @@ def test_orc_read_skiprows(): buff = BytesIO() data = [ True, - None, + False, True, False, None, True, True, + True, + False, + None, False, + False, + True, + True, + True, + True, ] writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean())) writer.writerows([(d,) for d in data])