diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc new file mode 100644 index 00000000000..1c661e1c6f0 Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc differ diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc new file mode 100644 index 00000000000..edc1094a186 Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc differ diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc new file mode 100644 index 00000000000..fe5f57af14c Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc differ diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc new file mode 100644 index 00000000000..53c323436d6 Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc differ diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc new file mode 100644 index 00000000000..1bb4079c492 Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc differ diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc new file mode 100644 index 00000000000..a457b8285bd Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc differ diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 1699c11617a..48f1a49b7f4 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ 
@pytest.mark.parametrize(
    "fname",
    [
        "TestOrcFile.Hive.OneEmptyMap.orc",
        "TestOrcFile.Hive.OneEmptyList.orc",
        "TestOrcFile.Hive.OneNullStruct.orc",
        "TestOrcFile.Hive.EmptyListStripe.orc",
        "TestOrcFile.Hive.NullStructStripe.orc",
        "TestOrcFile.Hive.AllNulls.orc",
    ],
)
def test_reader_empty_stripe(datadir, fname):
    """Check that cudf and pandas agree when reading the given ORC fixture.

    Each parametrized ``fname`` is resolved against the ``datadir`` fixture,
    loaded once with ``pd.read_orc`` (the reference result) and once with
    ``cudf.read_orc``, and the two frames are compared with ``assert_eq``.

    NOTE(review): judging by the fixture names, these files presumably
    contain empty lists/maps, null structs, or all-null data written by
    Hive -- confirm against the fixture contents.
    """
    orc_file = datadir / fname

    # pandas is read first and passed as the reference (left-hand) frame.
    assert_eq(pd.read_orc(orc_file), cudf.read_orc(orc_file))