Patch summary (text before the first `diff --git` header is commentary and is
skipped by patch tools):

* python/cudf/cudf/_lib/io/utils.pyx: in update_column_struct_field_names, the
  block that renames struct fields from `info.children[i].name` is moved from
  before the recursion over `col.children` to after it (just before
  `return col`). The block itself is unchanged.
* python/cudf/cudf/tests/data/orc: adds the binary fixture
  TestOrcFile.NestedStructDataFrame.orc.
* python/cudf/cudf/tests/test_orc.py: swaps the `expect`/`got` names in
  test_no_row_group_index_orc_read so that `expect` is the pyarrow result, and
  adds test_names_in_struct_dtype_nesting, which compares both the data and
  the dtypes returned by cudf.read_orc against pyarrow for the new fixture.

NOTE(review): indentation below was reconstructed from a whitespace-collapsed
copy of this patch; line counts match the hunk headers, but confirm leading
whitespace against the target files before applying.

diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx
index d26cf19deaf..972a93e55ec 100644
--- a/python/cudf/cudf/_lib/io/utils.pyx
+++ b/python/cudf/cudf/_lib/io/utils.pyx
@@ -138,14 +138,6 @@ cdef Column update_column_struct_field_names(
 ):
     cdef vector[string] field_names
 
-    if is_struct_dtype(col):
-        field_names.reserve(len(col.base_children))
-        for i in range(info.children.size()):
-            field_names.push_back(info.children[i].name)
-        col = col._rename_fields(
-            field_names
-        )
-
     if col.children:
         children = list(col.children)
         for i, child in enumerate(children):
@@ -154,4 +146,13 @@ cdef Column update_column_struct_field_names(
                 info.children[i]
             )
         col.set_base_children(tuple(children))
+
+    if is_struct_dtype(col):
+        field_names.reserve(len(col.base_children))
+        for i in range(info.children.size()):
+            field_names.push_back(info.children[i].name)
+        col = col._rename_fields(
+            field_names
+        )
+
     return col
diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.NestedStructDataFrame.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.NestedStructDataFrame.orc
new file mode 100644
index 00000000000..6cfb2238150
Binary files /dev/null and b/python/cudf/cudf/tests/data/orc/TestOrcFile.NestedStructDataFrame.orc differ
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 0f769d17015..efa4359c68e 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1185,7 +1185,21 @@ def test_writer_timestamp_stream_size(datadir, tmpdir):
 def test_no_row_group_index_orc_read(datadir, fname):
     fpath = datadir / fname
 
-    got = pa.orc.ORCFile(fpath).read()
-    expect = cudf.read_orc(fpath)
+    expect = pa.orc.ORCFile(fpath).read()
+    got = cudf.read_orc(fpath)
 
-    assert got.equals(expect.to_arrow())
+    assert expect.equals(got.to_arrow())
+
+
+def test_names_in_struct_dtype_nesting(datadir):
+    fname = datadir / "TestOrcFile.NestedStructDataFrame.orc"
+
+    expect = pa.orc.ORCFile(fname).read()
+    got = cudf.read_orc(fname)
+
+    # test dataframes
+    assert expect.equals(got.to_arrow())
+
+    edf = cudf.DataFrame(expect.to_pandas())
+    # test schema
+    assert edf.dtypes.equals(got.dtypes)