diff --git a/cpp/src/io/utilities/column_utils.cuh b/cpp/src/io/utilities/column_utils.cuh index 1faab805811..ecb74173a46 100644 --- a/cpp/src/io/utilities/column_utils.cuh +++ b/cpp/src/io/utilities/column_utils.cuh @@ -67,13 +67,17 @@ rmm::device_uvector create_leaf_column_device_views( size_type index) mutable { col_desc[index].parent_column = parent_col_view.begin() + index; column_device_view col = parent_col_view.column(index); - if (col_desc[index].stats_dtype != dtype_byte_array) { - // traverse till leaf column - while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { - col = (col.type().id() == type_id::LIST) - ? col.child(lists_column_view::child_column_index) - : col.child(0); + // traverse till leaf column + while (cudf::is_nested(col.type())) { + auto const child = (col.type().id() == type_id::LIST) + ? col.child(lists_column_view::child_column_index) + : col.child(0); + // stop early if writing a byte array + if (col_desc[index].stats_dtype == dtype_byte_array && + (child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8)) { + break; } + col = child; } // Store leaf_column to device storage column_device_view* leaf_col_ptr = leaf_columns.begin() + index; diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 0350bfe2981..774c58f1ecf 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -4250,6 +4250,9 @@ TEST_F(ParquetWriterTest, ByteArrayStats) read_footer(source, &fmd); + EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::Type::BYTE_ARRAY); + EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::Type::BYTE_ARRAY); + auto const stats0 = parse_statistics(fmd.row_groups[0].columns[0]); auto const stats1 = parse_statistics(fmd.row_groups[0].columns[1]);