From 400883f17194fd583ddcadc2d3a976d46a8173e4 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 31 Mar 2021 08:03:20 -0700 Subject: [PATCH] add mixed column names with non-string types validation in parquet writer --- python/cudf/cudf/_lib/parquet.pyx | 3 +++ python/cudf/cudf/tests/test_parquet.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index d8b4fbbbe4b..4ea2adec23a 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -312,6 +312,9 @@ cpdef write_parquet( num_index_cols_meta = 0 for i, name in enumerate(table._column_names, num_index_cols_meta): + if not isinstance(name, str): + raise ValueError("parquet must have string column names") + tbl_meta.get().column_metadata[i].set_name(name.encode()) _set_col_metadata( table[name]._column, tbl_meta.get().column_metadata[i] diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index fe418d1ade1..4781ff995b0 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -19,7 +19,7 @@ import cudf from cudf.io.parquet import ParquetWriter, merge_parquet_filemetadata from cudf.tests import dataset_generator as dg -from cudf.tests.utils import assert_eq +from cudf.tests.utils import assert_eq, assert_exceptions_equal @pytest.fixture(scope="module") @@ -1937,3 +1937,15 @@ def test_parquet_writer_decimal(tmpdir): got = pd.read_parquet(fname) assert_eq(gdf, got) + + +def test_parquet_writer_column_validation(): + df = cudf.DataFrame({1: [1, 2, 3], "1": ["a", "b", "c"]}) + pdf = df.to_pandas() + + assert_exceptions_equal( + lfunc=df.to_parquet, + rfunc=pdf.to_parquet, + lfunc_args_and_kwargs=(["cudf.parquet"],), + rfunc_args_and_kwargs=(["pandas.parquet"],), + )