From c05dbed52fdd15757e40463a64ce757d6cd21b46 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 31 Mar 2021 12:32:09 -0500
Subject: [PATCH] Add column names validation in parquet writer (#7786)

Fixes: #7738

The Parquet writer requires all column names to be strings. This change adds a validation check, similar to the one in pandas, that raises an error for non-string column names.

Authors:
  - GALI PREM SAGAR (@galipremsagar)

Approvers:
  - Michael Wang (@isVoid)
  - Keith Kraus (@kkraus14)

URL: https://github.com/rapidsai/cudf/pull/7786
---
 python/cudf/cudf/_lib/parquet.pyx      |  3 +++
 python/cudf/cudf/tests/test_parquet.py | 14 +++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index d8b4fbbbe4b..4ea2adec23a 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -312,6 +312,9 @@ cpdef write_parquet(
         num_index_cols_meta = 0
 
     for i, name in enumerate(table._column_names, num_index_cols_meta):
+        if not isinstance(name, str):
+            raise ValueError("parquet must have string column names")
+
         tbl_meta.get().column_metadata[i].set_name(name.encode())
         _set_col_metadata(
             table[name]._column, tbl_meta.get().column_metadata[i]
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index fe418d1ade1..4781ff995b0 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -19,7 +19,7 @@
 import cudf
 from cudf.io.parquet import ParquetWriter, merge_parquet_filemetadata
 from cudf.tests import dataset_generator as dg
-from cudf.tests.utils import assert_eq
+from cudf.tests.utils import assert_eq, assert_exceptions_equal
 
 
 @pytest.fixture(scope="module")
@@ -1937,3 +1937,15 @@ def test_parquet_writer_decimal(tmpdir):
 
     got = pd.read_parquet(fname)
     assert_eq(gdf, got)
+
+
+def test_parquet_writer_column_validation():
+    df = cudf.DataFrame({1: [1, 2, 3], "1": ["a", "b", "c"]})
+    pdf = df.to_pandas()
+
+    assert_exceptions_equal(
+        lfunc=df.to_parquet,
+        rfunc=pdf.to_parquet,
+        lfunc_args_and_kwargs=(["cudf.parquet"],),
+        rfunc_args_and_kwargs=(["pandas.parquet"],),
+    )