From 6894263481c954c7874776b0bd0cde967cc97b73 Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Wed, 22 Mar 2023 16:47:06 -0700
Subject: [PATCH 1/2] Fix set_nullability

---
 python/cudf/cudf/_lib/parquet.pyx | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 59571b0e4b3..0ebda815ee9 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -698,7 +698,14 @@ cdef _set_col_metadata(
     column_in_metadata& col_meta,
     bool force_nullable_schema
 ):
-    col_meta.set_nullability(force_nullable_schema or col.nullable)
+    if force_nullable_schema or not col.nullable:
+        # We only need to set/define `_nullable` in libcudf
+        # in two cases:
+        # 1. If `force_nullable_schema` is true.
+        # 2. If `force_nullable_schema` is false and we
+        #    want to enforce non-nullability on a column
+        #    that has no nulls (the chunked parquet writer case).
+        col_meta.set_nullability(force_nullable_schema)
 
     if is_struct_dtype(col):
         for i, (child_col, name) in enumerate(

From 7a50f0de34dc70f5eae7ee0b7ada2eb94c37fdbe Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Fri, 24 Mar 2023 10:29:33 -0700
Subject: [PATCH 2/2] Restrict force_nullable_schema to the non-chunked writer

---
 python/cudf/cudf/_lib/parquet.pyx      | 30 ++++++--------------------
 python/cudf/cudf/tests/test_parquet.py | 20 -----------------
 2 files changed, 6 insertions(+), 44 deletions(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 0ebda815ee9..923f5c4089f 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -470,16 +470,6 @@ cdef class ParquetWriter:
     max_page_size_rows: int, default 20000
         Maximum number of rows of each page of the output.
         By default, 20000 will be used.
-    force_nullable_schema : bool, default True.
-        If True, writes all columns as `null` in schema.
-        If False, columns are written as `null` if they contain null values,
-        otherwise as `not null`.
-
-    Notes
-    -----
-    `DataFrame.to_parquet` and `ParquetWriter` differ in the default
-    value for `force_nullable_schema` to enable all the chunks being
-    written by chunked parquet writer to be schema identical.
 
     See Also
     --------
@@ -497,15 +487,13 @@ cdef class ParquetWriter:
     cdef size_type row_group_size_rows
     cdef size_t max_page_size_bytes
     cdef size_type max_page_size_rows
-    cdef bool force_nullable_schema
 
     def __cinit__(self, object filepath_or_buffer, object index=None,
                   object compression="snappy", str statistics="ROWGROUP",
                   int row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT,
                   int row_group_size_rows=1000000,
                   int max_page_size_bytes=524288,
-                  int max_page_size_rows=20000,
-                  bool force_nullable_schema=True):
+                  int max_page_size_rows=20000):
         filepaths_or_buffers = (
             list(filepath_or_buffer)
             if is_list_like(filepath_or_buffer)
@@ -520,7 +508,6 @@ cdef class ParquetWriter:
         self.row_group_size_rows = row_group_size_rows
         self.max_page_size_bytes = max_page_size_bytes
         self.max_page_size_rows = max_page_size_rows
-        self.force_nullable_schema = force_nullable_schema
 
     def write_table(self, table, object partitions_info=None):
         """ Writes a single table to the file """
@@ -615,7 +602,6 @@ cdef class ParquetWriter:
                 _set_col_metadata(
                     table[name]._column,
                     self.tbl_meta.get().column_metadata[i],
-                    self.force_nullable_schema
                 )
 
         index = (
@@ -696,16 +682,12 @@ cdef cudf_io_types.compression_type _get_comp_type(object compression):
 cdef _set_col_metadata(
     Column col,
     column_in_metadata& col_meta,
-    bool force_nullable_schema
+    bool force_nullable_schema=False,
 ):
-    if force_nullable_schema or not col.nullable:
-        # We only need to set/define `_nullable` in libcudf
-        # in two cases:
-        # 1. If `force_nullable_schema` is true.
-        # 2. If `force_nullable_schema` is false and we
-        #    want to enforce non-nullability on a column
-        #    that has no nulls (the chunked parquet writer case).
-        col_meta.set_nullability(force_nullable_schema)
+    if force_nullable_schema:
+        # Only set nullability if `force_nullable_schema`
+        # is true.
+        col_meta.set_nullability(True)
 
     if is_struct_dtype(col):
         for i, (child_col, name) in enumerate(
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 9b783b03dad..c24ff080033 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2788,23 +2788,3 @@ def test_parquet_writer_schema_nullability(data, force_nullable_schema):
     assert pa.parquet.read_schema(file_obj).field(0).nullable == (
         force_nullable_schema or df.isnull().any().any()
     )
-
-
-@pytest.mark.parametrize("data", [{"a": [1, 2, 3, 4]}, {"b": [1, None, 2, 3]}])
-@pytest.mark.parametrize("force_nullable_schema", [True, False])
-def test_parquet_chunked_writer_schema_nullability(
-    data, force_nullable_schema
-):
-    df = cudf.DataFrame(data)
-    file_obj = BytesIO()
-
-    writer = ParquetWriter(
-        file_obj, force_nullable_schema=force_nullable_schema
-    )
-
-    writer.write_table(df)
-
-    writer.close()
-    assert pa.parquet.read_schema(file_obj).field(0).nullable == (
-        force_nullable_schema or df.isnull().any().any()
-    )
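
A minimal sketch of the behavior after this patch. It assumes the
cudf 23.04-era API in which `force_nullable_schema` is exposed on
`DataFrame.to_parquet` (the path the surviving
`test_parquet_writer_schema_nullability` test exercises) but, after this
patch, no longer on the chunked `ParquetWriter`; the buffer and variable
names are illustrative only.

    from io import BytesIO

    import pyarrow.parquet as pq

    import cudf
    from cudf.io.parquet import ParquetWriter

    df = cudf.DataFrame({"a": [1, 2, 3, 4]})  # column with no nulls

    # Single-shot writer: schema nullability can still be forced.
    buf = BytesIO()
    df.to_parquet(buf, force_nullable_schema=True)
    assert pq.read_schema(buf).field(0).nullable

    # Chunked writer: the keyword is removed by this patch, so
    # `_set_col_metadata` falls back to its `force_nullable_schema=False`
    # default, never calls `set_nullability`, and leaves nullability to
    # libcudf's defaults; every chunk then shares one schema by
    # construction.
    chunked_buf = BytesIO()
    writer = ParquetWriter(chunked_buf)
    writer.write_table(df)
    writer.close()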