rapidsai · rapids-bot · Mar 23, 2021 · Mar 22, 2021
@@ -70,6 +70,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         column_in_metadata& set_nullability(bool nullable)
         column_in_metadata& set_list_column_as_map()
         column_in_metadata& set_int96_timestamps(bool req)
+        column_in_metadata& set_decimal_precision(uint8_t precision)
         column_in_metadata& child(size_type i)
 
     cdef cppclass table_input_metadata:

@@ -20,7 +20,8 @@ from cudf.utils.dtypes import (
     np_to_pa_dtype,
     is_categorical_dtype,
     is_list_dtype,
-    is_struct_dtype
+    is_struct_dtype,
+    is_decimal_dtype,
 )
 
 from cudf._lib.utils cimport get_column_names
@@ -310,7 +311,7 @@ cpdef write_parquet(
 
     for i, name in enumerate(table._column_names, num_index_cols_meta):
         tbl_meta.get().column_metadata[i].set_name(name.encode())
-        _set_col_children_names(
+        _set_col_metadata(
             table[name]._column, tbl_meta.get().column_metadata[i]
         )
 
@@ -448,7 +449,7 @@ cdef class ParquetWriter:
 
         for i, name in enumerate(table._column_names, num_index_cols_meta):
             self.tbl_meta.get().column_metadata[i].set_name(name.encode())
-            _set_col_children_names(
+            _set_col_metadata(
                 table[name]._column, self.tbl_meta.get().column_metadata[i]
             )
 
@@ -546,14 +547,20 @@ cdef Column _update_column_struct_field_names(
         col.set_base_children(tuple(children))
     return col
 
-cdef _set_col_children_names(Column col, column_in_metadata& col_meta):
+# Populate ``col_meta`` for ``col`` and, recursively, for its nested children:
+#   * struct: name each child after the dtype's field, then recurse into it
+#   * list:   recurse into child index 1 only (presumably the element
+#             values; TODO confirm against the list column layout)
+#   * other:  record the decimal precision when ``col`` is a decimal column
+cdef _set_col_metadata(Column col, column_in_metadata& col_meta):
     if is_struct_dtype(col):
         for i, (child_col, name) in enumerate(
             zip(col.children, list(col.dtype.fields))
         ):
             col_meta.child(i).set_name(name.encode())
-            _set_col_children_names(child_col, col_meta.child(i))
+            _set_col_metadata(child_col, col_meta.child(i))
     elif is_list_dtype(col):
-        _set_col_children_names(col.children[1], col_meta.child(1))
+        _set_col_metadata(col.children[1], col_meta.child(1))
     else:
+        if is_decimal_dtype(col):
+            col_meta.set_decimal_precision(col.dtype.precision)
         return
@@ -23,6 +23,7 @@ from cudf.utils.dtypes import (
     is_categorical_dtype,
     is_list_dtype,
     is_struct_dtype,
+    is_decimal_dtype,
 )
 
 
@@ -80,7 +81,11 @@ cpdef generate_pandas_metadata(Table table, index):
                 "'category' column dtypes are currently not "
                 + "supported by the gpu accelerated parquet writer"
             )
-        elif is_list_dtype(col) or is_struct_dtype(col):
+        elif (
+            is_list_dtype(col)
+            or is_struct_dtype(col)
+            or is_decimal_dtype(col)
+        ):
             types.append(col.dtype.to_arrow())
         else:
             types.append(np_to_pa_dtype(col.dtype))

@@ -1920,3 +1920,18 @@ def test_parquet_writer_nested(tmpdir, data):
 
     got = pd.read_parquet(fname)
     assert_eq(expect, got)
+
+
+def test_parquet_writer_decimal(tmpdir):
+    # Write a frame holding a Decimal64Dtype(7, 2) column to parquet and
+    # compare the pandas read-back of that file against the frame.
+    from cudf.core.dtypes import Decimal64Dtype
+
+    expect = cudf.DataFrame({"val": [0.00, 0.01, 0.02]})
+    expect["dec_val"] = expect["val"].astype(Decimal64Dtype(7, 2))
+
+    out_path = tmpdir.join("test_parquet_writer_decimal.parquet")
+    expect.to_parquet(out_path)
+    assert os.path.exists(out_path)
+
+    assert_eq(expect, pd.read_parquet(out_path))