rapidsai · devavret · Jul 20, 2021 · Jul 20, 2021 · Jul 20, 2021 · Jul 20, 2021
@@ -188,6 +188,7 @@ __global__ void __launch_bounds__(block_size, 1)
                 return 4 + data_col.element<string_view>(val_idx).size_bytes();
               }
             case Type::FIXED_LEN_BYTE_ARRAY:
+              if (data_col.type().id() == type_id::DECIMAL128) { return 16; }
             default: cudf_assert(false && "Unsupported type for dictionary encoding"); return 0;
           }
         }();

@@ -176,7 +176,8 @@ __global__ void __launch_bounds__(block_size)
     }
   }
   dtype     = s->col.physical_type;
-  dtype_len = (dtype == INT96)                      ? 12
+  dtype_len = (dtype == FIXED_LEN_BYTE_ARRAY)       ? 16
+              : (dtype == INT96)                    ? 12
               : (dtype == INT64 || dtype == DOUBLE) ? 8
               : (dtype == BOOLEAN)                  ? 1
                                                     : 4;
@@ -878,7 +879,8 @@ __global__ void __launch_bounds__(128, 8)
   // Encode data values
   __syncthreads();
   dtype         = s->col.physical_type;
-  dtype_len_out = (dtype == INT96)                      ? 12
+  dtype_len_out = (dtype == FIXED_LEN_BYTE_ARRAY)       ? 16
+                  : (dtype == INT96)                    ? 12
                   : (dtype == INT64 || dtype == DOUBLE) ? 8
                   : (dtype == BOOLEAN)                  ? 1
                                                         : 4;
@@ -1087,6 +1089,29 @@ __global__ void __launch_bounds__(128, 8)
             dst[pos + 3] = v >> 24;
             if (v != 0) memcpy(dst + pos + 4, str.data(), v);
           } break;
+          case FIXED_LEN_BYTE_ARRAY: {
+            if (s->col.leaf_column->type().id() == type_id::DECIMAL128) {
+              // When using FIXED_LEN_BYTE_ARRAY for decimals, the rep is encoded in big-endian
+              auto v        = s->col.leaf_column->element<numeric::decimal128>(val_idx).value();
+              auto v_       = reinterpret_cast<char*>(&v);
+              dst[pos + 0]  = v_[15];
+              dst[pos + 1]  = v_[14];
+              dst[pos + 2]  = v_[13];
+              dst[pos + 3]  = v_[12];
+              dst[pos + 4]  = v_[11];
+              dst[pos + 5]  = v_[10];
+              dst[pos + 6]  = v_[9];
+              dst[pos + 7]  = v_[8];
+              dst[pos + 8]  = v_[7];
+              dst[pos + 9]  = v_[6];
+              dst[pos + 10] = v_[5];
+              dst[pos + 11] = v_[4];
+              dst[pos + 12] = v_[3];
+              dst[pos + 13] = v_[2];
+              dst[pos + 14] = v_[1];
+              dst[pos + 15] = v_[0];
+            }
+          } break;
         }
       }
       __syncthreads();

@@ -343,7 +343,9 @@ struct leaf_schema_fn {
       col_schema.type        = Type::INT64;
       col_schema.stats_dtype = statistics_dtype::dtype_decimal64;
     } else if (std::is_same_v<T, numeric::decimal128>) {
-      CUDF_FAIL("decimal128 currently not supported for parquet writer");
+      col_schema.type        = Type::FIXED_LEN_BYTE_ARRAY;
+      col_schema.type_length = 16;
+      col_schema.stats_dtype = statistics_dtype::dtype_decimal128;
     } else {
       CUDF_FAIL("Unsupported fixed point type for parquet writer");
     }
@@ -1208,8 +1210,9 @@ void writer::impl::write(table_view const& table)
   hostdevice_2dvector<gpu::EncColumnChunk> chunks(num_rowgroups, num_columns, stream);
   for (uint32_t r = 0, global_r = global_rowgroup_base, f = 0, start_row = 0; r < num_rowgroups;
        r++, global_r++) {
-    uint32_t fragments_in_chunk = (uint32_t)(
-      (md.row_groups[global_r].num_rows + max_page_fragment_size - 1) / max_page_fragment_size);
+    uint32_t fragments_in_chunk =
+      (uint32_t)((md.row_groups[global_r].num_rows + max_page_fragment_size - 1) /
+                 max_page_fragment_size);
     md.row_groups[global_r].total_byte_size = 0;
     md.row_groups[global_r].columns.resize(num_columns);
     for (int i = 0; i < num_columns; i++) {

@@ -463,6 +463,58 @@ TEST_F(ParquetWriterTest, MultiColumn)
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
 }
 
+TEST_F(ParquetWriterTest, DecimalColumns)
+{
+  constexpr auto num_rows = 5;
+
+  // auto col0_data = random_values<bool>(num_rows);
+  auto col6_vals = random_values<int32_t>(num_rows);
+  auto col7_vals = random_values<int64_t>(num_rows);
+  auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) {
+    return numeric::decimal32{col6_vals[i], numeric::scale_type{5}};
+  });
+  auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) {
+    return numeric::decimal64{col6_vals[i], numeric::scale_type{5}};
+  });
+  auto col8_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) {
+    return numeric::decimal128{i * 10000, numeric::scale_type{2}};
+  });
+  auto validity  = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
+
+  // column_wrapper<bool> col0{
+  //    col0_data.begin(), col0_data.end(), validity};
+  column_wrapper<numeric::decimal32> col6{col6_data, col6_data + num_rows, validity};
+  column_wrapper<numeric::decimal64> col7{col7_data, col7_data + num_rows, validity};
+  column_wrapper<numeric::decimal128> col8{col8_data, col8_data + num_rows, validity};
+
+  std::vector<std::unique_ptr<column>> cols;
+  // cols.push_back(col0.release());
+  cols.push_back(col6.release());
+  cols.push_back(col7.release());
+  cols.push_back(col8.release());
+  auto expected = std::make_unique<table>(std::move(cols));
+  EXPECT_EQ(3, expected->num_columns());
+
+  cudf_io::table_input_metadata expected_metadata(*expected);
+  // expected_metadata.column_metadata[0].set_name( "bools");
+  expected_metadata.column_metadata[0].set_name("decimal32s").set_decimal_precision(10);
+  expected_metadata.column_metadata[1].set_name("decimal64s").set_decimal_precision(10);
+  expected_metadata.column_metadata[2].set_name("decimal128s").set_decimal_precision(10);
+
+  auto filepath = ("MultiColumn.parquet");
+  cudf_io::parquet_writer_options out_opts =
+    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected->view())
+      .metadata(&expected_metadata);
+  cudf_io::write_parquet(out_opts);
+
+  cudf_io::parquet_reader_options in_opts =
+    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+  auto result = cudf_io::read_parquet(in_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view());
+  cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
+}
+
 TEST_F(ParquetWriterTest, MultiColumnWithNulls)
 {
   constexpr auto num_rows = 100;