Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Raise temporary error for decimal128 types in parquet reader #9804

Merged
merged 12 commits into from
Dec 7, 2021
28 changes: 28 additions & 0 deletions python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from uuid import uuid4

import fsspec
import pyarrow as pa
from pyarrow import dataset as ds, parquet as pq

import cudf
Expand Down Expand Up @@ -411,6 +412,25 @@ def read_parquet(
filepaths_or_buffers.append(tmp_source)

if engine == "cudf":
# Temporary error to probe a parquet file
# and raise decimal128 support error.
if len(filepaths_or_buffers) > 0:
try:
metadata = pq.read_metadata(filepaths_or_buffers[0])
vuule marked this conversation as resolved.
Show resolved Hide resolved
except TypeError:
vyasr marked this conversation as resolved.
Show resolved Hide resolved
pass
else:
arrow_types = metadata.schema.to_arrow_schema().types
for arrow_type in arrow_types:
if isinstance(arrow_type, pa.ListType):
val_field_types = arrow_type.value_field.flatten()
for val_field_type in val_field_types:
_check_decimal128_type(val_field_type.type)
elif isinstance(arrow_type, pa.StructType):
_ = cudf.StructDtype.from_arrow(arrow_type)
else:
_check_decimal128_type(arrow_type)

return libparquet.read_parquet(
filepaths_or_buffers,
columns=columns,
Expand Down Expand Up @@ -529,3 +549,11 @@ def merge_parquet_filemetadata(filemetadata_list):


ParquetWriter = libparquet.ParquetWriter


def _check_decimal128_type(arrow_type):
    """Raise if ``arrow_type`` is a decimal128 type wider than Decimal64.

    cuDF currently supports decimals only up to
    ``cudf.Decimal64Dtype.MAX_PRECISION``, so parquet schemas containing
    wider decimal columns are rejected with a clear error before being
    handed to the libcudf reader.

    Parameters
    ----------
    arrow_type : pyarrow.DataType
        Arrow type probed from the parquet file's schema.

    Raises
    ------
    NotImplementedError
        If ``arrow_type`` is a ``pa.Decimal128Type`` whose precision
        exceeds what Decimal64 can represent.
    """
    if isinstance(arrow_type, pa.Decimal128Type):
        if arrow_type.precision > cudf.Decimal64Dtype.MAX_PRECISION:
            # Single literal (was an accidentally split adjacent-string
            # pair); the runtime message is unchanged and is matched
            # verbatim by the tests.
            raise NotImplementedError(
                "Decimal type greater than Decimal64 is not yet supported"
            )
Binary file not shown.
21 changes: 17 additions & 4 deletions python/cudf/cudf/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,15 +629,28 @@ def test_parquet_reader_spark_timestamps(datadir):
def test_parquet_reader_spark_decimals(datadir):
    """Reading a spark-written decimal parquet file must raise until
    decimal128 support lands in the cuDF parquet reader.

    The previous round-trip comparison against pandas is intentionally
    gone: cuDF cannot yet represent decimals wider than Decimal64, so
    the reader is expected to fail loudly rather than return data.
    """
    fname = datadir / "spark_decimal.parquet"

    with pytest.raises(
        NotImplementedError,
        match="Decimal type greater than Decimal64 is not yet supported",
    ):
        cudf.read_parquet(fname)


def test_parquet_reader_decimal128_error_validation(datadir):
    """A parquet file containing nested decimal128 columns must be
    rejected with a clear ``NotImplementedError``."""
    expected_msg = "Decimal type greater than Decimal64 is not yet supported"
    with pytest.raises(NotImplementedError, match=expected_msg):
        cudf.read_parquet(datadir / "nested_decimal128_file.parquet")


def test_parquet_reader_microsecond_timestamps(datadir):
Expand Down