From 39f7ff413cef2e946b03c9ee5ea908bedc6591ee Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 18 Jan 2022 08:37:14 -0800
Subject: [PATCH 1/2] Fix decimal precision lookup when column order differs

---
 python/cudf/cudf/_lib/parquet.pyx      | 13 ++++++++++---
 python/cudf/cudf/tests/test_parquet.py | 18 ++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 16873435e1d..b9d4e07ec07 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -200,12 +200,19 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
 
     update_struct_field_names(df, c_out_table.metadata.schema_info)
 
-    # update the decimal precision of each column
     if meta is not None:
-        for col, col_meta in zip(column_names, meta["columns"]):
+        # Book keep each column metadata as the order
+        # of `meta["columns"]` and `column_names` are not
+        # guaranteed to be deterministic and same always.
+        meta_data_per_column = {}
+        for col_meta in meta["columns"]:
+            meta_data_per_column[col_meta['name']] = col_meta
+
+        # update the decimal precision of each column
+        for col in column_names:
             if is_decimal_dtype(df._data[col].dtype):
                 df._data[col].dtype.precision = (
-                    col_meta["metadata"]["precision"]
+                    meta_data_per_column[col]["metadata"]["precision"]
                 )
 
     # Set the index column
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 016ed1229f1..2b6ddea447b 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2373,3 +2373,21 @@ def test_parquet_writer_row_group_size(
         math.ceil(num_rows / size_rows), math.ceil(8 * num_rows / size_bytes)
     )
     assert expected_num_rows == row_groups
+
+
+def test_parquet_reader_decimal_columns():
+    df = cudf.DataFrame(
+        {
+            "col1": cudf.Series([1, 2, 3], dtype=cudf.Decimal64Dtype(10, 2)),
+            "col2": [10, 11, 12],
+            "col3": [12, 13, 14],
+            "col4": ["a", "b", "c"],
+        }
+    )
+    buffer = BytesIO()
+    df.to_parquet(buffer)
+
+    actual = cudf.read_parquet(buffer, columns=["col3", "col2", "col1"])
+    expected = pd.read_parquet(buffer, columns=["col3", "col2", "col1"])
+
+    assert_eq(actual, expected)

From 478942e8442238ae5946146b1d57412007cf04df Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 18 Jan 2022 08:59:31 -0800
Subject: [PATCH 2/2] Simplify column metadata lookup with a dict comprehension

---
 python/cudf/cudf/_lib/parquet.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index b9d4e07ec07..8cb7dd942c1 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -204,9 +204,9 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
         # Book keep each column metadata as the order
         # of `meta["columns"]` and `column_names` are not
         # guaranteed to be deterministic and same always.
-        meta_data_per_column = {}
-        for col_meta in meta["columns"]:
-            meta_data_per_column[col_meta['name']] = col_meta
+        meta_data_per_column = {
+            col_meta['name']: col_meta for col_meta in meta["columns"]
+        }
 
         # update the decimal precision of each column
         for col in column_names: