From 08e119bdf5e47840e0f021f829e9837e75a8be48 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 3 Jun 2021 16:56:39 -0700
Subject: [PATCH 01/13] add empty Decimal32Dtype placeholder class

---
 python/cudf/cudf/core/dtypes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index f0b0dbba4a5..d5b78a872df 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -237,6 +237,8 @@ def __repr__(self):
     def __hash__(self):
         return hash(self._typ)
 
+class Decimal32Dtype(_BaseDtype):
+    pass
 
 class Decimal64Dtype(_BaseDtype):
 

From ad4a10b72c0418b17e6dc2d0c8ac2a4e64f4f7f9 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 3 Jun 2021 17:03:03 -0700
Subject: [PATCH 02/13] implement Decimal32Dtype with precision and scale

---
 python/cudf/cudf/core/dtypes.py | 100 +++++++++++++++++++++++++++++++-
 1 file changed, 99 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index d5b78a872df..41aa87a24bf 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -238,7 +238,105 @@ def __hash__(self):
         return hash(self._typ)
 
 class Decimal32Dtype(_BaseDtype):
-    pass
+
+    name = "decimal"
+    _metadata = ("precision", "scale")
+    MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max))
+
+    def __init__(self, precision, scale=0):
+        """
+        Parameters
+        ----------
+        precision : int
+            The total number of digits in each value of this dtype
+        scale : int, optional
+            The scale of the Decimal64Dtype. See Notes below.
+
+        Notes
+        -----
+            When the scale is positive:
+              - numbers with fractional parts (e.g., 0.0042) can be represented
+              - the scale is the total number of digits to the right of the
+                decimal point
+            When the scale is negative:
+              - only multiples of powers of 10 (including 10**0) can be
+                represented (e.g., 1729, 4200, 1000000)
+              - the scale represents the number of trailing zeros in the value.
+            For example, 42 is representable with precision=2 and scale=0.
+            13.0051 is representable with precision=6 and scale=4,
+            and *not* representable with precision<6 or scale<4.
+        """
+        self._validate(precision, scale)
+        self._typ = pa.decimal128(precision, scale)
+
+    @property
+    def str(self):
+        return f"decimal32({self.precision}, {self.scale})"
+
+    @property
+    def precision(self):
+        return self._typ.precision
+
+    @precision.setter
+    def precision(self, value):
+        self._validate(value, self.scale)
+        self._typ = pa.decimal128(precision=value, scale=self.scale)
+
+    @property
+    def scale(self):
+        return self._typ.scale
+
+    @property
+    def type(self):
+        # might need to account for precision and scale here
+        return decimal.Decimal
+
+    def to_arrow(self):
+        return self._typ
+
+    @classmethod
+    def from_arrow(cls, typ):
+        return cls(typ.precision, typ.scale)
+
+    @property
+    def itemsize(self):
+        return 8
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}"
+            f"(precision={self.precision}, scale={self.scale})"
+        )
+
+    def __hash__(self):
+        return hash(self._typ)
+
+    @classmethod
+    def _validate(cls, precision, scale=0):
+        if precision > Decimal64Dtype.MAX_PRECISION:
+            raise ValueError(
+                f"Cannot construct a {cls.__name__}"
+                f" with precision > {cls.MAX_PRECISION}"
+            )
+        if abs(scale) > precision:
+            raise ValueError(f"scale={scale} exceeds precision={precision}")
+
+    @classmethod
+    def _from_decimal(cls, decimal):
+        """
+        Create a cudf.Decimal32Dtype from a decimal.Decimal object
+        """
+        metadata = decimal.as_tuple()
+        precision = max(len(metadata.digits), -metadata.exponent)
+        return cls(precision, -metadata.exponent)
+
+    def serialize(self) -> Tuple[dict, list]:
+        return {"precision": self.precision, "scale": self.scale}, []
+
+    @classmethod
+    def deserialize(cls, header: dict, frames: list):
+        return cls(header["precision"], header["scale"])
+
 
 class Decimal64Dtype(_BaseDtype):
 

From 02ab5548e5ac0e9592554d43a87e02b52403a9c7 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 8 Jun 2021 22:53:18 -0700
Subject: [PATCH 03/13] map DECIMAL32 to Decimal32Dtype, use int32 MAX_PRECISION

---
 python/cudf/cudf/_lib/types.pyx | 21 +++++++++------------
 python/cudf/cudf/core/dtypes.py | 13 ++++++++++---
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index e9ed4f21ddd..9c16c593a6c 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -191,21 +191,16 @@ cdef dtype_from_structs_column_view(column_view cv):
     }
     return StructDtype(fields)
 
-cdef dtype_from_decimal_column_view(column_view cv):
-    scale = -cv.type().scale()
-    return Decimal64Dtype(precision=Decimal64Dtype.MAX_PRECISION, scale=scale)
-
 cdef dtype_from_column_view(column_view cv):
     cdef libcudf_types.type_id tid = cv.type().id()
     if tid == libcudf_types.type_id.LIST:
         return dtype_from_lists_column_view(cv)
     elif tid == libcudf_types.type_id.STRUCT:
-        return dtype_from_structs_column_view(cv)
-    elif tid == libcudf_types.type_id.DECIMAL64:
-        return dtype_from_decimal_column_view(cv)
-    elif tid == libcudf_types.type_id.DECIMAL32:
-        raise NotImplementedError("decimal32 types are not supported yet. "
-                                  "Use decimal64 instead")
+        sreturn dtype_from_structs_column_view(cv)
+    elif tid ==  libcudf_types.type_id.DECIMAL64:
+        Decimal64Dtype(precision=Decimal64Dtype.MAX_PRECISION, scale=-cv.type().scale())
+    elif tid ==  libcudf_types.type_id.DECIMAL32:
+        Decimal32Dtype(precision=Decimal32Dtype.MAX_PRECISION, scale=-cv.type().scale())
     else:
         return cudf_to_np_types[<underlying_type_t_type_id>(tid)]
 
@@ -214,14 +209,16 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
         tid = libcudf_types.type_id.LIST
     elif is_struct_dtype(dtype):
         tid = libcudf_types.type_id.STRUCT
-    elif is_decimal_dtype(dtype):
+    elif is_decimal64_dtype(dtype):
         tid = libcudf_types.type_id.DECIMAL64
+    elif is_decimal32_dtype(dtype):
+        tid = libcudf_types.type_id.DECIMAL32
     else:
         tid = <libcudf_types.type_id> (
             <underlying_type_t_type_id> (
                 np_to_cudf_types[np.dtype(dtype)]))
 
-    if tid == libcudf_types.type_id.DECIMAL64:
+    if isinstance(tid, [libcudf_types.type_id.DECIMAL32, libcudf_types.type_id.DECIMAL64]):
         return libcudf_types.data_type(tid, -dtype.scale)
     else:
         return libcudf_types.data_type(tid)
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 41aa87a24bf..9aebfbaf6a9 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -241,7 +241,7 @@ class Decimal32Dtype(_BaseDtype):
 
     name = "decimal"
     _metadata = ("precision", "scale")
-    MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max))
+    MAX_PRECISION = np.floor(np.log10(np.iinfo("int32").max))
 
     def __init__(self, precision, scale=0):
         """
@@ -250,7 +250,7 @@ def __init__(self, precision, scale=0):
         precision : int
             The total number of digits in each value of this dtype
         scale : int, optional
-            The scale of the Decimal64Dtype. See Notes below.
+            The scale of the Decimal32Dtype. See Notes below.
 
         Notes
         -----
@@ -313,7 +313,7 @@ def __hash__(self):
 
     @classmethod
     def _validate(cls, precision, scale=0):
-        if precision > Decimal64Dtype.MAX_PRECISION:
+        if precision > Decimal32Dtype.MAX_PRECISION:
             raise ValueError(
                 f"Cannot construct a {cls.__name__}"
                 f" with precision > {cls.MAX_PRECISION}"
@@ -337,6 +337,10 @@ def serialize(self) -> Tuple[dict, list]:
     def deserialize(cls, header: dict, frames: list):
         return cls(header["precision"], header["scale"])
 
+    @classmethod
+    def is_decimal32_dtype():
+        pass
+
 
 class Decimal64Dtype(_BaseDtype):
 
@@ -438,6 +442,9 @@ def serialize(self) -> Tuple[dict, list]:
     def deserialize(cls, header: dict, frames: list):
         return cls(header["precision"], header["scale"])
 
+    @classmethod
+    def is_decimal64_dtype():
+        pass
 
 class IntervalDtype(StructDtype):
     name = "interval"

From 0cf41a3e7bfeedf7ff7c0f92ad425db9e6c2a4b1 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 9 Jun 2021 00:38:51 -0700
Subject: [PATCH 04/13] added is_decimal32_dtype / is_decimal64_dtype

---
 python/cudf/cudf/_lib/types.pyx  |  4 ++--
 python/cudf/cudf/core/dtypes.py  |  8 +-------
 python/cudf/cudf/utils/dtypes.py | 26 +++++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index 9c16c593a6c..d448fc31425 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -15,7 +15,7 @@ from cudf._lib.types cimport (
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf.core.dtypes import ListDtype, StructDtype, Decimal64Dtype
-from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype
+from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype, is_decimal32_dtype, is_decimal64_dtype
 
 cimport cudf._lib.cpp.types as libcudf_types
 
@@ -196,7 +196,7 @@ cdef dtype_from_column_view(column_view cv):
     if tid == libcudf_types.type_id.LIST:
         return dtype_from_lists_column_view(cv)
     elif tid == libcudf_types.type_id.STRUCT:
-        sreturn dtype_from_structs_column_view(cv)
+        return dtype_from_structs_column_view(cv)
     elif tid ==  libcudf_types.type_id.DECIMAL64:
         Decimal64Dtype(precision=Decimal64Dtype.MAX_PRECISION, scale=-cv.type().scale())
     elif tid ==  libcudf_types.type_id.DECIMAL32:
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 9aebfbaf6a9..fcf09e593d1 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -237,6 +237,7 @@ def __repr__(self):
     def __hash__(self):
         return hash(self._typ)
 
+
 class Decimal32Dtype(_BaseDtype):
 
     name = "decimal"
@@ -337,10 +338,6 @@ def serialize(self) -> Tuple[dict, list]:
     def deserialize(cls, header: dict, frames: list):
         return cls(header["precision"], header["scale"])
 
-    @classmethod
-    def is_decimal32_dtype():
-        pass
-
 
 class Decimal64Dtype(_BaseDtype):
 
@@ -442,9 +439,6 @@ def serialize(self) -> Tuple[dict, list]:
     def deserialize(cls, header: dict, frames: list):
         return cls(header["precision"], header["scale"])
 
-    @classmethod
-    def is_decimal64_dtype():
-        pass
 
 class IntervalDtype(StructDtype):
     name = "interval"
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 0b59116f8e6..4311271130b 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -10,7 +10,7 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from pandas.core.dtypes.common import infer_dtype_from_object
+from pandas.core.dtypes.common import infer_dtype_from_object, pandas_dtype
 from pandas.core.dtypes.dtypes import CategoricalDtype, CategoricalDtypeType
 
 import cudf
@@ -290,6 +290,30 @@ def is_decimal_dtype(obj):
     )
 
 
+def is_decimal32_dtype(obj):
+    return (
+        type(obj) is cudf.core.dtypes.Decimal32Dtype
+        or obj is cudf.core.dtypes.Decimal32Dtype
+        or (
+            isinstance(obj, str)
+            and obj == cudf.core.dtypes.Decimal32Dtype.name
+        )
+        or (hasattr(obj, "dtype") and is_decimal32_dtype(obj.dtype))
+    )
+
+
+def is_decimal64_dtype(obj):
+    return (
+        type(obj) is cudf.core.dtypes.Decimal64Dtype
+        or obj is cudf.core.dtypes.Decimal64Dtype
+        or (
+            isinstance(obj, str)
+            and obj == cudf.core.dtypes.Decimal64Dtype.name
+        )
+        or (hasattr(obj, "dtype") and is_decimal64_dtype(obj.dtype))
+    )
+
+
 def _find_common_type_decimal(dtypes):
     # Find the largest scale and the largest difference between
     # precision and scale of the columns to be concatenated

From 376766dd17ba05b308325312b351a344eee25f69 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 9 Jun 2021 10:33:20 -0700
Subject: [PATCH 05/13] replaced DecimalColumn with Decimal64Column in all
 cudf-python files

---
 python/cudf/cudf/__init__.py                         |  1 +
 .../_lib/strings/convert/convert_fixed_point.pyx     |  4 ++--
 python/cudf/cudf/_lib/types.pyx                      |  6 +++---
 python/cudf/cudf/core/column/__init__.py             |  2 +-
 python/cudf/cudf/core/column/column.py               | 10 +++++-----
 python/cudf/cudf/core/column/decimal.py              | 10 +++++-----
 python/cudf/cudf/core/column/numerical.py            |  6 +++---
 python/cudf/cudf/core/column/string.py               |  2 +-
 python/cudf/cudf/core/dtypes.py                      |  2 +-
 python/cudf/cudf/core/frame.py                       |  2 +-
 python/cudf/cudf/core/series.py                      |  2 +-
 python/cudf/cudf/tests/test_decimal.py               | 12 ++++++------
 python/cudf/cudf/utils/dtypes.py                     |  8 +-------
 13 files changed, 31 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index c8a4894f4be..5dfc241e54c 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -45,6 +45,7 @@
 from cudf.core.dtypes import (
     CategoricalDtype,
     Decimal64Dtype,
+    Decimal32Dtype,
     ListDtype,
     StructDtype,
 )
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
index 38d238b8266..e002d630fc3 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -28,7 +28,7 @@ from libcpp.string cimport string
 
 def from_decimal(Column input_col):
     """
-    Converts a `DecimalColumn` to a `StringColumn`.
+    Converts a `Decimal64Column` to a `StringColumn`.
 
     Parameters
     ----------
@@ -50,7 +50,7 @@ def from_decimal(Column input_col):
 
 def to_decimal(Column input_col, object out_type):
     """
-    Returns a `DecimalColumn` from the provided `StringColumn`
+    Returns a `Decimal64Column` from the provided `StringColumn`
     using the scale in the `out_type`.
 
     Parameters
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index d448fc31425..0cc56352c5f 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -14,8 +14,8 @@ from cudf._lib.types cimport (
 )
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
-from cudf.core.dtypes import ListDtype, StructDtype, Decimal64Dtype
-from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype, is_decimal32_dtype, is_decimal64_dtype
+from cudf.core.dtypes import ListDtype, StructDtype, Decimal64Dtype, Decimal32Dtype
+from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype, is_decimal64_dtype, is_decimal32_dtype
 
 cimport cudf._lib.cpp.types as libcudf_types
 
@@ -218,7 +218,7 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
             <underlying_type_t_type_id> (
                 np_to_cudf_types[np.dtype(dtype)]))
 
-    if isinstance(tid, [libcudf_types.type_id.DECIMAL32, libcudf_types.type_id.DECIMAL64]):
+    if tid in (libcudf_types.type_id.DECIMAL64, libcudf_types.type_id.DECIMAL32):
         return libcudf_types.data_type(tid, -dtype.scale)
     else:
         return libcudf_types.data_type(tid)
diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py
index 32cb557548f..6a494e8885c 100644
--- a/python/cudf/cudf/core/column/__init__.py
+++ b/python/cudf/cudf/core/column/__init__.py
@@ -21,4 +21,4 @@
 from cudf.core.column.struct import StructColumn  # noqa: F401
 from cudf.core.column.timedelta import TimeDeltaColumn  # noqa: F401
 from cudf.core.column.interval import IntervalColumn  # noqa: F401
-from cudf.core.column.decimal import DecimalColumn  # noqa: F401
+from cudf.core.column.decimal import Decimal64Column  # noqa: F401
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index fd3a47aea64..42f231e28c9 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -289,7 +289,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
         ):
             return cudf.core.column.IntervalColumn.from_arrow(array)
         elif isinstance(array.type, pa.Decimal128Type):
-            return cudf.core.column.DecimalColumn.from_arrow(array)
+            return cudf.core.column.Decimal64Column.from_arrow(array)
 
         result = libcudf.interop.from_arrow(data, data.column_names)._data[
             "None"
@@ -978,7 +978,7 @@ def as_string_column(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> "cudf.core.column.Decimal64Column":
         raise NotImplementedError
 
     def apply_boolean_mask(self, mask) -> ColumnBase:
@@ -1490,7 +1490,7 @@ def build_column(
     elif is_decimal_dtype(dtype):
         if size is None:
             raise TypeError("Must specify size")
-        return cudf.core.column.DecimalColumn(
+        return cudf.core.column.Decimal64Column(
             data=data,
             size=size,
             offset=offset,
@@ -1963,7 +1963,7 @@ def as_column(
                                 precision=dtype.precision, scale=dtype.scale
                             ),
                         )
-                        return cudf.core.column.DecimalColumn.from_arrow(data)
+                        return cudf.core.column.Decimal64Column.from_arrow(data)
                     dtype = pd.api.types.pandas_dtype(dtype)
                     if is_categorical_dtype(dtype) or is_interval_dtype(dtype):
                         raise TypeError
@@ -2212,7 +2212,7 @@ def _copy_type_metadata_from_arrow(
     Decimal64Dtype, copy precisions.
     """
     if pa.types.is_decimal(arrow_array.type) and isinstance(
-        cudf_column, cudf.core.column.DecimalColumn
+        cudf_column, cudf.core.column.Decimal64Column
     ):
         cudf_column.dtype.precision = arrow_array.type.precision
     elif pa.types.is_struct(arrow_array.type) and isinstance(
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 459cfae6fdb..d8c27ae58ed 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -24,7 +24,7 @@
 from .numerical_base import NumericalBaseColumn
 
 
-class DecimalColumn(NumericalBaseColumn):
+class Decimal64Column(NumericalBaseColumn):
     dtype: Decimal64Dtype
 
     def __truediv__(self, other):
@@ -98,7 +98,7 @@ def binary_operator(self, op, other, reflect=False):
         elif op in ("eq", "ne", "lt", "gt", "le", "ge"):
             if not isinstance(
                 other,
-                (DecimalColumn, cudf.core.column.NumericalColumn, cudf.Scalar),
+                (Decimal64Dtype, cudf.core.column.NumericalColumn, cudf.Scalar),
             ):
                 raise TypeError(
                     f"Operator {op} not supported between"
@@ -145,7 +145,7 @@ def _decimal_quantile(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> "cudf.core.column.Decimal64Column":
         if dtype == self.dtype:
             return self
         return libcudf.unary.cast(self, dtype)
@@ -175,7 +175,7 @@ def fillna(
         if isinstance(value, (int, Decimal)):
             value = cudf.Scalar(value, dtype=self.dtype)
         elif (
-            isinstance(value, DecimalColumn)
+            isinstance(value, Decimal64Column)
             or isinstance(value, NumericalColumn)
             and is_integer_dtype(value.dtype)
         ):
@@ -215,7 +215,7 @@ def _copy_type_metadata(self: ColumnBase, other: ColumnBase) -> ColumnBase:
         In addition to the default behavior, if `other` is also a decimal
         column the precision is copied over.
         """
-        if isinstance(other, DecimalColumn):
+        if isinstance(other, Decimal64Column):
             other.dtype.precision = self.dtype.precision  # type: ignore
         # Have to ignore typing here because it misdiagnoses super().
         return super()._copy_type_metadata(other)  # type: ignore
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index e35cc744434..9fce3c61d2f 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -139,14 +139,14 @@ def binary_operator(
                     (
                         NumericalColumn,
                         cudf.Scalar,
-                        cudf.core.column.DecimalColumn,
+                        cudf.core.column.Decimal64Column,
                     ),
                 )
                 or np.isscalar(rhs)
             ):
                 msg = "{!r} operator not supported between {} and {}"
                 raise TypeError(msg.format(binop, type(self), type(rhs)))
-            if isinstance(rhs, cudf.core.column.DecimalColumn):
+            if isinstance(rhs, cudf.core.column.Decimal64Column):
                 lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column(
                     Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
                 )
@@ -249,7 +249,7 @@ def as_timedelta_column(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> "cudf.core.column.Decimal64Column":
         return libcudf.unary.cast(self, dtype)
 
     def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 0b83548a92d..44f8ab7451f 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5146,7 +5146,7 @@ def as_timedelta_column(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> "cudf.core.column.Decimal64Column":
         return cpp_to_decimal(self, dtype)
 
     def as_string_column(self, dtype: Dtype, format=None) -> StringColumn:
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index fcf09e593d1..e58e4ef9d39 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -301,7 +301,7 @@ def from_arrow(cls, typ):
 
     @property
     def itemsize(self):
-        return 8
+        return 4
 
     def __repr__(self):
         return (
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 4ea3decdc50..b989e9f5731 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -504,7 +504,7 @@ def _concat(
 
         # Reassign precision for any decimal cols
         for name, col in out._data.items():
-            if isinstance(col, cudf.core.column.DecimalColumn):
+            if isinstance(col, cudf.core.column.Decimal64Column):
                 col = tables[0]._data[name]._copy_type_metadata(col)
 
         # Reassign index and column names
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index c5a7b07d778..4ed528f3e1e 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2406,7 +2406,7 @@ def _concat(cls, objs, axis=0, index=True):
 
         col = _concat_columns([o._column for o in objs])
 
-        if isinstance(col, cudf.core.column.DecimalColumn):
+        if isinstance(col, cudf.core.column.Decimal64Column):
             col = objs[0]._column._copy_type_metadata(col)
 
         return cls(data=col, index=index, name=name)
diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
index 073a8e443c7..b93762c8e24 100644
--- a/python/cudf/cudf/tests/test_decimal.py
+++ b/python/cudf/cudf/tests/test_decimal.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.core.column import DecimalColumn, NumericalColumn
+from cudf.core.column import Decimal64Column, NumericalColumn
 from cudf.core.dtypes import Decimal64Dtype
 from cudf.tests.utils import (
     FLOAT_TYPES,
@@ -42,13 +42,13 @@
 )
 def test_round_trip_decimal_column(data, typ):
     pa_arr = pa.array(data, type=typ)
-    col = DecimalColumn.from_arrow(pa_arr)
+    col = Decimal64Column.from_arrow(pa_arr)
     assert pa_arr.equals(col.to_arrow())
 
 
 def test_from_arrow_max_precision():
     with pytest.raises(ValueError):
-        DecimalColumn.from_arrow(
+        Decimal64Column.from_arrow(
             pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19))
         )
 
@@ -83,7 +83,7 @@ def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
     pa_arr = got.to_arrow().cast(
         pa.decimal128(to_dtype.precision, to_dtype.scale)
     )
-    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+    expected = cudf.Series(Decimal64Column.from_arrow(pa_arr))
 
     got = got.astype(to_dtype)
 
@@ -123,7 +123,7 @@ def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype):
         .cast("float64")
         .cast(pa.decimal128(to_dtype.precision, to_dtype.scale))
     )
-    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+    expected = cudf.Series(Decimal64Column.from_arrow(pa_arr))
 
     got = got.astype(to_dtype)
 
@@ -163,7 +163,7 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
     pa_arr = got.to_arrow().cast(
         pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
     )
-    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+    expected = cudf.Series(Decimal64Column.from_arrow(pa_arr))
 
     got = got.astype(to_dtype)
 
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 4311271130b..3033cb2f328 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -280,13 +280,7 @@ def is_interval_dtype(obj):
 
 def is_decimal_dtype(obj):
     return (
-        type(obj) is cudf.core.dtypes.Decimal64Dtype
-        or obj is cudf.core.dtypes.Decimal64Dtype
-        or (
-            isinstance(obj, str)
-            and obj == cudf.core.dtypes.Decimal64Dtype.name
-        )
-        or (hasattr(obj, "dtype") and is_decimal_dtype(obj.dtype))
+        is_decimal32_dtype(obj) or is_decimal64_dtype(obj)   
     )
 
 

From 911a7c89392b483fc68da75c5efaaf73a65129fa Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Fri, 11 Jun 2021 12:34:44 -0700
Subject: [PATCH 06/13] introduced Decimal32Column in decimal.py

---
 python/cudf/cudf/_lib/types.pyx         | 35 ++++++++++----
 python/cudf/cudf/core/column/column.py  |  5 +-
 python/cudf/cudf/core/column/decimal.py | 62 ++++++++++++++++++++++++-
 python/cudf/cudf/core/dtypes.py         |  4 +-
 4 files changed, 93 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index 0cc56352c5f..43e5c213947 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -14,9 +14,19 @@ from cudf._lib.types cimport (
 )
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
-from cudf.core.dtypes import ListDtype, StructDtype, Decimal64Dtype, Decimal32Dtype
-from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype, is_decimal64_dtype, is_decimal32_dtype
-
+from cudf.core.dtypes import (
+    ListDtype,
+    StructDtype,
+    Decimal64Dtype,
+    Decimal32Dtype
+)
+from cudf.utils.dtypes import (
+    is_decimal_dtype,
+    is_list_dtype,
+    is_struct_dtype,
+    is_decimal64_dtype,
+    is_decimal32_dtype
+)
 cimport cudf._lib.cpp.types as libcudf_types
 
 
@@ -197,10 +207,16 @@ cdef dtype_from_column_view(column_view cv):
         return dtype_from_lists_column_view(cv)
     elif tid == libcudf_types.type_id.STRUCT:
         return dtype_from_structs_column_view(cv)
-    elif tid ==  libcudf_types.type_id.DECIMAL64:
-        Decimal64Dtype(precision=Decimal64Dtype.MAX_PRECISION, scale=-cv.type().scale())
-    elif tid ==  libcudf_types.type_id.DECIMAL32:
-        Decimal32Dtype(precision=Decimal32Dtype.MAX_PRECISION, scale=-cv.type().scale())
+    elif tid == libcudf_types.type_id.DECIMAL64:
+        return Decimal64Dtype(
+            precision=Decimal64Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
+    elif tid == libcudf_types.type_id.DECIMAL32:
+        return Decimal32Dtype(
+            precision=Decimal32Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
     else:
         return cudf_to_np_types[<underlying_type_t_type_id>(tid)]
 
@@ -218,7 +234,10 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
             <underlying_type_t_type_id> (
                 np_to_cudf_types[np.dtype(dtype)]))
 
-    if tid in (libcudf_types.type_id.DECIMAL64, libcudf_types.type_id.DECIMAL32):
+    if tid in (
+        libcudf_types.type_id.DECIMAL64,
+        libcudf_types.type_id.DECIMAL32
+    ):
         return libcudf_types.data_type(tid, -dtype.scale)
     else:
         return libcudf_types.data_type(tid)
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 42f231e28c9..489f5f68049 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1956,6 +1956,7 @@ def as_column(
                                 "Cannot create list column from given data"
                             )
                         return as_column(data, nan_as_null=nan_as_null)
+                    # breakpoint()
                     if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
                         data = pa.array(
                             arbitrary,
@@ -1963,7 +1964,9 @@ def as_column(
                                 precision=dtype.precision, scale=dtype.scale
                             ),
                         )
-                        return cudf.core.column.Decimal64Column.from_arrow(data)
+                        return cudf.core.column.Decimal64Column.from_arrow(
+                            data
+                        )
                     dtype = pd.api.types.pandas_dtype(dtype)
                     if is_categorical_dtype(dtype) or is_interval_dtype(dtype):
                         raise TypeError
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index d8c27ae58ed..b5acc1f151a 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -17,13 +17,66 @@
 from cudf._typing import Dtype
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, NumericalColumn, as_column
-from cudf.core.dtypes import Decimal64Dtype
+from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype
 from cudf.utils.dtypes import is_scalar
 from cudf.utils.utils import pa_mask_buffer_to_mask
 
 from .numerical_base import NumericalBaseColumn
 
 
+class Decimal32Column(NumericalBaseColumn):
+    """Fixed-point decimal column whose values are stored as int32."""
+
+    dtype: Decimal32Dtype
+
+    @classmethod
+    def from_arrow(cls, data: pa.Array):
+        """Build a column from a pyarrow decimal128 array.
+
+        Only the first 32-bit word of every 128-bit arrow value is kept.
+        """
+        dtype = Decimal32Dtype.from_arrow(data.type)
+        mask_buf = data.buffers()[0]
+        mask = None
+        if mask_buf is not None:
+            mask = pa_mask_buffer_to_mask(mask_buf, len(data))
+        words = cp.array(np.frombuffer(data.buffers()[1], dtype="int32"))
+        data_32 = words[::4].copy()
+        return cls(
+            data=Buffer(data_32.view("uint8")),
+            size=len(data),
+            dtype=dtype,
+            offset=data.offset,
+            mask=mask,
+        )
+
+    def to_arrow(self):
+        """Convert to a pyarrow decimal128 array.
+
+        Each int32 value is sign-extended to fill a 128-bit arrow slot.
+        """
+        data_buf_32 = self.base_data.to_host_array().view("int32")
+        data_buf_128 = np.empty(len(data_buf_32) * 4, dtype="int32")
+
+        # use striding to set the first 32 bits of each 128-bit chunk:
+        data_buf_128[::4] = data_buf_32
+        # the remaining three words of each chunk hold the sign extension:
+        # 0 for non-negative values, -1 for negative values. Compute it once.
+        sign_words = np.where(data_buf_32 < 0, -1, 0)
+        for word in (1, 2, 3):
+            data_buf_128[word::4] = sign_words
+        data_buf = pa.py_buffer(data_buf_128)
+        mask_buf = None
+        if self.base_mask is not None:
+            mask_buf = pa.py_buffer(self.base_mask.to_host_array())
+        return pa.Array.from_buffers(
+            type=self.dtype.to_arrow(),
+            offset=self._offset,
+            length=self.size,
+            buffers=[mask_buf, data_buf],
+        )
+
+
 class Decimal64Column(NumericalBaseColumn):
     dtype: Decimal64Dtype
 
@@ -60,6 +113,7 @@ def from_arrow(cls, data: pa.Array):
     def to_arrow(self):
         data_buf_64 = self.base_data.to_host_array().view("int64")
         data_buf_128 = np.empty(len(data_buf_64) * 2, dtype="int64")
+
         # use striding to set the first 64 bits of each 128-bit chunk:
         data_buf_128[::2] = data_buf_64
         # use striding again to set the remaining bits of each 128-bit chunk:
@@ -98,7 +152,11 @@ def binary_operator(self, op, other, reflect=False):
         elif op in ("eq", "ne", "lt", "gt", "le", "ge"):
             if not isinstance(
                 other,
-                (Decimal64Dtype, cudf.core.column.NumericalColumn, cudf.Scalar),
+                (
+                    Decimal64Dtype,
+                    cudf.core.column.NumericalColumn,
+                    cudf.Scalar,
+                ),
             ):
                 raise TypeError(
                     f"Operator {op} not supported between"
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index e58e4ef9d39..0b43d6d7135 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -240,7 +240,7 @@ def __hash__(self):
 
 class Decimal32Dtype(_BaseDtype):
 
-    name = "decimal"
+    name = "decimal32"
     _metadata = ("precision", "scale")
     MAX_PRECISION = np.floor(np.log10(np.iinfo("int32").max))
 
@@ -341,7 +341,7 @@ def deserialize(cls, header: dict, frames: list):
 
 class Decimal64Dtype(_BaseDtype):
 
-    name = "decimal"
+    name = "decimal64"
     _metadata = ("precision", "scale")
     MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max))
 

From 494e987ad73ba3311db08ab5024eed5ae50dc7e2 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Mon, 14 Jun 2021 10:05:51 -0700
Subject: [PATCH 07/13] fix constructing a Series with a Decimal32Column

---
 python/cudf/cudf/core/column/__init__.py |  7 +++++--
 python/cudf/cudf/core/column/column.py   | 26 +++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py
index 6a494e8885c..388d998e479 100644
--- a/python/cudf/cudf/core/column/__init__.py
+++ b/python/cudf/cudf/core/column/__init__.py
@@ -15,10 +15,13 @@
     serialize_columns,
 )
 from cudf.core.column.datetime import DatetimeColumn  # noqa: F401
+from cudf.core.column.decimal import (  # noqa: F401
+    Decimal32Column,
+    Decimal64Column,
+)
+from cudf.core.column.interval import IntervalColumn  # noqa: F401
 from cudf.core.column.lists import ListColumn  # noqa: F401
 from cudf.core.column.numerical import NumericalColumn  # noqa: F401
 from cudf.core.column.string import StringColumn  # noqa: F401
 from cudf.core.column.struct import StructColumn  # noqa: F401
 from cudf.core.column.timedelta import TimeDeltaColumn  # noqa: F401
-from cudf.core.column.interval import IntervalColumn  # noqa: F401
-from cudf.core.column.decimal import Decimal64Column  # noqa: F401
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index b080a196190..4e122443caa 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -50,6 +50,8 @@
     check_cast_unsupported_dtype,
     get_time_unit,
     is_categorical_dtype,
+    is_decimal32_dtype,
+    is_decimal64_dtype,
     is_decimal_dtype,
     is_interval_dtype,
     is_list_dtype,
@@ -1487,7 +1489,7 @@ def build_column(
             null_count=null_count,
             children=children,
         )
-    elif is_decimal_dtype(dtype):
+    elif is_decimal64_dtype(dtype):
         if size is None:
             raise TypeError("Must specify size")
         return cudf.core.column.Decimal64Column(
@@ -1499,6 +1501,18 @@ def build_column(
             null_count=null_count,
             children=children,
         )
+    elif is_decimal32_dtype(dtype):
+        if size is None:
+            raise TypeError("Must specify size")
+        return cudf.core.column.Decimal32Column(
+            data=data,
+            size=size,
+            offset=offset,
+            dtype=dtype,
+            mask=mask,
+            null_count=null_count,
+            children=children,
+        )
     elif is_interval_dtype(dtype):
         return cudf.core.column.IntervalColumn(
             dtype=dtype,
@@ -1967,6 +1981,16 @@ def as_column(
                         return cudf.core.column.Decimal64Column.from_arrow(
                             data
                         )
+                    if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
+                        data = pa.array(
+                            arbitrary,
+                            type=pa.decimal128(
+                                precision=dtype.precision, scale=dtype.scale
+                            ),
+                        )
+                        return cudf.core.column.Decimal32Column.from_arrow(
+                            data
+                        )
                     dtype = pd.api.types.pandas_dtype(dtype)
                     if is_categorical_dtype(dtype) or is_interval_dtype(dtype):
                         raise TypeError

From 95d329140112b66abbfa153595427e84106a8695 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 16 Jun 2021 16:28:04 -0700
Subject: [PATCH 08/13] remove leftover merge conflict markers

---
 python/cudf/cudf/utils/dtypes.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 07b5389ccea..6d1078d987e 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -8,12 +8,8 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-<<<<<<< HEAD
 from pandas.core.dtypes.common import infer_dtype_from_object, pandas_dtype
 from pandas.core.dtypes.dtypes import CategoricalDtype, CategoricalDtypeType
-=======
-from pandas.core.dtypes.common import infer_dtype_from_object
->>>>>>> 716dc12437a7b3bb33e8a2ccfa6ecb2c592568c7
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120

From 00ea7960ab6145ba1b7ff069cc90ab41051818e8 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 16 Jun 2021 18:11:52 -0700
Subject: [PATCH 09/13] added to/from arrow tests for decimal32

---
 python/cudf/cudf/core/column/__init__.py | 10 ++--
 python/cudf/cudf/core/column/column.py   |  1 -
 python/cudf/cudf/core/column/decimal.py  |  4 +-
 python/cudf/cudf/tests/test_decimal.py   | 74 ++++++++++++++----------
 4 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py
index 388d998e479..34974b54106 100644
--- a/python/cudf/cudf/core/column/__init__.py
+++ b/python/cudf/cudf/core/column/__init__.py
@@ -15,13 +15,13 @@
     serialize_columns,
 )
 from cudf.core.column.datetime import DatetimeColumn  # noqa: F401
-from cudf.core.column.decimal import (  # noqa: F401
-    Decimal32Column,
-    Decimal64Column,
-)
-from cudf.core.column.interval import IntervalColumn  # noqa: F401
 from cudf.core.column.lists import ListColumn  # noqa: F401
 from cudf.core.column.numerical import NumericalColumn  # noqa: F401
 from cudf.core.column.string import StringColumn  # noqa: F401
 from cudf.core.column.struct import StructColumn  # noqa: F401
 from cudf.core.column.timedelta import TimeDeltaColumn  # noqa: F401
+from cudf.core.column.interval import IntervalColumn  # noqa: F401
+from cudf.core.column.decimal import (  # noqa: F401
+    Decimal32Column,
+    Decimal64Column,
+)
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 4e122443caa..6776c8d195d 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1970,7 +1970,6 @@ def as_column(
                                 "Cannot create list column from given data"
                             )
                         return as_column(data, nan_as_null=nan_as_null)
-                    # breakpoint()
                     if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
                         data = pa.array(
                             arbitrary,
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index b5acc1f151a..0a8c2bae5ef 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -16,7 +16,7 @@
 )
 from cudf._typing import Dtype
 from cudf.core.buffer import Buffer
-from cudf.core.column import ColumnBase, NumericalColumn, as_column
+from cudf.core.column import ColumnBase, as_column
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype
 from cudf.utils.dtypes import is_scalar
 from cudf.utils.utils import pa_mask_buffer_to_mask
@@ -234,7 +234,7 @@ def fillna(
             value = cudf.Scalar(value, dtype=self.dtype)
         elif (
             isinstance(value, Decimal64Column)
-            or isinstance(value, NumericalColumn)
+            or isinstance(value, cudf.core.column.NumericalColumn)
             and is_integer_dtype(value.dtype)
         ):
             value = value.astype(self.dtype)
diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
index 70fd22778bb..2301c218d77 100644
--- a/python/cudf/cudf/tests/test_decimal.py
+++ b/python/cudf/cudf/tests/test_decimal.py
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.core.column import Decimal64Column, NumericalColumn
+from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn
 from cudf.core.dtypes import Decimal64Dtype
 from cudf.tests.utils import (
     FLOAT_TYPES,
@@ -18,42 +18,54 @@
     assert_eq,
 )
 
-
-@pytest.mark.parametrize(
-    "data",
-    [
-        [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")],
-        [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")],
-        [1],
-        [-1],
-        [1, 2, 3, 4],
-        [42, 1729, 4104],
-        [1, 2, None, 4],
-        [None, None, None],
-        [],
-    ],
-)
-@pytest.mark.parametrize(
-    "typ",
-    [
-        pa.decimal128(precision=4, scale=2),
-        pa.decimal128(precision=5, scale=3),
-        pa.decimal128(precision=6, scale=4),
-    ],
-)
-def test_round_trip_decimal_column(data, typ):
-    pa_arr = pa.array(data, type=typ)
-    col = Decimal64Column.from_arrow(pa_arr)
-    assert pa_arr.equals(col.to_arrow())
-
-
-def test_from_arrow_max_precision():
+data_ = [
+    [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")],
+    [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")],
+    [1],
+    [-1],
+    [1, 2, 3, 4],
+    [42, 1729, 4104],
+    [1, 2, None, 4],
+    [None, None, None],
+    [],
+]
+typ_ = [
+    pa.decimal128(precision=4, scale=2),
+    pa.decimal128(precision=5, scale=3),
+    pa.decimal128(precision=6, scale=4),
+]
+
+
+@pytest.mark.parametrize("data_", data_)
+@pytest.mark.parametrize("typ_", typ_)
+def test_round_trip_decimal64_column(data_, typ_):
+    """Arrow -> Decimal64Column -> arrow must reproduce the input array."""
+    expected = pa.array(data_, type=typ_)
+    assert expected.equals(Decimal64Column.from_arrow(expected).to_arrow())
+
+
+@pytest.mark.parametrize("data_", data_)
+@pytest.mark.parametrize("typ_", typ_)
+def test_round_trip_decimal32_column(data_, typ_):
+    """Arrow -> Decimal32Column -> arrow must reproduce the input array."""
+    expected = pa.array(data_, type=typ_)
+    assert expected.equals(Decimal32Column.from_arrow(expected).to_arrow())
+
+
+def test_from_arrow_max_precision_decimal64():
     with pytest.raises(ValueError):
         Decimal64Column.from_arrow(
             pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19))
         )
 
 
+def test_from_arrow_max_precision_decimal32():
+    """Building a Decimal32Column from a precision-10 array must raise."""
+    typ = pa.decimal128(scale=0, precision=10)
+    with pytest.raises(ValueError):
+        Decimal32Column.from_arrow(pa.array([1, 2, 3], type=typ))
+
+
 @pytest.mark.parametrize(
     "data",
     [

From 4cb5b57ecc05ed8d9a6e97e65e80b0ee3d1a33f3 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Fri, 18 Jun 2021 14:43:21 -0700
Subject: [PATCH 10/13] addressed review comments

---
 python/cudf/cudf/api/types.py           |  10 +-
 python/cudf/cudf/core/column/column.py  |  34 +++--
 python/cudf/cudf/core/column/decimal.py |   6 +-
 python/cudf/cudf/core/dtypes.py         |  34 +++--
 python/cudf/cudf/utils/dtypes.py        | 180 ++----------------------
 5 files changed, 69 insertions(+), 195 deletions(-)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index a985efeca51..56398bd4f13 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -15,9 +15,11 @@
 
 import cudf
 from cudf._lib.scalar import DeviceScalar
-from cudf.core.dtypes import (
+from cudf.core.dtypes import (  # noqa: F401
     _BaseDtype,
     is_categorical_dtype,
+    is_decimal32_dtype,
+    is_decimal64_dtype,
     is_decimal_dtype,
     is_interval_dtype,
     is_list_dtype,
@@ -39,11 +41,15 @@ def is_numeric_dtype(obj):
         Whether or not the array or dtype is of a numeric dtype.
     """
     if isclass(obj):
-        if issubclass(obj, cudf.Decimal64Dtype):
+        if issubclass(obj, (cudf.Decimal32Dtype, cudf.Decimal64Dtype)):
             return True
         if issubclass(obj, _BaseDtype):
             return False
     else:
+        if isinstance(obj, cudf.Decimal32Dtype) or isinstance(
+            getattr(obj, "dtype", None), cudf.Decimal32Dtype
+        ):
+            return True
         if isinstance(obj, cudf.Decimal64Dtype) or isinstance(
             getattr(obj, "dtype", None), cudf.Decimal64Dtype
         ):
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d6fe020e5e6..576eb0fcf6b 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -47,30 +47,30 @@
 )
 from cudf.utils import ioutils, utils
 from cudf.utils.dtypes import (
-    _is_non_decimal_numeric_dtype,
-    _is_scalar_or_zero_d_array,
     check_cast_unsupported_dtype,
     cudf_dtype_from_pa_type,
     get_time_unit,
-    is_categorical_dtype,
-    is_decimal32_dtype,
-    is_decimal64_dtype,
-    is_decimal_dtype,
-    is_interval_dtype,
-    is_list_dtype,
-    is_scalar,
-    is_string_dtype,
-    is_struct_dtype,
     min_unsigned_type,
     np_to_pa_dtype,
 )
 from cudf.utils.utils import mask_dtype
 
 from ...api.types import (
+    _is_non_decimal_numeric_dtype,
+    _is_scalar_or_zero_d_array,
     infer_dtype,
     is_bool_dtype,
+    is_categorical_dtype,
+    is_decimal32_dtype,
+    is_decimal64_dtype,
+    is_decimal_dtype,
     is_dtype_equal,
     is_integer_dtype,
+    is_interval_dtype,
+    is_list_dtype,
+    is_scalar,
+    is_string_dtype,
+    is_struct_dtype,
     pandas_dtype,
 )
 
@@ -975,9 +975,21 @@ def as_string_column(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
+    ) -> Union[
+        "cudf.core.column.Decimal32Column", "cudf.core.column.Decimal64Column"
+    ]:
+        raise NotImplementedError
+
+    def as_decimal64_column(
+        self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.Decimal64Column":
         raise NotImplementedError
 
+    def as_decimal32_column(
+        self, dtype: Dtype, **kwargs
+    ) -> "cudf.core.column.Decimal32Column":
+        raise NotImplementedError
+
     def apply_boolean_mask(self, mask) -> ColumnBase:
         mask = as_column(mask, dtype="bool")
         result = (
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 80999d77a71..40fc8a9d720 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -20,8 +20,8 @@
 from cudf.utils.dtypes import is_scalar
 from cudf.utils.utils import pa_mask_buffer_to_mask
 
-from .numerical_base import NumericalBaseColumn
 from ...api.types import is_integer_dtype
+from .numerical_base import NumericalBaseColumn
 
 
 class Decimal32Column(NumericalBaseColumn):
@@ -268,8 +268,8 @@ def __cuda_array_interface__(self):
         )
 
     def _with_type_metadata(
-        self: "cudf.core.column.DecimalColumn", dtype: Dtype
-    ) -> "cudf.core.column.DecimalColumn":
+        self: "cudf.core.column.Decimal64Column", dtype: Dtype
+    ) -> "cudf.core.column.Decimal64Column":
         if isinstance(dtype, Decimal64Dtype):
             self.dtype.precision = dtype.precision
 
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 1f4c576cd04..a27d60890f0 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -607,15 +607,7 @@ def is_decimal_dtype(obj):
     bool
         Whether or not the array-like or dtype is of the decimal dtype.
     """
-    return (
-        type(obj) is cudf.core.dtypes.Decimal64Dtype
-        or obj is cudf.core.dtypes.Decimal64Dtype
-        or (
-            isinstance(obj, str)
-            and obj == cudf.core.dtypes.Decimal64Dtype.name
-        )
-        or (hasattr(obj, "dtype") and is_decimal_dtype(obj.dtype))
-    )
+    return is_decimal32_dtype(obj) or is_decimal64_dtype(obj)
 
 
 def is_interval_dtype(obj):
@@ -642,3 +634,27 @@ def is_interval_dtype(obj):
         )
         or (hasattr(obj, "dtype") and is_interval_dtype(obj.dtype))
     )
+
+
+def is_decimal32_dtype(obj):
+    """Return True if ``obj`` is, names, or carries a Decimal32Dtype."""
+    decimal32 = cudf.core.dtypes.Decimal32Dtype
+    # an object whose exact type is Decimal32Dtype, or the class itself
+    if type(obj) is decimal32 or obj is decimal32:
+        return True
+    # the dtype's string name
+    if isinstance(obj, str) and obj == decimal32.name:
+        return True
+    return hasattr(obj, "dtype") and is_decimal32_dtype(obj.dtype)
+
+
+def is_decimal64_dtype(obj):
+    """Return True if ``obj`` is, names, or carries a Decimal64Dtype."""
+    decimal64 = cudf.core.dtypes.Decimal64Dtype
+    # an object whose exact type is Decimal64Dtype, or the class itself
+    if type(obj) is decimal64 or obj is decimal64:
+        return True
+    # the dtype's string name
+    if isinstance(obj, str) and obj == decimal64.name:
+        return True
+    return hasattr(obj, "dtype") and is_decimal64_dtype(obj.dtype)
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 6d1078d987e..98f45002bbc 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from pandas.core.dtypes.common import infer_dtype_from_object, pandas_dtype
-from pandas.core.dtypes.dtypes import CategoricalDtype, CategoricalDtypeType
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120
@@ -17,8 +15,11 @@
 from ..api.types import (  # noqa: F401
     _is_non_decimal_numeric_dtype,
     _is_scalar_or_zero_d_array,
+    infer_dtype,
     is_categorical_dtype,
     is_datetime_dtype as is_datetime_dtype,
+    is_decimal32_dtype,
+    is_decimal64_dtype,
     is_decimal_dtype,
     is_integer,
     is_integer_dtype,
@@ -30,6 +31,7 @@
     is_string_dtype,
     is_struct_dtype,
     is_timedelta_dtype,
+    pandas_dtype,
 )
 
 _NA_REP = "<NA>"
@@ -159,170 +161,6 @@ def numeric_normalize_types(*args):
     return [a.astype(dtype) for a in args]
 
 
-def is_numerical_dtype(obj):
-    # TODO: we should handle objects with a `.dtype` attribute,
-    # e.g., arrays, here.
-    try:
-        dtype = np.dtype(obj)
-    except TypeError:
-        return False
-    return dtype.kind in "biuf"
-
-
-def is_integer_dtype(obj):
-    try:
-        dtype = np.dtype(obj)
-    except TypeError:
-        return pd.api.types.is_integer_dtype(obj)
-    return dtype.kind in "iu"
-
-
-def is_integer(obj):
-    if isinstance(obj, cudf.Scalar):
-        return is_integer_dtype(obj.dtype)
-    return pd.api.types.is_integer(obj)
-
-
-def is_string_dtype(obj):
-    return (
-        pd.api.types.is_string_dtype(obj)
-        # Reject all cudf extension types.
-        and not is_categorical_dtype(obj)
-        and not is_decimal_dtype(obj)
-        and not is_list_dtype(obj)
-        and not is_struct_dtype(obj)
-        and not is_interval_dtype(obj)
-    )
-
-
-def is_datetime_dtype(obj):
-    if obj is None:
-        return False
-    if not hasattr(obj, "str"):
-        return False
-    return "M8" in obj.str
-
-
-def is_timedelta_dtype(obj):
-    if obj is None:
-        return False
-    if not hasattr(obj, "str"):
-        return False
-    return "m8" in obj.str
-
-
-def is_categorical_dtype(obj):
-    """Infer whether a given pandas, numpy, or cuDF Column, Series, or dtype
-    is a pandas CategoricalDtype.
-    """
-    if obj is None:
-        return False
-    if isinstance(obj, cudf.CategoricalDtype):
-        return True
-    if obj is cudf.CategoricalDtype:
-        return True
-    if isinstance(obj, np.dtype):
-        return False
-    if isinstance(obj, CategoricalDtype):
-        return True
-    if obj is CategoricalDtype:
-        return True
-    if obj is CategoricalDtypeType:
-        return True
-    if isinstance(obj, str) and obj == "category":
-        return True
-    if isinstance(
-        obj,
-        (
-            CategoricalDtype,
-            cudf.core.index.CategoricalIndex,
-            cudf.core.column.CategoricalColumn,
-            pd.Categorical,
-            pd.CategoricalIndex,
-        ),
-    ):
-        return True
-    if isinstance(obj, np.ndarray):
-        return False
-    if isinstance(
-        obj,
-        (
-            cudf.Index,
-            cudf.Series,
-            cudf.core.column.ColumnBase,
-            pd.Index,
-            pd.Series,
-        ),
-    ):
-        return is_categorical_dtype(obj.dtype)
-    if hasattr(obj, "type"):
-        if obj.type is CategoricalDtypeType:
-            return True
-    return pd.api.types.is_categorical_dtype(obj)
-
-
-def is_list_dtype(obj):
-    return (
-        type(obj) is cudf.core.dtypes.ListDtype
-        or obj is cudf.core.dtypes.ListDtype
-        or type(obj) is cudf.core.column.ListColumn
-        or obj is cudf.core.column.ListColumn
-        or (isinstance(obj, str) and obj == cudf.core.dtypes.ListDtype.name)
-        or (hasattr(obj, "dtype") and is_list_dtype(obj.dtype))
-    )
-
-
-def is_struct_dtype(obj):
-    return (
-        isinstance(obj, cudf.core.dtypes.StructDtype)
-        or obj is cudf.core.dtypes.StructDtype
-        or (isinstance(obj, str) and obj == cudf.core.dtypes.StructDtype.name)
-        or (hasattr(obj, "dtype") and is_struct_dtype(obj.dtype))
-    )
-
-
-def is_interval_dtype(obj):
-    return (
-        isinstance(obj, cudf.core.dtypes.IntervalDtype)
-        or isinstance(obj, pd.core.dtypes.dtypes.IntervalDtype)
-        or obj is cudf.core.dtypes.IntervalDtype
-        or (
-            isinstance(obj, str) and obj == cudf.core.dtypes.IntervalDtype.name
-        )
-        or (hasattr(obj, "dtype") and is_interval_dtype(obj.dtype))
-    )
-
-
-def is_decimal_dtype(obj):
-    return (
-        is_decimal32_dtype(obj) or is_decimal64_dtype(obj)   
-    )
-
-
-def is_decimal32_dtype(obj):
-    return (
-        type(obj) is cudf.core.dtypes.Decimal32Dtype
-        or obj is cudf.core.dtypes.Decimal32Dtype
-        or (
-            isinstance(obj, str)
-            and obj == cudf.core.dtypes.Decimal32Dtype.name
-        )
-        or (hasattr(obj, "dtype") and is_decimal32_dtype(obj.dtype))
-    )
-
-
-def is_decimal64_dtype(obj):
-    return (
-        type(obj) is cudf.core.dtypes.Decimal64Dtype
-        or obj is cudf.core.dtypes.Decimal64Dtype
-        or (
-            isinstance(obj, str)
-            and obj == cudf.core.dtypes.Decimal64Dtype.name
-        )
-        or (hasattr(obj, "dtype") and is_decimal64_dtype(obj.dtype))
-    )
-
-
 def _find_common_type_decimal(dtypes):
     # Find the largest scale and the largest difference between
     # precision and scale of the columns to be concatenated
@@ -341,12 +179,14 @@ def cudf_dtype_from_pydata_dtype(dtype):
 
     if is_categorical_dtype(dtype):
         return cudf.core.dtypes.CategoricalDtype
-    elif is_decimal_dtype(dtype):
+    elif is_decimal32_dtype(dtype):
+        return cudf.core.dtypes.Decimal32Dtype
+    elif is_decimal64_dtype(dtype):
         return cudf.core.dtypes.Decimal64Dtype
     elif dtype in cudf._lib.types.np_to_cudf_types:
         return dtype.type
 
-    return infer_dtype_from_object(dtype)
+    return infer_dtype(dtype)
 
 
 def cudf_dtype_to_pa_type(dtype):
@@ -376,7 +216,7 @@ def cudf_dtype_from_pa_type(typ):
     elif pa.types.is_decimal(typ):
         return cudf.core.dtypes.Decimal64Dtype.from_arrow(typ)
     else:
-        return pd.api.types.pandas_dtype(typ.to_pandas_dtype())
+        return pandas_dtype(typ.to_pandas_dtype())
 
 
 def to_cudf_compatible_scalar(val, dtype=None):
@@ -416,7 +256,7 @@ def to_cudf_compatible_scalar(val, dtype=None):
     elif isinstance(val, pd.Timedelta):
         val = val.to_timedelta64()
 
-    val = pd.api.types.pandas_dtype(type(val)).type(val)
+    val = pandas_dtype(type(val)).type(val)
 
     if dtype is not None:
         val = val.astype(dtype)

From 53009c0a874c626a2942e7d93861f0a813190f41 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 23 Jun 2021 12:34:05 -0700
Subject: [PATCH 11/13] fixed CI tests

---
 python/cudf/cudf/core/frame.py  | 2 +-
 python/cudf/cudf/core/series.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5da6f7d8f2f..062ad4001de 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -507,7 +507,7 @@ def _concat(
 
         # Reassign precision for any decimal cols
         for name, col in out._data.items():
-            if isinstance(col, cudf.core.column.DecimalColumn):
+            if isinstance(col, cudf.core.column.Decimal64Column):
                 col = col._with_type_metadata(tables[0]._data[name].dtype)
 
         # Reassign index and column names
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index ad2df0849c3..5eab470cb4f 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2415,7 +2415,7 @@ def _concat(cls, objs, axis=0, index=True):
 
         col = _concat_columns([o._column for o in objs])
 
-        if isinstance(col, cudf.core.column.DecimalColumn):
+        if isinstance(col, cudf.core.column.Decimal64Column):
             col = col._with_type_metadata(objs[0]._column.dtype)
 
         return cls(data=col, index=index, name=name)

From 4d66196d4beebdbc434d60cd55360e3f8c572207 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 24 Jun 2021 13:08:55 -0700
Subject: [PATCH 12/13] fixed failing tests-- infer_dtype

---
 python/cudf/cudf/utils/dtypes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 2225852fbd4..2edb94d5778 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -184,6 +184,8 @@ def cudf_dtype_from_pydata_dtype(dtype):
         return cudf.core.dtypes.Decimal64Dtype
     elif dtype in cudf._lib.types.np_to_cudf_types:
         return dtype.type
+    elif isinstance(pandas_dtype(dtype), np.dtype):
+        return dtype
 
     return infer_dtype(dtype)
 

From c69ab3f7f3a4c512cffc1b04fd49515ce07865a6 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Mon, 28 Jun 2021 17:31:50 -0700
Subject: [PATCH 13/13] reverted to infer_dtype_from_object

---
 python/cudf/cudf/utils/dtypes.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 2edb94d5778..e1ae87e5089 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
+from pandas.core.dtypes.common import infer_dtype_from_object
 
 import cudf
 from cudf.api.types import (  # noqa: F401
@@ -184,10 +185,8 @@ def cudf_dtype_from_pydata_dtype(dtype):
         return cudf.core.dtypes.Decimal64Dtype
     elif dtype in cudf._lib.types.np_to_cudf_types:
         return dtype.type
-    elif isinstance(pandas_dtype(dtype), np.dtype):
-        return dtype
 
-    return infer_dtype(dtype)
+    return infer_dtype_from_object(dtype)
 
 
 def cudf_dtype_to_pa_type(dtype):