Enable type conversion from float to decimal type (#7450)

This implements typecasting between the `decimal` and `float` types. Addresses half of #7440.

Authors:
  - @ChrisJar

Approvers:
  - Ram (Ramakrishna Prabhu) (@rgsl888prabhu)
  - Ashwin Srinath (@shwina)

URL: #7450
rapidsai · Mar 10, 2021 · 8c44d62 · 8c44d62
1 parent 2e4b5a6
commit 8c44d62
Show file tree

Hide file tree

Showing 4 changed files with 177 additions and 10 deletions.
diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx
@@ -1,6 +1,7 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 from enum import IntEnum
+from cudf.utils.dtypes import is_decimal_dtype
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -27,6 +28,7 @@ from cudf._lib.cpp.unary cimport (
 from cudf._lib.types cimport underlying_type_t_type_id
 
 cimport cudf._lib.cpp.unary as libcudf_unary
+cimport cudf._lib.cpp.types as libcudf_types
 
 
 class UnaryOp(IntEnum):
@@ -93,14 +95,24 @@ def is_valid(Column input):
 
 def cast(Column input, object dtype=np.float64):
     cdef column_view c_input = input.view()
-    cdef type_id tid = (
-        <type_id> (
-            <underlying_type_t_type_id> (
-                np_to_cudf_types[np.dtype(dtype)]
+    cdef type_id tid
+    cdef data_type c_dtype
+
+    # TODO: Use dtype_to_data_type when it becomes available
+    # to simplify this conversion
+    if is_decimal_dtype(dtype):
+        tid = libcudf_types.type_id.DECIMAL64
+        c_dtype = data_type(tid, -dtype.scale)
+    else:
+        tid = (
+            <type_id> (
+                <underlying_type_t_type_id> (
+                    np_to_cudf_types[np.dtype(dtype)]
+                )
             )
         )
-    )
-    cdef data_type c_dtype = data_type(tid)
+        c_dtype = data_type(tid)
+
     cdef unique_ptr[column] c_result
 
     with nogil:

diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
@@ -4,18 +4,19 @@
 import cupy as cp
 import numpy as np
 import pyarrow as pa
-
+from pandas.api.types import is_integer_dtype
 from typing import cast
 
 from cudf import _lib as libcudf
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase
 from cudf.core.dtypes import Decimal64Dtype
 from cudf.utils.utils import pa_mask_buffer_to_mask
+
+from cudf._typing import Dtype
 from cudf._lib.strings.convert.convert_fixed_point import (
     from_decimal as cpp_from_decimal,
 )
-from cudf._typing import Dtype
 from cudf.core.column import as_column
 
 
@@ -67,6 +68,26 @@ def binary_operator(self, op, other, reflect=False):
         result.dtype.precision = _binop_precision(self.dtype, other.dtype, op)
         return result
 
+    def as_decimal_column(
+        self, dtype: Dtype, **kwargs
+    ) -> "cudf.core.column.DecimalColumn":
+        if dtype == self.dtype:
+            return self
+        result = libcudf.unary.cast(self, dtype)
+        if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
+            result.dtype.precision = dtype.precision
+        return result
+
+    def as_numerical_column(
+        self, dtype: Dtype
+    ) -> "cudf.core.column.NumericalColumn":
+        if is_integer_dtype(dtype):
+            raise NotImplementedError(
+                "Casting from decimal types to integer "
+                "types not currently supported"
+            )
+        return libcudf.unary.cast(self, dtype)
+
     def as_string_column(
         self, dtype: Dtype, format=None
     ) -> "cudf.core.column.StringColumn":

diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
@@ -205,6 +205,19 @@ def as_timedelta_column(
             ),
         )
 
+    def as_decimal_column(
+        self, dtype: Dtype, **kwargs
+    ) -> "cudf.core.column.DecimalColumn":
+        if is_integer_dtype(self.dtype):
+            raise NotImplementedError(
+                "Casting from integer types to decimal "
+                "types not currently supported"
+            )
+        result = libcudf.unary.cast(self, dtype)
+        if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
+            result.dtype.precision = dtype.precision
+        return result
+
     def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
         dtype = np.dtype(dtype)
         if dtype == self.dtype:

diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
@@ -2,10 +2,18 @@
 
 from decimal import Decimal
 
+import numpy as np
 import pyarrow as pa
 import pytest
+import cudf
 
-from cudf.core.column import DecimalColumn
+from cudf.core.dtypes import Decimal64Dtype
+from cudf.core.column import DecimalColumn, NumericalColumn
+
+from cudf.tests.utils import (
+    FLOAT_TYPES,
+    assert_eq,
+)
 
 
 @pytest.mark.parametrize(
@@ -41,3 +49,116 @@ def test_from_arrow_max_precision():
         DecimalColumn.from_arrow(
             pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19))
         )
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        cudf.Series(
+            [
+                14.12302,
+                97938.2,
+                np.nan,
+                0.0,
+                -8.302014,
+                np.nan,
+                94.31304,
+                -112.2314,
+                0.3333333,
+                np.nan,
+            ]
+        ),
+    ],
+)
+@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
+@pytest.mark.parametrize(
+    "to_dtype",
+    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
+)
+def test_typecast_to_decimal(data, from_dtype, to_dtype):
+    actual = data.astype(from_dtype)
+    expected = actual
+
+    actual = actual.astype(to_dtype)
+    pa_arr = expected.to_arrow().cast(
+        pa.decimal128(to_dtype.precision, to_dtype.scale)
+    )
+    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+
+    assert_eq(actual, expected)
+    assert_eq(actual.dtype, expected.dtype)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        cudf.Series(
+            [
+                14.12309,
+                2.343942,
+                np.nan,
+                0.0,
+                -8.302082,
+                np.nan,
+                94.31308,
+                -112.2364,
+                -8.029972,
+                np.nan,
+            ]
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "from_dtype",
+    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
+)
+@pytest.mark.parametrize(
+    "to_dtype",
+    [Decimal64Dtype(7, 2), Decimal64Dtype(18, 10), Decimal64Dtype(11, 4)],
+)
+def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
+    actual = data.astype(from_dtype)
+    expected = actual
+
+    actual = actual.astype(to_dtype)
+    pa_arr = expected.to_arrow().cast(
+        pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
+    )
+    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+
+    assert_eq(actual, expected)
+    assert_eq(actual.dtype, expected.dtype)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        cudf.Series(
+            [
+                14.12309,
+                2.343942,
+                np.nan,
+                0.0,
+                -8.302082,
+                np.nan,
+                94.31308,
+                -112.2364,
+                -8.029972,
+                np.nan,
+            ]
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "from_dtype",
+    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
+)
+@pytest.mark.parametrize("to_dtype", FLOAT_TYPES)
+def test_typecast_from_decimal(data, from_dtype, to_dtype):
+    actual = data.astype(from_dtype)
+    pa_arr = actual.to_arrow().cast(to_dtype, safe=False)
+
+    actual = actual.astype(to_dtype)
+    expected = cudf.Series(NumericalColumn.from_arrow(pa_arr))
+
+    assert_eq(actual, expected)