Enable typecasting between decimal and int (#7691)

This enables type conversions between `decimal` types and `int` types.

Closes #7440

Authors:
  - @ChrisJar

Approvers:
  - GALI PREM SAGAR (@galipremsagar)

URL: #7691
rapidsai · Mar 30, 2021 · fdcdb96 · fdcdb96
1 parent 2d24a9b
commit fdcdb96
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 57 deletions.
diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx
@@ -25,7 +25,7 @@ from cudf._lib.cpp.unary cimport (
     unary_operator,
 )
 
-from cudf._lib.types cimport underlying_type_t_type_id
+from cudf._lib.types cimport underlying_type_t_type_id, dtype_to_data_type
 
 cimport cudf._lib.cpp.unary as libcudf_unary
 cimport cudf._lib.cpp.types as libcudf_types
@@ -95,30 +95,17 @@ def is_valid(Column input):
 
 def cast(Column input, object dtype=np.float64):
     cdef column_view c_input = input.view()
-    cdef type_id tid
-    cdef data_type c_dtype
-
-    # TODO: Use dtype_to_data_type when it becomes available
-    # to simplify this conversion
-    if is_decimal_dtype(dtype):
-        tid = libcudf_types.type_id.DECIMAL64
-        c_dtype = data_type(tid, -dtype.scale)
-    else:
-        tid = (
-            <type_id> (
-                <underlying_type_t_type_id> (
-                    np_to_cudf_types[np.dtype(dtype)]
-                )
-            )
-        )
-        c_dtype = data_type(tid)
+    cdef data_type c_dtype = dtype_to_data_type(dtype)
 
     cdef unique_ptr[column] c_result
 
     with nogil:
         c_result = move(libcudf_unary.cast(c_input, c_dtype))
 
-    return Column.from_unique_ptr(move(c_result))
+    result = Column.from_unique_ptr(move(c_result))
+    if is_decimal_dtype(result.dtype):
+        result.dtype.precision = dtype.precision
+    return result
 
 
 def is_nan(Column input):

diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
@@ -4,7 +4,6 @@
 import cupy as cp
 import numpy as np
 import pyarrow as pa
-from pandas.api.types import is_integer_dtype
 from typing import cast
 
 from cudf import _lib as libcudf
@@ -80,19 +79,11 @@ def as_decimal_column(
     ) -> "cudf.core.column.DecimalColumn":
         if dtype == self.dtype:
             return self
-        result = libcudf.unary.cast(self, dtype)
-        if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
-            result.dtype.precision = dtype.precision
-        return result
+        return libcudf.unary.cast(self, dtype)
 
     def as_numerical_column(
         self, dtype: Dtype
     ) -> "cudf.core.column.NumericalColumn":
-        if is_integer_dtype(dtype):
-            raise NotImplementedError(
-                "Casting from decimal types to integer "
-                "types not currently supported"
-            )
         return libcudf.unary.cast(self, dtype)
 
     def as_string_column(

diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
@@ -208,15 +208,7 @@ def as_timedelta_column(
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.DecimalColumn":
-        if is_integer_dtype(self.dtype):
-            raise NotImplementedError(
-                "Casting from integer types to decimal "
-                "types not currently supported"
-            )
-        result = libcudf.unary.cast(self, dtype)
-        if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
-            result.dtype.precision = dtype.precision
-        return result
+        return libcudf.unary.cast(self, dtype)
 
     def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
         dtype = np.dtype(dtype)

diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
@@ -11,7 +11,9 @@
 from cudf.core.column import DecimalColumn, NumericalColumn
 
 from cudf.tests.utils import (
+    NUMERIC_TYPES,
     FLOAT_TYPES,
+    INTEGER_TYPES,
     assert_eq,
 )
 
@@ -75,18 +77,59 @@ def test_from_arrow_max_precision():
     "to_dtype",
     [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
 )
-def test_typecast_to_decimal(data, from_dtype, to_dtype):
-    actual = data.astype(from_dtype)
-    expected = actual
+def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
+    got = data.astype(from_dtype)
 
-    actual = actual.astype(to_dtype)
-    pa_arr = expected.to_arrow().cast(
+    pa_arr = got.to_arrow().cast(
         pa.decimal128(to_dtype.precision, to_dtype.scale)
     )
     expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
 
-    assert_eq(actual, expected)
-    assert_eq(actual.dtype, expected.dtype)
+    got = got.astype(to_dtype)
+
+    assert_eq(got, expected)
+    assert_eq(got.dtype, expected.dtype)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        cudf.Series(
+            [
+                14.12302,
+                38.2,
+                np.nan,
+                0.0,
+                -8.302014,
+                np.nan,
+                94.31304,
+                np.nan,
+                -112.2314,
+                0.3333333,
+                np.nan,
+            ]
+        ),
+    ],
+)
+@pytest.mark.parametrize("from_dtype", INTEGER_TYPES)
+@pytest.mark.parametrize(
+    "to_dtype",
+    [Decimal64Dtype(9, 3), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
+)
+def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype):
+    got = data.astype(from_dtype)
+
+    pa_arr = (
+        got.to_arrow()
+        .cast("float64")
+        .cast(pa.decimal128(to_dtype.precision, to_dtype.scale))
+    )
+    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+
+    got = got.astype(to_dtype)
+
+    assert_eq(got, expected)
+    assert_eq(got.dtype, expected.dtype)
 
 
 @pytest.mark.parametrize(
@@ -117,17 +160,17 @@ def test_typecast_to_decimal(data, from_dtype, to_dtype):
     [Decimal64Dtype(7, 2), Decimal64Dtype(18, 10), Decimal64Dtype(11, 4)],
 )
 def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
-    actual = data.astype(from_dtype)
-    expected = actual
+    got = data.astype(from_dtype)
 
-    actual = actual.astype(to_dtype)
-    pa_arr = expected.to_arrow().cast(
+    pa_arr = got.to_arrow().cast(
         pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
     )
     expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
 
-    assert_eq(actual, expected)
-    assert_eq(actual.dtype, expected.dtype)
+    got = got.astype(to_dtype)
+
+    assert_eq(got, expected)
+    assert_eq(got.dtype, expected.dtype)
 
 
 @pytest.mark.parametrize(
@@ -151,14 +194,15 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
 )
 @pytest.mark.parametrize(
     "from_dtype",
-    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
+    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(17, 10)],
 )
-@pytest.mark.parametrize("to_dtype", FLOAT_TYPES)
+@pytest.mark.parametrize("to_dtype", NUMERIC_TYPES)
 def test_typecast_from_decimal(data, from_dtype, to_dtype):
-    actual = data.astype(from_dtype)
-    pa_arr = actual.to_arrow().cast(to_dtype, safe=False)
+    got = data.astype(from_dtype)
+    pa_arr = got.to_arrow().cast(to_dtype, safe=False)
 
-    actual = actual.astype(to_dtype)
+    got = got.astype(to_dtype)
     expected = cudf.Series(NumericalColumn.from_arrow(pa_arr))
 
-    assert_eq(actual, expected)
+    assert_eq(got, expected)
+    assert_eq(got.dtype, expected.dtype)