diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx index 70cbc56e525..6e20dcaf299 100644 --- a/python/cudf/cudf/_lib/unary.pyx +++ b/python/cudf/cudf/_lib/unary.pyx @@ -1,6 +1,7 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. from enum import IntEnum +from cudf.utils.dtypes import is_decimal_dtype from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -27,6 +28,7 @@ from cudf._lib.cpp.unary cimport ( from cudf._lib.types cimport underlying_type_t_type_id cimport cudf._lib.cpp.unary as libcudf_unary +cimport cudf._lib.cpp.types as libcudf_types class UnaryOp(IntEnum): @@ -93,14 +95,24 @@ def is_valid(Column input): def cast(Column input, object dtype=np.float64): cdef column_view c_input = input.view() - cdef type_id tid = ( - ( - ( - np_to_cudf_types[np.dtype(dtype)] + cdef type_id tid + cdef data_type c_dtype + + # TODO: Use dtype_to_data_type when it becomes available + # to simplify this conversion + if is_decimal_dtype(dtype): + tid = libcudf_types.type_id.DECIMAL64 + c_dtype = data_type(tid, -dtype.scale) + else: + tid = ( + ( + ( + np_to_cudf_types[np.dtype(dtype)] + ) ) ) - ) - cdef data_type c_dtype = data_type(tid) + c_dtype = data_type(tid) + cdef unique_ptr[column] c_result with nogil: diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 0056b3a8454..4766426892a 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -4,7 +4,7 @@ import cupy as cp import numpy as np import pyarrow as pa - +from pandas.api.types import is_integer_dtype from typing import cast from cudf import _lib as libcudf @@ -12,10 +12,11 @@ from cudf.core.column import ColumnBase from cudf.core.dtypes import Decimal64Dtype from cudf.utils.utils import pa_mask_buffer_to_mask + +from cudf._typing import Dtype from cudf._lib.strings.convert.convert_fixed_point import ( from_decimal as cpp_from_decimal, ) -from 
# --- python/cudf/cudf/core/column/decimal.py (decimal column methods) ---


def as_decimal_column(
    self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
    """Cast this decimal column to another decimal ``dtype``.

    Returns ``self`` unchanged when ``dtype`` compares equal to the
    column's current dtype. Otherwise the cast is delegated to
    ``libcudf.unary.cast``; when ``dtype`` is a ``Decimal64Dtype`` its
    precision is then copied onto the result's dtype. ``**kwargs`` is
    accepted but not used by this method.
    """
    if dtype == self.dtype:
        return self

    casted = libcudf.unary.cast(self, dtype)
    if not isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
        return casted

    # Record the requested precision on the result's dtype.
    casted.dtype.precision = dtype.precision
    return casted


def as_numerical_column(
    self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
    """Cast this decimal column to a numerical ``dtype``.

    Raises
    ------
    NotImplementedError
        If ``dtype`` is an integer dtype (as judged by
        ``is_integer_dtype``).
    """
    if not is_integer_dtype(dtype):
        return libcudf.unary.cast(self, dtype)

    raise NotImplementedError(
        "Casting from decimal types to integer "
        "types not currently supported"
    )


# --- python/cudf/cudf/core/column/numerical.py (numerical column method) ---


def as_decimal_column(
    self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
    """Cast this numerical column to a decimal ``dtype``.

    The cast is delegated to ``libcudf.unary.cast``; when ``dtype`` is a
    ``Decimal64Dtype`` its precision is copied onto the result's dtype.
    ``**kwargs`` is accepted but not used by this method.

    Raises
    ------
    NotImplementedError
        If this column's own dtype is an integer dtype.
    """
    source_is_integer = is_integer_dtype(self.dtype)
    if source_is_integer:
        raise NotImplementedError(
            "Casting from integer types to decimal "
            "types not currently supported"
        )

    casted = libcudf.unary.cast(self, dtype)
    if not isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
        return casted

    # Record the requested precision on the result's dtype.
    casted.dtype.precision = dtype.precision
    return casted
@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12302, 97938.2, np.nan, 0.0, -8.302014,
                np.nan, 94.31304, -112.2314, 0.3333333, np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_to_decimal(data, from_dtype, to_dtype):
    """Cast a Series from each ``FLOAT_TYPES`` dtype to a decimal dtype.

    The reference result is built by casting the same source data with
    pyarrow to ``decimal128(precision, scale)`` and loading it through
    ``DecimalColumn.from_arrow``. Both the values and the resulting dtype
    are compared.
    """
    source = data.astype(from_dtype)
    got = source.astype(to_dtype)

    arrow_type = pa.decimal128(to_dtype.precision, to_dtype.scale)
    arrow_reference = source.to_arrow().cast(arrow_type)
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_reference))

    assert_eq(got, expected)
    assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12309, 2.343942, np.nan, 0.0, -8.302082,
                np.nan, 94.31308, -112.2364, -8.029972, np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize(
    "from_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(18, 10), Decimal64Dtype(11, 4)],
)
def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
    """Cast a Series from one decimal dtype to another.

    The reference result is the pyarrow cast of the source data to
    ``decimal128(precision, scale)`` with ``safe=False``, loaded through
    ``DecimalColumn.from_arrow``. Both the values and the resulting dtype
    are compared.
    """
    source = data.astype(from_dtype)
    got = source.astype(to_dtype)

    arrow_type = pa.decimal128(to_dtype.precision, to_dtype.scale)
    arrow_reference = source.to_arrow().cast(arrow_type, safe=False)
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_reference))

    assert_eq(got, expected)
    assert_eq(got.dtype, expected.dtype)
@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12309, 2.343942, np.nan, 0.0, -8.302082,
                np.nan, 94.31308, -112.2364, -8.029972, np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize(
    "from_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
)
@pytest.mark.parametrize("to_dtype", FLOAT_TYPES)
def test_typecast_from_decimal(data, from_dtype, to_dtype):
    """Cast a decimal Series to each dtype in ``FLOAT_TYPES``.

    The reference result is the pyarrow cast of the decimal data to
    ``to_dtype`` with ``safe=False``, loaded through
    ``NumericalColumn.from_arrow``. Only the values are compared here;
    there is no separate dtype assertion.
    """
    decimal_series = data.astype(from_dtype)

    # Build the pyarrow reference before performing the cudf cast.
    arrow_reference = decimal_series.to_arrow().cast(to_dtype, safe=False)

    got = decimal_series.astype(to_dtype)
    expected = cudf.Series(NumericalColumn.from_arrow(arrow_reference))

    assert_eq(got, expected)