Skip to content

Commit

Permalink
Enable typecasting between decimal and int (#7691)
Browse files: browse the repository at this point in the history
This enables type conversions between `decimal` types and `int` types.

Closes #7440

Authors:
  - @ChrisJar

Approvers:
  - GALI PREM SAGAR (@galipremsagar)

URL: #7691
  • Loading branch information
ChrisJar authored Mar 30, 2021
1 parent 2d24a9b commit fdcdb96
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 57 deletions.
25 changes: 6 additions & 19 deletions python/cudf/cudf/_lib/unary.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ from cudf._lib.cpp.unary cimport (
unary_operator,
)

from cudf._lib.types cimport underlying_type_t_type_id
from cudf._lib.types cimport underlying_type_t_type_id, dtype_to_data_type

cimport cudf._lib.cpp.unary as libcudf_unary
cimport cudf._lib.cpp.types as libcudf_types
Expand Down Expand Up @@ -95,30 +95,17 @@ def is_valid(Column input):

def cast(Column input, object dtype=np.float64):
cdef column_view c_input = input.view()
cdef type_id tid
cdef data_type c_dtype

# TODO: Use dtype_to_data_type when it becomes available
# to simplify this conversion
if is_decimal_dtype(dtype):
tid = libcudf_types.type_id.DECIMAL64
c_dtype = data_type(tid, -dtype.scale)
else:
tid = (
<type_id> (
<underlying_type_t_type_id> (
np_to_cudf_types[np.dtype(dtype)]
)
)
)
c_dtype = data_type(tid)
cdef data_type c_dtype = dtype_to_data_type(dtype)

cdef unique_ptr[column] c_result

with nogil:
c_result = move(libcudf_unary.cast(c_input, c_dtype))

return Column.from_unique_ptr(move(c_result))
result = Column.from_unique_ptr(move(c_result))
if is_decimal_dtype(result.dtype):
result.dtype.precision = dtype.precision
return result


def is_nan(Column input):
Expand Down
11 changes: 1 addition & 10 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import cupy as cp
import numpy as np
import pyarrow as pa
from pandas.api.types import is_integer_dtype
from typing import cast

from cudf import _lib as libcudf
Expand Down Expand Up @@ -80,19 +79,11 @@ def as_decimal_column(
) -> "cudf.core.column.DecimalColumn":
if dtype == self.dtype:
return self
result = libcudf.unary.cast(self, dtype)
if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
result.dtype.precision = dtype.precision
return result
return libcudf.unary.cast(self, dtype)

def as_numerical_column(
self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
if is_integer_dtype(dtype):
raise NotImplementedError(
"Casting from decimal types to integer "
"types not currently supported"
)
return libcudf.unary.cast(self, dtype)

def as_string_column(
Expand Down
10 changes: 1 addition & 9 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,7 @@ def as_timedelta_column(
def as_decimal_column(
self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
if is_integer_dtype(self.dtype):
raise NotImplementedError(
"Casting from integer types to decimal "
"types not currently supported"
)
result = libcudf.unary.cast(self, dtype)
if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
result.dtype.precision = dtype.precision
return result
return libcudf.unary.cast(self, dtype)

def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
dtype = np.dtype(dtype)
Expand Down
82 changes: 63 additions & 19 deletions python/cudf/cudf/tests/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from cudf.core.column import DecimalColumn, NumericalColumn

from cudf.tests.utils import (
NUMERIC_TYPES,
FLOAT_TYPES,
INTEGER_TYPES,
assert_eq,
)

Expand Down Expand Up @@ -75,18 +77,59 @@ def test_from_arrow_max_precision():
"to_dtype",
[Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_to_decimal(data, from_dtype, to_dtype):
actual = data.astype(from_dtype)
expected = actual
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
got = data.astype(from_dtype)

actual = actual.astype(to_dtype)
pa_arr = expected.to_arrow().cast(
pa_arr = got.to_arrow().cast(
pa.decimal128(to_dtype.precision, to_dtype.scale)
)
expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

assert_eq(actual, expected)
assert_eq(actual.dtype, expected.dtype)
got = got.astype(to_dtype)

assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12302, 38.2, np.nan, 0.0, -8.302014, np.nan,
                94.31304, np.nan, -112.2314, 0.3333333, np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize("from_dtype", INTEGER_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(9, 3), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype):
    """Check that casting an integer Series to a decimal dtype matches pyarrow.

    The float fixture is first cast to ``from_dtype`` (one of
    ``INTEGER_TYPES``); that integer Series is then cast to ``to_dtype``
    and compared, both by value and by dtype, against a reference built
    through pyarrow.
    """
    int_series = data.astype(from_dtype)

    # Build the reference via pyarrow: integer -> float64 -> decimal128.
    # NOTE(review): the float64 hop is presumably there because pyarrow
    # could not cast integers straight to decimal128 -- confirm.
    target_type = pa.decimal128(to_dtype.precision, to_dtype.scale)
    arrow_as_float = int_series.to_arrow().cast("float64")
    arrow_as_decimal = arrow_as_float.cast(target_type)
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_as_decimal))

    # The cast under test.
    got = int_series.astype(to_dtype)

    assert_eq(got, expected)
    assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -117,17 +160,17 @@ def test_typecast_to_decimal(data, from_dtype, to_dtype):
[Decimal64Dtype(7, 2), Decimal64Dtype(18, 10), Decimal64Dtype(11, 4)],
)
def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
actual = data.astype(from_dtype)
expected = actual
got = data.astype(from_dtype)

actual = actual.astype(to_dtype)
pa_arr = expected.to_arrow().cast(
pa_arr = got.to_arrow().cast(
pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
)
expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

assert_eq(actual, expected)
assert_eq(actual.dtype, expected.dtype)
got = got.astype(to_dtype)

assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
Expand All @@ -151,14 +194,15 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
)
@pytest.mark.parametrize(
"from_dtype",
[Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
[Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(17, 10)],
)
@pytest.mark.parametrize("to_dtype", FLOAT_TYPES)
@pytest.mark.parametrize("to_dtype", NUMERIC_TYPES)
def test_typecast_from_decimal(data, from_dtype, to_dtype):
actual = data.astype(from_dtype)
pa_arr = actual.to_arrow().cast(to_dtype, safe=False)
got = data.astype(from_dtype)
pa_arr = got.to_arrow().cast(to_dtype, safe=False)

actual = actual.astype(to_dtype)
got = got.astype(to_dtype)
expected = cudf.Series(NumericalColumn.from_arrow(pa_arr))

assert_eq(actual, expected)
assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)

0 comments on commit fdcdb96

Please sign in to comment.