Skip to content

Commit

Permalink
Enable type conversion from float to decimal type (#7450)
Browse files Browse the repository at this point in the history
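This implements typecasting between `decimal` and `float` types in both directions, as well as between `decimal` types with different precision/scale. Casting between `decimal` and integer types is not yet supported and raises `NotImplementedError`.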

Addresses half of #7440

Authors:
  - @ChrisJar

Approvers:
  - Ram (Ramakrishna Prabhu) (@rgsl888prabhu)
  - Ashwin Srinath (@shwina)

URL: #7450
  • Loading branch information
ChrisJar authored Mar 10, 2021
1 parent 2e4b5a6 commit 8c44d62
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 10 deletions.
26 changes: 19 additions & 7 deletions python/cudf/cudf/_lib/unary.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2021, NVIDIA CORPORATION.

from enum import IntEnum
from cudf.utils.dtypes import is_decimal_dtype

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
Expand All @@ -27,6 +28,7 @@ from cudf._lib.cpp.unary cimport (
from cudf._lib.types cimport underlying_type_t_type_id

cimport cudf._lib.cpp.unary as libcudf_unary
cimport cudf._lib.cpp.types as libcudf_types


class UnaryOp(IntEnum):
Expand Down Expand Up @@ -93,14 +95,24 @@ def is_valid(Column input):

def cast(Column input, object dtype=np.float64):
cdef column_view c_input = input.view()
cdef type_id tid = (
<type_id> (
<underlying_type_t_type_id> (
np_to_cudf_types[np.dtype(dtype)]
cdef type_id tid
cdef data_type c_dtype

# TODO: Use dtype_to_data_type when it becomes available
# to simplify this conversion
if is_decimal_dtype(dtype):
tid = libcudf_types.type_id.DECIMAL64
c_dtype = data_type(tid, -dtype.scale)
else:
tid = (
<type_id> (
<underlying_type_t_type_id> (
np_to_cudf_types[np.dtype(dtype)]
)
)
)
)
cdef data_type c_dtype = data_type(tid)
c_dtype = data_type(tid)

cdef unique_ptr[column] c_result

with nogil:
Expand Down
25 changes: 23 additions & 2 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@
import cupy as cp
import numpy as np
import pyarrow as pa

from pandas.api.types import is_integer_dtype
from typing import cast

from cudf import _lib as libcudf
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase
from cudf.core.dtypes import Decimal64Dtype
from cudf.utils.utils import pa_mask_buffer_to_mask

from cudf._typing import Dtype
from cudf._lib.strings.convert.convert_fixed_point import (
from_decimal as cpp_from_decimal,
)
from cudf._typing import Dtype
from cudf.core.column import as_column


Expand Down Expand Up @@ -67,6 +68,26 @@ def binary_operator(self, op, other, reflect=False):
result.dtype.precision = _binop_precision(self.dtype, other.dtype, op)
return result

def as_decimal_column(
    self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
    """Cast this decimal column to another decimal dtype.

    Parameters
    ----------
    dtype : Dtype
        The target dtype. When it compares equal to the column's
        current dtype, the column itself is returned unchanged.
    **kwargs
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    DecimalColumn
        ``self`` when no conversion is needed, otherwise the column
        produced by ``libcudf.unary.cast``.
    """
    # Nothing to convert when the requested dtype is already ours.
    if dtype == self.dtype:
        return self

    converted = libcudf.unary.cast(self, dtype)
    if not isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
        return converted

    # The requested precision is copied onto the result's dtype after
    # the cast has been performed.
    converted.dtype.precision = dtype.precision
    return converted

def as_numerical_column(
    self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
    """Cast this decimal column to a non-integer numeric dtype.

    Parameters
    ----------
    dtype : Dtype
        The target numeric dtype.

    Returns
    -------
    NumericalColumn
        The result of ``libcudf.unary.cast(self, dtype)``.

    Raises
    ------
    NotImplementedError
        If ``dtype`` is an integer dtype (as judged by
        ``is_integer_dtype``); decimal-to-integer casts are rejected
        before any cast is attempted.
    """
    # Supported path first: anything that is not an integer dtype is
    # handed straight to the libcudf cast.
    if not is_integer_dtype(dtype):
        return libcudf.unary.cast(self, dtype)

    raise NotImplementedError(
        "Casting from decimal types to integer "
        "types not currently supported"
    )

def as_string_column(
self, dtype: Dtype, format=None
) -> "cudf.core.column.StringColumn":
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,19 @@ def as_timedelta_column(
),
)

def as_decimal_column(
    self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
    """Cast this numerical column to a decimal dtype.

    Parameters
    ----------
    dtype : Dtype
        The target decimal dtype.
    **kwargs
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    DecimalColumn
        The column produced by ``libcudf.unary.cast``.

    Raises
    ------
    NotImplementedError
        If this column's own dtype is an integer dtype (as judged by
        ``is_integer_dtype``); integer-to-decimal casts are rejected
        before any cast is attempted.
    """
    source_is_integer = is_integer_dtype(self.dtype)
    if source_is_integer:
        raise NotImplementedError(
            "Casting from integer types to decimal "
            "types not currently supported"
        )

    converted = libcudf.unary.cast(self, dtype)
    if not isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
        return converted

    # The requested precision is copied onto the result's dtype after
    # the cast has been performed.
    converted.dtype.precision = dtype.precision
    return converted

def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
dtype = np.dtype(dtype)
if dtype == self.dtype:
Expand Down
123 changes: 122 additions & 1 deletion python/cudf/cudf/tests/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,18 @@

from decimal import Decimal

import numpy as np
import pyarrow as pa
import pytest
import cudf

from cudf.core.column import DecimalColumn
from cudf.core.dtypes import Decimal64Dtype
from cudf.core.column import DecimalColumn, NumericalColumn

from cudf.tests.utils import (
FLOAT_TYPES,
assert_eq,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -41,3 +49,116 @@ def test_from_arrow_max_precision():
DecimalColumn.from_arrow(
pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19))
)


@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12302,
                97938.2,
                np.nan,
                0.0,
                -8.302014,
                np.nan,
                94.31304,
                -112.2314,
                0.3333333,
                np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_to_decimal(data, from_dtype, to_dtype):
    """Float -> decimal ``astype`` must agree with pyarrow's cast.

    The input series is first converted to ``from_dtype``; that float
    series is then cast to ``to_dtype`` by cudf and, independently, by
    pyarrow, and the two results (values and dtype) are compared.
    """
    float_series = data.astype(from_dtype)

    # Result under test: cudf's own float -> decimal cast.
    actual = float_series.astype(to_dtype)

    # Reference result: round-trip through arrow and let pyarrow cast.
    arrow_decimal = float_series.to_arrow().cast(
        pa.decimal128(to_dtype.precision, to_dtype.scale)
    )
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_decimal))

    assert_eq(actual, expected)
    assert_eq(actual.dtype, expected.dtype)


@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12309,
                2.343942,
                np.nan,
                0.0,
                -8.302082,
                np.nan,
                94.31308,
                -112.2364,
                -8.029972,
                np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize(
    "from_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(18, 10), Decimal64Dtype(11, 4)],
)
def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
    """Decimal -> decimal ``astype`` must agree with pyarrow's cast.

    The input series is first converted to the ``from_dtype`` decimal
    type; that decimal series is then cast to ``to_dtype`` by cudf and,
    independently, by pyarrow (called with ``safe=False``), and the two
    results (values and dtype) are compared.
    """
    source_series = data.astype(from_dtype)

    # Result under test: cudf's own decimal -> decimal cast.
    actual = source_series.astype(to_dtype)

    # Reference result: round-trip through arrow and let pyarrow cast.
    arrow_decimal = source_series.to_arrow().cast(
        pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
    )
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_decimal))

    assert_eq(actual, expected)
    assert_eq(actual.dtype, expected.dtype)


@pytest.mark.parametrize(
    "data",
    [
        cudf.Series(
            [
                14.12309,
                2.343942,
                np.nan,
                0.0,
                -8.302082,
                np.nan,
                94.31308,
                -112.2364,
                -8.029972,
                np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize(
    "from_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
)
@pytest.mark.parametrize("to_dtype", FLOAT_TYPES)
def test_typecast_from_decimal(data, from_dtype, to_dtype):
    """Decimal -> float ``astype`` must agree with pyarrow's cast.

    The input series is first converted to the ``from_dtype`` decimal
    type. The reference is built by casting its arrow representation to
    ``to_dtype`` with pyarrow (called with ``safe=False``); the result
    under test is cudf's own ``astype`` to the same float dtype.
    """
    decimal_series = data.astype(from_dtype)

    # Reference values are computed by pyarrow before cudf's cast runs.
    arrow_float = decimal_series.to_arrow().cast(to_dtype, safe=False)

    actual = decimal_series.astype(to_dtype)
    expected = cudf.Series(NumericalColumn.from_arrow(arrow_float))

    assert_eq(actual, expected)

0 comments on commit 8c44d62

Please sign in to comment.