diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index d879b2ec4e2..fbd4cf492de 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -2,6 +2,7 @@ import decimal import numpy as np +import pyarrow as pa from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar from cudf.core.column.column import ColumnBase @@ -114,44 +115,36 @@ def _device_value_to_host(self): self._host_value = self._device_value._to_host_scalar() def _preprocess_host_value(self, value, dtype): - if isinstance(dtype, Decimal64Dtype): - # TODO: Support coercion from decimal.Decimal to different dtype - # TODO: Support coercion from integer to Decimal64Dtype - raise NotImplementedError( - "dtype as cudf.Decimal64Dtype is not supported. Pass a " - "decimal.Decimal to construct a DecimalScalar." - ) - if isinstance(value, decimal.Decimal) and dtype is not None: - raise TypeError(f"Can not coerce decimal to {dtype}") - - value = to_cudf_compatible_scalar(value, dtype=dtype) valid = not _is_null_host_scalar(value) - if isinstance(value, decimal.Decimal): - # 0.0042 -> Decimal64Dtype(2, 4) + if isinstance(dtype, Decimal64Dtype): + value = pa.scalar( + value, type=pa.decimal128(dtype.precision, dtype.scale) + ).as_py() + if isinstance(value, decimal.Decimal) and dtype is None: dtype = Decimal64Dtype._from_decimal(value) - else: - if dtype is None: - if not valid: - if isinstance(value, (np.datetime64, np.timedelta64)): - unit, _ = np.datetime_data(value) - if unit == "generic": - raise TypeError( - "Cant convert generic NaT to null scalar" - ) - else: - dtype = value.dtype - else: + value = to_cudf_compatible_scalar(value, dtype=dtype) + + if dtype is None: + if not valid: + if isinstance(value, (np.datetime64, np.timedelta64)): + unit, _ = np.datetime_data(value) + if unit == "generic": raise TypeError( - "dtype required when constructing a null scalar" + "Cant convert generic NaT to null scalar" ) + else: + dtype = value.dtype else: - dtype = value.dtype - dtype = np.dtype(dtype) + raise TypeError( + "dtype required when constructing a null scalar" + ) + else: + dtype = value.dtype - # temporary - dtype = np.dtype("object") if dtype.char == "U" else dtype + if not isinstance(dtype, Decimal64Dtype): + dtype = np.dtype(dtype) if not valid: value = NA diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index 70fc63baba8..f6502c4c1fd 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -201,3 +201,89 @@ def test_typecast_from_decimal(data, from_dtype, to_dtype): expected = cudf.Series(NumericalColumn.from_arrow(pa_arr)) assert_eq(got, expected) + assert_eq(got.dtype, expected.dtype) + + +def _decimal_series(input, dtype): + return cudf.Series( + [x if x is None else Decimal(x) for x in input], dtype=dtype, + ) + + +@pytest.mark.parametrize( + "args", + [ + # scatter to a single index + ( + ["1", "2", "3"], + Decimal64Dtype(1, 0), + Decimal(5), + 1, + ["1", "5", "3"], + ), + ( + ["1.5", "2.5", "3.5"], + Decimal64Dtype(2, 1), + Decimal("5.5"), + 1, + ["1.5", "5.5", "3.5"], + ), + ( + ["1.0042", "2.0042", "3.0042"], + Decimal64Dtype(5, 4), + Decimal("5.0042"), + 1, + ["1.0042", "5.0042", "3.0042"], + ), + # scatter via boolmask + ( + ["1", "2", "3"], + Decimal64Dtype(1, 0), + Decimal(5), + cudf.Series([True, False, True]), + ["5", "2", "5"], + ), + ( + ["1.5", "2.5", "3.5"], + Decimal64Dtype(2, 1), + Decimal("5.5"), + cudf.Series([True, True, True]), + ["5.5", "5.5", "5.5"], + ), + ( + ["1.0042", "2.0042", "3.0042"], + Decimal64Dtype(5, 4), + Decimal("5.0042"), + cudf.Series([False, False, True]), + ["1.0042", "2.0042", "5.0042"], + ), + # We will allow assigning a decimal with less precision + ( + ["1.00", "2.00", "3.00"], + Decimal64Dtype(3, 2), + Decimal(5), + 1, + ["1.00", "5.00", "3.00"], + ), + # But not truncation + ( + ["1", "2", "3"], + Decimal64Dtype(1, 0), + Decimal("5.5"), + 1, + pa.lib.ArrowInvalid, + ), + ], +) +def test_series_setitem_decimal(args): + data, dtype, item, to, expect = args + data = _decimal_series(data, dtype) + + if expect is pa.lib.ArrowInvalid: + with pytest.raises(expect): + data[to] = item + return + else: + expect = _decimal_series(expect, dtype) + data[to] = item + assert_eq(data, expect) diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index 916e73ea381..42939f8129a 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -194,14 +195,13 @@ def test_scalar_roundtrip(value): + TEST_DECIMAL_TYPES, ) def test_null_scalar(dtype): - if isinstance(dtype, cudf.Decimal64Dtype): - with pytest.raises(NotImplementedError): - s = cudf.Scalar(None, dtype=dtype) - return - s = cudf.Scalar(None, dtype=dtype) assert s.value is cudf.NA - assert s.dtype == np.dtype(dtype) + assert s.dtype == ( + np.dtype(dtype) + if not isinstance(dtype, cudf.Decimal64Dtype) + else dtype + ) assert s.is_valid() is False @@ -234,25 +234,36 @@ def test_generic_null_scalar_construction_fails(value): @pytest.mark.parametrize( - "dtype", - NUMERIC_TYPES - + DATETIME_TYPES - + TIMEDELTA_TYPES - + ["object"] - + TEST_DECIMAL_TYPES, + "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["object"] ) def test_scalar_dtype_and_validity(dtype): - if isinstance(dtype, cudf.Decimal64Dtype): - with pytest.raises(NotImplementedError): - s = cudf.Scalar(None, dtype=dtype) - return - s = cudf.Scalar(1, dtype=dtype) assert s.dtype == np.dtype(dtype) assert s.is_valid() is True +@pytest.mark.parametrize( + "slr,dtype,expect", + [ + (1, cudf.Decimal64Dtype(1, 0), Decimal("1")), + (Decimal(1), cudf.Decimal64Dtype(1, 0), Decimal("1")), + (Decimal("1.1"), cudf.Decimal64Dtype(2, 1), Decimal("1.1")), + (Decimal("1.1"), cudf.Decimal64Dtype(4, 3), Decimal("1.100")), + (Decimal("1.11"), cudf.Decimal64Dtype(2, 2), pa.lib.ArrowInvalid), + ], +) +def test_scalar_dtype_and_validity_decimal(slr, dtype, expect): + if expect is pa.lib.ArrowInvalid: + with pytest.raises(expect): + cudf.Scalar(slr, dtype=dtype) + return + else: + result = cudf.Scalar(slr, dtype=dtype) + assert result.dtype == dtype + assert result.is_valid + + @pytest.mark.parametrize( "value", [ diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index c69ccb0f42e..b426269458f 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -461,7 +461,7 @@ def get_appropriate_dispatched_func( def _cast_to_appropriate_cudf_type(val, index=None): # Handle scalar if val.ndim == 0: - return cudf.Scalar(val).value + return to_cudf_compatible_scalar(val) # 1D array elif (val.ndim == 1) or (val.ndim == 2 and val.shape[1] == 1): # if index is not None and is of a different length