diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 2d9438b515f..94649069060 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -40,7 +40,12 @@ merge, ) from cudf.core.algorithms import factorize -from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype +from cudf.core.dtypes import ( + CategoricalDtype, + Decimal64Dtype, + ListDtype, + StructDtype, +) from cudf.core.groupby import Grouper from cudf.core.ops import ( add, diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 8531ec18edc..f80bef79c79 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1414,6 +1414,8 @@ def _copy_type_metadata(self: T, other: ColumnBase) -> ColumnBase: of `other` and the categories of `self`. * when both `self` and `other` are StructColumns, rename the fields of `other` to the field names of `self`. + * when both `self` and `other` are DecimalColumns, copy the precision + from self.dtype to other.dtype * when `self` and `other` are nested columns of the same type, recursively apply this function on the children of `self` to the and the children of `other`. @@ -1437,6 +1439,11 @@ def _copy_type_metadata(self: T, other: ColumnBase) -> ColumnBase: ): other = other._rename_fields(self.dtype.fields.keys()) + if isinstance(other, cudf.core.column.DecimalColumn) and isinstance( + self, cudf.core.column.DecimalColumn + ): + other.dtype.precision = self.dtype.precision + if type(self) is type(other): if self.base_children and other.base_children: base_children = tuple( diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index e93c5824817..d5568df6468 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -22,6 +22,8 @@ class DecimalColumn(ColumnBase): + dtype: Decimal64Dtype + @classmethod def from_arrow(cls, data: pa.Array): dtype = Decimal64Dtype.from_arrow(data.type) @@ -110,7 +112,8 @@ def normalize_binop_value(self, other): raise TypeError(f"cannot normalize {type(other)}") def _apply_scan_op(self, op: str) -> ColumnBase: - return libcudf.reduce.scan(op, self, True) + result = libcudf.reduce.scan(op, self, True) + return self._copy_type_metadata(result) def as_decimal_column( self, dtype: Dtype, **kwargs diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 0bde2eb551f..0c436cf36e7 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -14,7 +14,12 @@ from cudf._typing import Dtype -class CategoricalDtype(ExtensionDtype): +class _BaseDtype(ExtensionDtype): + # Base type for all cudf-specific dtypes + pass + + +class CategoricalDtype(_BaseDtype): ordered: Optional[bool] @@ -121,7 +126,7 @@ def deserialize(cls, header, frames): return cls(categories=categories, ordered=ordered) -class ListDtype(ExtensionDtype): +class ListDtype(_BaseDtype): _typ: pa.ListType name: str = "list" @@ -180,7 +185,7 @@ def __hash__(self): return hash(self._typ) -class StructDtype(ExtensionDtype): +class StructDtype(_BaseDtype): name = "struct" @@ -231,7 +236,7 @@ def __hash__(self): return hash(self._typ) -class Decimal64Dtype(ExtensionDtype): +class Decimal64Dtype(_BaseDtype): name = "decimal" _metadata = ("precision", "scale") diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index 80ff9d5734c..70fc63baba8 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -5,15 +5,14 @@ import numpy as np import pyarrow as pa import pytest -import cudf -from cudf.core.dtypes import Decimal64Dtype +import cudf from cudf.core.column import DecimalColumn, NumericalColumn - +from cudf.core.dtypes import Decimal64Dtype from cudf.tests.utils import ( - NUMERIC_TYPES, FLOAT_TYPES, INTEGER_TYPES, + NUMERIC_TYPES, assert_eq, ) @@ -88,7 +87,6 @@ def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype): got = got.astype(to_dtype) assert_eq(got, expected) - assert_eq(got.dtype, expected.dtype) @pytest.mark.parametrize( @@ -129,7 +127,6 @@ def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype): got = got.astype(to_dtype) assert_eq(got, expected) - assert_eq(got.dtype, expected.dtype) @pytest.mark.parametrize( @@ -170,7 +167,6 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype): got = got.astype(to_dtype) assert_eq(got, expected) - assert_eq(got.dtype, expected.dtype) @pytest.mark.parametrize( @@ -205,4 +201,3 @@ def test_typecast_from_decimal(data, from_dtype, to_dtype): expected = cudf.Series(NumericalColumn.from_arrow(pa_arr)) assert_eq(got, expected) - assert_eq(got.dtype, expected.dtype) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index cec2623027f..086d59ab0f2 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1401,3 +1401,14 @@ def test_iloc_before_zero_terminate(arg, pobj): gobj = cudf.from_pandas(pobj) assert_eq(pobj.iloc[arg], gobj.iloc[arg]) + + +def test_iloc_decimal(): + sr = cudf.Series(["1.00", "2.00", "3.00", "4.00"]).astype( + cudf.Decimal64Dtype(scale=2, precision=3) + ) + got = sr.iloc[[3, 2, 1, 0]] + expect = cudf.Series(["4.00", "3.00", "2.00", "1.00"],).astype( + cudf.Decimal64Dtype(scale=2, precision=3) + ) + assert_eq(expect.reset_index(drop=True), got.reset_index(drop=True)) diff --git a/python/cudf/cudf/tests/utils.py b/python/cudf/cudf/tests/utils.py index 1163c3085e4..37a74ab4760 100644 --- a/python/cudf/cudf/tests/utils.py +++ b/python/cudf/cudf/tests/utils.py @@ -74,6 +74,17 @@ def assert_eq(left, right, **kwargs): without switching between assert_frame_equal/assert_series_equal/... functions. """ + # dtypes that we support but Pandas doesn't will convert to + # `object`. Check equality before that happens: + if kwargs.get("check_dtype", True): + if hasattr(left, "dtype") and hasattr(right, "dtype"): + if isinstance( + left.dtype, cudf.core.dtypes._BaseDtype + ) and not isinstance( + left.dtype, cudf.CategoricalDtype + ): # leave categorical comparison to Pandas + assert_eq(left.dtype, right.dtype) + if hasattr(left, "to_pandas"): left = left.to_pandas() if hasattr(right, "to_pandas"):