Skip to content

Commit

Permalink
Avoid .ordered and .categories from being settable in CategoricalColu…
Browse files Browse the repository at this point in the history
…mn and CategoricalDtype (#15475)

A rehash of #14979

`CategoricalDtype.ordered` is now read-only, matching the behavior of `pandas.CategoricalDtype.ordered`.

Also combines `as_ordered` and `as_unordered` into one method, and avoids two `as_index` casts that are already performed elsewhere.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15475
  • Loading branch information
mroeschke authored Apr 9, 2024
1 parent f1a3db2 commit 54eff4e
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 42 deletions.
53 changes: 18 additions & 35 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def categories(self) -> "cudf.core.index.Index":
"""
The categories of this categorical.
"""
return cudf.core.index.as_index(self._column.categories)
return self._column.dtype.categories

@property
def codes(self) -> "cudf.Series":
Expand Down Expand Up @@ -165,7 +165,7 @@ def as_ordered(self) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1 < 2 < 10]
"""
return self._return_or_inplace(self._column.as_ordered())
return self._return_or_inplace(self._column.as_ordered(ordered=True))

def as_unordered(self) -> Optional[SeriesOrIndex]:
"""
Expand Down Expand Up @@ -212,8 +212,7 @@ def as_unordered(self) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1, 2, 10]
"""

return self._return_or_inplace(self._column.as_unordered())
return self._return_or_inplace(self._column.as_ordered(ordered=False))

def add_categories(self, new_categories: Any) -> Optional[SeriesOrIndex]:
"""
Expand Down Expand Up @@ -631,10 +630,6 @@ def codes(self) -> NumericalColumn:
def ordered(self) -> bool:
return self.dtype.ordered

@ordered.setter
def ordered(self, value: bool):
self.dtype.ordered = value

def __setitem__(self, key, value):
if cudf.api.types.is_scalar(
value
Expand Down Expand Up @@ -1170,9 +1165,11 @@ def _get_decategorized_column(self) -> ColumnBase:
def copy(self, deep: bool = True) -> Self:
result_col = super().copy(deep=deep)
if deep:
result_col.categories = libcudf.copying.copy_column(
self.dtype._categories
dtype_copy = CategoricalDtype(
categories=self.categories.copy(),
ordered=self.ordered,
)
result_col = cast(Self, result_col._with_type_metadata(dtype_copy))
return result_col

@cached_property
Expand Down Expand Up @@ -1411,31 +1408,17 @@ def reorder_categories(
)
return self._set_categories(new_categories, ordered=ordered)

def as_ordered(self):
out_col = self
if not out_col.ordered:
out_col = column.build_categorical_column(
categories=self.categories,
codes=self.codes,
mask=self.base_mask,
size=self.base_size,
offset=self.offset,
ordered=True,
)
return out_col

def as_unordered(self):
out_col = self
if out_col.ordered:
out_col = column.build_categorical_column(
categories=self.categories,
codes=self.codes,
mask=self.base_mask,
size=self.base_size,
offset=self.offset,
ordered=False,
)
return out_col
def as_ordered(self, ordered: bool):
if self.dtype.ordered == ordered:
return self
return column.build_categorical_column(
categories=self.categories,
codes=self.codes,
mask=self.base_mask,
size=self.base_size,
offset=self.offset,
ordered=ordered,
)


def _create_empty_categorical_column(
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,6 @@ def ordered(self) -> bool:
"""
return self._ordered

@ordered.setter
def ordered(self, value) -> None:
self._ordered = value

@classmethod
def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype":
"""
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2624,9 +2624,9 @@ def __init__(
elif isinstance(dtype, (pd.CategoricalDtype, cudf.CategoricalDtype)):
data = data.set_categories(dtype.categories, ordered=ordered)
elif ordered is True and data.ordered is False:
data = data.as_ordered()
data = data.as_ordered(ordered=True)
elif ordered is False and data.ordered is True:
data = data.as_unordered()
data = data.as_ordered(ordered=False)
super().__init__(data, **kwargs)

@property # type: ignore
Expand All @@ -2643,7 +2643,7 @@ def categories(self):
"""
The categories of this categorical.
"""
return as_index(self._values.categories)
return self.dtype.categories

def _is_boolean(self):
return False
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,11 @@ def test_empty_series_category_cast(ordered):
assert_eq(expected.dtype.ordered, actual.dtype.ordered)


def test_categorical_dtype_ordered_not_settable():
with pytest.raises(AttributeError):
cudf.CategoricalDtype().ordered = False


@pytest.mark.parametrize("scalar", [1, "a", None, 10.2])
def test_cat_from_scalar(scalar):
ps = pd.Series(scalar, dtype="category")
Expand Down

0 comments on commit 54eff4e

Please sign in to comment.