Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent .ordered and .categories from being settable in CategoricalColumn and CategoricalDtype #15475

Merged
merged 5 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 18 additions & 35 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def categories(self) -> "cudf.core.index.Index":
"""
The categories of this categorical.
"""
return cudf.core.index.as_index(self._column.categories)
return self._column.dtype.categories

@property
def codes(self) -> "cudf.Series":
Expand Down Expand Up @@ -165,7 +165,7 @@ def as_ordered(self) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1 < 2 < 10]
"""
return self._return_or_inplace(self._column.as_ordered())
return self._return_or_inplace(self._column.as_ordered(ordered=True))

def as_unordered(self) -> Optional[SeriesOrIndex]:
"""
Expand Down Expand Up @@ -212,8 +212,7 @@ def as_unordered(self) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1, 2, 10]
"""

return self._return_or_inplace(self._column.as_unordered())
return self._return_or_inplace(self._column.as_ordered(ordered=False))

def add_categories(self, new_categories: Any) -> Optional[SeriesOrIndex]:
"""
Expand Down Expand Up @@ -631,10 +630,6 @@ def codes(self) -> NumericalColumn:
def ordered(self) -> bool:
    """Return the ``ordered`` flag stored on this object's dtype."""
    return self.dtype.ordered

@ordered.setter
def ordered(self, value: bool):
    """Assign ``value`` to the ``ordered`` attribute of this object's dtype."""
    # NOTE(review): the write goes through to whatever object ``self.dtype``
    # returns; if that dtype instance is shared, every holder sees the
    # change -- confirm callers expect that.
    self.dtype.ordered = value

def __setitem__(self, key, value):
if cudf.api.types.is_scalar(
value
Expand Down Expand Up @@ -1170,9 +1165,11 @@ def _get_decategorized_column(self) -> ColumnBase:
def copy(self, deep: bool = True) -> Self:
result_col = super().copy(deep=deep)
if deep:
result_col.categories = libcudf.copying.copy_column(
self.dtype._categories
dtype_copy = CategoricalDtype(
categories=self.categories.copy(),
ordered=self.ordered,
)
result_col = cast(Self, result_col._with_type_metadata(dtype_copy))
return result_col

@cached_property
Expand Down Expand Up @@ -1411,31 +1408,17 @@ def reorder_categories(
)
return self._set_categories(new_categories, ordered=ordered)

def as_ordered(self):
    """Return an ordered version of this categorical column.

    Returns ``self`` unchanged when ``self.ordered`` is already truthy.
    Otherwise a new column is built with
    ``column.build_categorical_column`` from this column's categories,
    codes, base mask, base size and offset, with ``ordered=True``.
    """
    if self.ordered:
        # Already ordered: hand back the same object, no rebuild.
        return self
    return column.build_categorical_column(
        categories=self.categories,
        codes=self.codes,
        mask=self.base_mask,
        size=self.base_size,
        offset=self.offset,
        ordered=True,
    )

def as_unordered(self):
    """Return an unordered version of this categorical column.

    Returns ``self`` unchanged when ``self.ordered`` is already falsy.
    Otherwise a new column is built with
    ``column.build_categorical_column`` from this column's categories,
    codes, base mask, base size and offset, with ``ordered=False``.
    """
    if not self.ordered:
        # Already unordered: hand back the same object, no rebuild.
        return self
    return column.build_categorical_column(
        categories=self.categories,
        codes=self.codes,
        mask=self.base_mask,
        size=self.base_size,
        offset=self.offset,
        ordered=False,
    )
def as_ordered(self, ordered: bool):
    """Return this column with its ``ordered`` flag set to ``ordered``.

    Despite the name, ``ordered`` may be ``False``, in which case the
    result is an *unordered* column.

    Parameters
    ----------
    ordered : bool
        The desired value of the dtype's ``ordered`` flag.

    Returns
    -------
    ``self`` when ``self.dtype.ordered`` already equals ``ordered``;
    otherwise a new column built with
    ``column.build_categorical_column`` from this column's categories,
    codes, base mask, base size and offset, with the requested flag.
    """
    if self.dtype.ordered == ordered:
        # Nothing to change: avoid rebuilding the column.
        return self
    return column.build_categorical_column(
        categories=self.categories,
        codes=self.codes,
        mask=self.base_mask,
        size=self.base_size,
        offset=self.offset,
        ordered=ordered,
    )


def _create_empty_categorical_column(
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,6 @@ def ordered(self) -> bool:
"""
return self._ordered

@ordered.setter
def ordered(self, value) -> None:
    """Store ``value`` in the private ``_ordered`` attribute."""
    # No validation or coercion is applied; the value is stored as given.
    self._ordered = value

@classmethod
def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype":
"""
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2624,9 +2624,9 @@ def __init__(
elif isinstance(dtype, (pd.CategoricalDtype, cudf.CategoricalDtype)):
data = data.set_categories(dtype.categories, ordered=ordered)
elif ordered is True and data.ordered is False:
data = data.as_ordered()
data = data.as_ordered(ordered=True)
elif ordered is False and data.ordered is True:
data = data.as_unordered()
data = data.as_ordered(ordered=False)
super().__init__(data, **kwargs)

@property # type: ignore
Expand All @@ -2643,7 +2643,7 @@ def categories(self):
"""
The categories of this categorical.
"""
return as_index(self._values.categories)
return self.dtype.categories

def _is_boolean(self):
return False
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,11 @@ def test_empty_series_category_cast(ordered):
assert_eq(expected.dtype.ordered, actual.dtype.ordered)


def test_categorical_dtype_ordered_not_settable():
    """Assigning to ``CategoricalDtype.ordered`` must raise AttributeError."""
    # Construct the dtype outside the ``raises`` block so that only the
    # attribute assignment is checked; an AttributeError raised by the
    # constructor would otherwise make this test pass for the wrong reason.
    dtype = cudf.CategoricalDtype()
    with pytest.raises(AttributeError):
        dtype.ordered = False


@pytest.mark.parametrize("scalar", [1, "a", None, 10.2])
def test_cat_from_scalar(scalar):
ps = pd.Series(scalar, dtype="category")
Expand Down
Loading