Skip to content

Commit

Permalink
DEPR: Series[categorical].replace special-casing (#56385)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel authored Dec 9, 2023
1 parent ebde354 commit 536ce30
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 23 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ Other Deprecations
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
- Deprecated the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype`; in a future version, ``replace`` will change the values while preserving the categories. To change the categories, use ``ser.cat.rename_categories`` instead (:issue:`55147`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2626,6 +2626,8 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
def _replace(self, *, to_replace, value, inplace: bool = False):
from pandas import Index

orig_dtype = self.dtype

inplace = validate_bool_kwarg(inplace, "inplace")
cat = self if inplace else self.copy()

Expand Down Expand Up @@ -2656,6 +2658,17 @@ def _replace(self, *, to_replace, value, inplace: bool = False):
new_dtype = CategoricalDtype(new_categories, ordered=self.dtype.ordered)
NDArrayBacked.__init__(cat, new_codes, new_dtype)

if new_dtype != orig_dtype:
warnings.warn(
# GH#55147
"The behavior of Series.replace (and DataFrame.replace) with "
"CategoricalDtype is deprecated. In a future version, replace "
"will only be used for cases that preserve the categories. "
"To change the categories, use ser.cat.rename_categories "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if not inplace:
return cat

Expand Down
28 changes: 24 additions & 4 deletions pandas/tests/arrays/categorical/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
([1, 2, "3"], "5", ["5", "5", 3], True),
],
)
@pytest.mark.filterwarnings(
"ignore:.*with CategoricalDtype is deprecated:FutureWarning"
)
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
# GH 31720

Expand Down Expand Up @@ -60,7 +63,13 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
# GH#26988
cat = Categorical(["a", "b"])
expected = Categorical(result)
result = pd.Series(cat, copy=False).replace(to_replace, value)._values
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if expected_error_msg is not None else None
with tm.assert_produces_warning(warn, match=msg):
result = pd.Series(cat, copy=False).replace(to_replace, value)._values

tm.assert_categorical_equal(result, expected)
if to_replace == "b": # the "c" test is supposed to be unchanged
Expand All @@ -69,14 +78,20 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
tm.assert_categorical_equal(cat, expected)

ser = pd.Series(cat, copy=False)
ser.replace(to_replace, value, inplace=True)
with tm.assert_produces_warning(warn, match=msg):
ser.replace(to_replace, value, inplace=True)
tm.assert_categorical_equal(cat, expected)


def test_replace_categorical_ea_dtype():
# GH49404
cat = Categorical(pd.array(["a", "b"], dtype="string"))
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
tm.assert_categorical_equal(result, expected)

Expand All @@ -85,7 +100,12 @@ def test_replace_maintain_ordering():
# GH51016
dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
ser = pd.Series([0, 1, 2], dtype=dtype)
result = ser.replace(0, 2)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace(0, 2)
expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
expected = pd.Series([2, 1, 2], dtype=expected_dtype)
tm.assert_series_equal(expected, result, check_category_order=True)
41 changes: 35 additions & 6 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,19 @@ def test_replace_to_replace_wrong_dtype(using_copy_on_write):
def test_replace_list_categorical(using_copy_on_write):
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
arr = get_array(df, "a")
df.replace(["c"], value="a", inplace=True)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
df.replace(["c"], value="a", inplace=True)
assert np.shares_memory(arr.codes, get_array(df, "a").codes)
if using_copy_on_write:
assert df._mgr._has_no_reference(0)

df_orig = df.copy()
df2 = df.replace(["b"], value="a")
with tm.assert_produces_warning(FutureWarning, match=msg):
df2 = df.replace(["b"], value="a")
assert not np.shares_memory(arr.codes, get_array(df2, "a").codes)

tm.assert_frame_equal(df, df_orig)
Expand All @@ -178,7 +184,12 @@ def test_replace_list_inplace_refs_categorical(using_copy_on_write):
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
view = df[:]
df_orig = df.copy()
df.replace(["c"], value="a", inplace=True)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
df.replace(["c"], value="a", inplace=True)
if using_copy_on_write:
assert not np.shares_memory(
get_array(view, "a").codes, get_array(df, "a").codes
Expand Down Expand Up @@ -238,7 +249,13 @@ def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_repl
df_orig = df.copy()
arr_a = get_array(df, "a")
view = df[:]
df.replace(to_replace=to_replace, value=val, inplace=True)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
df.replace(to_replace=to_replace, value=val, inplace=True)

if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes)
Expand All @@ -253,7 +270,13 @@ def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_repl
def test_replace_categorical_inplace(using_copy_on_write, val):
df = DataFrame({"a": Categorical([1, 2, 3])})
arr_a = get_array(df, "a")
df.replace(to_replace=1, value=val, inplace=True)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
df.replace(to_replace=1, value=val, inplace=True)

assert np.shares_memory(get_array(df, "a").codes, arr_a.codes)
if using_copy_on_write:
Expand All @@ -267,7 +290,13 @@ def test_replace_categorical_inplace(using_copy_on_write, val):
def test_replace_categorical(using_copy_on_write, val):
df = DataFrame({"a": Categorical([1, 2, 3])})
df_orig = df.copy()
df2 = df.replace(to_replace=1, value=val)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
df2 = df.replace(to_replace=1, value=val)

if using_copy_on_write:
assert df._mgr._has_no_reference(0)
Expand Down
25 changes: 19 additions & 6 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,9 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data):
b = pd.Categorical(final_data[:, 1], categories=ex_cat)

expected = DataFrame({"a": a, "b": b})
result = df.replace(replace_dict, 3)
msg2 = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = df.replace(replace_dict, 3)
tm.assert_frame_equal(result, expected)
msg = (
r"Attributes of DataFrame.iloc\[:, 0\] \(column name=\"a\"\) are "
Expand All @@ -1288,7 +1290,8 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data):
with pytest.raises(AssertionError, match=msg):
# ensure non-inplace call does not affect original
tm.assert_frame_equal(df, expected)
return_value = df.replace(replace_dict, 3, inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg2):
return_value = df.replace(replace_dict, 3, inplace=True)
assert return_value is None
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -1438,9 +1441,14 @@ def test_replace_value_category_type(self):
)

# replace values in input dataframe
input_df = input_df.replace("d", "z")
input_df = input_df.replace("obj1", "obj9")
result = input_df.replace("cat2", "catX")
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
input_df = input_df.replace("d", "z")
input_df = input_df.replace("obj1", "obj9")
result = input_df.replace("cat2", "catX")

tm.assert_frame_equal(result, expected)

Expand All @@ -1466,7 +1474,12 @@ def test_replace_dict_category_type(self):
)

# replace values in input dataframe using a dict
result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})

tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,9 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki

gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True)
expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
expected["x"] = expected["x"].replace(4, None)
expected["x"] = expected["x"].cat.remove_categories([4])
if index_kind == "multi":
expected["x2"] = expected["x2"].replace(4, None)
expected["x2"] = expected["x2"].cat.remove_categories([4])
if as_index:
if index_kind == "multi":
expected = expected.set_index(["x", "x2"])
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/io/pytables/test_file_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,15 @@ def test_latin_encoding(tmp_path, setup_path, dtype, val):
ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep)
retr = read_hdf(store, key)

s_nan = ser.replace(nan_rep, np.nan)
# TODO:(3.0): once Categorical replace deprecation is enforced,
# we may be able to re-simplify the construction of s_nan
if dtype == "category":
if nan_rep in ser.cat.categories:
s_nan = ser.cat.remove_categories([nan_rep])
else:
s_nan = ser
else:
s_nan = ser.replace(nan_rep, np.nan)

tm.assert_series_equal(s_nan, retr)

Expand Down
17 changes: 13 additions & 4 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ def test_replace_categorical(self, categorical, numeric):
# GH 24971, GH#23305
ser = pd.Series(categorical)
msg = "Downcasting behavior in `replace`"
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace({"A": 1, "B": 2})
expected = pd.Series(numeric).astype("category")
Expand All @@ -418,7 +419,9 @@ def test_replace_categorical(self, categorical, numeric):
def test_replace_categorical_inplace(self, data, data_exp):
# GH 53358
result = pd.Series(data, dtype="category")
result.replace(to_replace="a", value="b", inplace=True)
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result.replace(to_replace="a", value="b", inplace=True)
expected = pd.Series(data_exp, dtype="category")
tm.assert_series_equal(result, expected)

Expand All @@ -434,16 +437,22 @@ def test_replace_categorical_single(self):
expected = expected.cat.remove_unused_categories()
assert c[2] != "foo"

result = c.replace(c[2], "foo")
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = c.replace(c[2], "foo")
tm.assert_series_equal(expected, result)
assert c[2] != "foo" # ensure non-inplace call does not alter original

return_value = c.replace(c[2], "foo", inplace=True)
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = c.replace(c[2], "foo", inplace=True)
assert return_value is None
tm.assert_series_equal(expected, c)

first_value = c[0]
return_value = c.replace(c[1], c[0], inplace=True)
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = c.replace(c[1], c[0], inplace=True)
assert return_value is None
assert c[0] == c[1] == first_value # test replacing with existing value

Expand Down

0 comments on commit 536ce30

Please sign in to comment.