Skip to content

Commit

Permalink
DEPR: downcasting in NDFrame.where, mask, clip (pandas-dev#53656)
Browse files Browse the repository at this point in the history
* DEPR: downcasting in NDFrame.where, mask, clip

* GH ref

* suppress warning in doctest

* add caller

* implement future.no_silent_downcasting

* move whatsnew to 2.2
  • Loading branch information
jbrockmendel authored Sep 1, 2023
1 parent bb08817 commit 80a1a8b
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 28 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
Expand Down
1 change: 1 addition & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def pytest_collection_modifyitems(items, config) -> None:
("is_sparse", "is_sparse is deprecated"),
("NDFrame.replace", "The 'method' keyword"),
("NDFrame.replace", "Series.replace without 'value'"),
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
("Series.idxmin", "The behavior of Series.idxmin"),
("Series.idxmax", "The behavior of Series.idxmax"),
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
Expand Down
11 changes: 11 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,3 +902,14 @@ def register_converter_cb(key) -> None:
"(at which point this option will be deprecated).",
validator=is_one_of_factory([True, False]),
)

cf.register_option(
"no_silent_downcasting",
False,
"Whether to opt-in to the future behavior which will *not* silently "
"downcast results from Series and DataFrame `where`, `mask`, and `clip` "
"methods. "
"Silent downcasting will be removed in pandas 3.0 "
"(at which point this option will be deprecated).",
validator=is_one_of_factory([True, False]),
)
62 changes: 49 additions & 13 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@

import numpy as np

from pandas._config import using_copy_on_write
from pandas._config import (
get_option,
using_copy_on_write,
)

from pandas._libs import (
NaT,
Expand Down Expand Up @@ -495,7 +498,7 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:

@final
def _maybe_downcast(
self, blocks: list[Block], downcast=None, using_cow: bool = False
self, blocks: list[Block], downcast, using_cow: bool, caller: str
) -> list[Block]:
if downcast is False:
return blocks
Expand All @@ -507,14 +510,43 @@ def _maybe_downcast(
# but ATM it breaks too much existing code.
# split and convert the blocks

return extend_blocks(
nbs = extend_blocks(
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
)

if downcast is None:
elif downcast is None:
return blocks
elif caller == "where" and get_option("future.no_silent_downcasting") is True:
return blocks
else:
nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

# When _maybe_downcast is called with caller="where", it is either
# a) with downcast=False, which is a no-op (the desired future behavior)
# b) with downcast="infer", which is _not_ passed by the user.
# In the latter case the future behavior is to stop doing inference,
# so we issue a warning if and only if some inference occurred.
if caller == "where":
# GH#53656
if len(blocks) != len(nbs) or any(
left.dtype != right.dtype for left, right in zip(blocks, nbs)
):
# In this case _maybe_downcast was _not_ a no-op, so the behavior
# will change, so we issue a warning.
warnings.warn(
"Downcasting behavior in Series and DataFrame methods 'where', "
"'mask', and 'clip' is deprecated. In a future "
"version this will not infer object dtypes or cast all-round "
"floats to integers. Instead call "
"result.infer_objects(copy=False) for object inference, "
"or cast round floats explicitly. To opt-in to the future "
"behavior, set "
"`pd.set_option('future.no_silent_downcasting', True)`",
FutureWarning,
stacklevel=find_stack_level(),
)

return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])
return nbs

@final
@maybe_split
Expand Down Expand Up @@ -1308,7 +1340,7 @@ def where(
block = self.coerce_to_target_dtype(other)
blocks = block.where(orig_other, cond, using_cow=using_cow)
return self._maybe_downcast(
blocks, downcast=_downcast, using_cow=using_cow
blocks, downcast=_downcast, using_cow=using_cow, caller="where"
)

else:
Expand Down Expand Up @@ -1404,7 +1436,9 @@ def fillna(
else:
# GH#45423 consistent downcasting on no-ops.
nb = self.copy(deep=not using_cow)
nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow)
nbs = nb._maybe_downcast(
[nb], downcast=downcast, using_cow=using_cow, caller="fillna"
)
return nbs

if limit is not None:
Expand All @@ -1422,7 +1456,9 @@ def fillna(
# different behavior in _maybe_downcast.
return extend_blocks(
[
blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
blk._maybe_downcast(
[blk], downcast=downcast, using_cow=using_cow, caller="fillna"
)
for blk in nbs
]
)
Expand Down Expand Up @@ -1463,7 +1499,7 @@ def pad_or_backfill(
data = extract_array(new_values, extract_numpy=True)

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow)
return nb._maybe_downcast([nb], downcast, using_cow, caller="pad_or_backfill")

@final
def interpolate(
Expand Down Expand Up @@ -1516,7 +1552,7 @@ def interpolate(
data = extract_array(new_values, extract_numpy=True)

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow)
return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")

@final
def diff(self, n: int) -> list[Block]:
Expand Down Expand Up @@ -1805,7 +1841,7 @@ def where(
blk = self.coerce_to_target_dtype(orig_other)
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
return self._maybe_downcast(
nbs, downcast=_downcast, using_cow=using_cow
nbs, downcast=_downcast, using_cow=using_cow, caller="where"
)

elif isinstance(self, NDArrayBackedExtensionBlock):
Expand All @@ -1814,7 +1850,7 @@ def where(
blk = self.coerce_to_target_dtype(orig_other)
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
return self._maybe_downcast(
nbs, downcast=_downcast, using_cow=using_cow
nbs, downcast=_downcast, using_cow=using_cow, caller="where"
)

else:
Expand Down Expand Up @@ -2013,7 +2049,7 @@ def fillna(
)

nb = self.make_block_same_class(new_values, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow=using_cow)
return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")

@cache_readonly
def shape(self) -> Shape:
Expand Down
32 changes: 25 additions & 7 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):

def _check_set(df, cond, check_dtypes=True):
dfi = df.copy()
econd = cond.reindex_like(df).fillna(True)
econd = cond.reindex_like(df).fillna(True).infer_objects(copy=False)
expected = dfi.mask(~econd)

return_value = dfi.where(cond, np.nan, inplace=True)
Expand Down Expand Up @@ -356,7 +356,9 @@ def test_where_bug_transposition(self):
expected = a.copy()
expected[~do_not_replace] = b

result = a.where(do_not_replace, b)
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)

a = DataFrame({0: [4, 6], 1: [1, 0]})
Expand All @@ -366,7 +368,8 @@ def test_where_bug_transposition(self):
expected = a.copy()
expected[~do_not_replace] = b

result = a.where(do_not_replace, b)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)

def test_where_datetime(self):
Expand Down Expand Up @@ -718,7 +721,9 @@ def test_where_ea_other(self):
ser2 = Series(arr[:2], index=["A", "B"])
expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]})
expected["B"] = expected["B"].astype(object)
result = df.where(mask, ser2, axis=1)
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.where(mask, ser2, axis=1)
tm.assert_frame_equal(result, expected)

def test_where_interval_noop(self):
Expand All @@ -735,7 +740,10 @@ def test_where_interval_fullop_downcast(self, frame_or_series):
# GH#45768
obj = frame_or_series([pd.Interval(0, 0)] * 2)
other = frame_or_series([1.0, 2.0])
res = obj.where(~obj.notna(), other)

msg = "Downcasting behavior in Series and DataFrame methods 'where'"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = obj.where(~obj.notna(), other)

# since all entries are being changed, we will downcast result
# from object to ints (not floats)
Expand Down Expand Up @@ -780,7 +788,9 @@ def test_where_datetimelike_noop(self, dtype):

# opposite case where we are replacing *all* values -> we downcast
# from object dtype # GH#45768
res5 = df.where(mask2, 4)
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
with tm.assert_produces_warning(FutureWarning, match=msg):
res5 = df.where(mask2, 4)
expected = DataFrame(4, index=df.index, columns=df.columns)
tm.assert_frame_equal(res5, expected)

Expand Down Expand Up @@ -984,10 +994,18 @@ def test_where_downcast_to_td64():

td = pd.Timedelta(days=1)

res = ser.where(mask, td)
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = ser.where(mask, td)
expected = Series([td, td, td], dtype="m8[ns]")
tm.assert_series_equal(res, expected)

with pd.option_context("future.no_silent_downcasting", True):
with tm.assert_produces_warning(None, match=msg):
res2 = ser.where(mask, td)
expected2 = expected.astype(object)
tm.assert_series_equal(res2, expected2)


def _check_where_equivalences(df, mask, other, expected):
# similar to tests.series.indexing.test_setitem.SetitemCastingEquivalences
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/frame/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,11 @@ def test_clip_with_na_args(self, float_frame):
# GH#19992 and adjusted in GH#40420
df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})

result = df.clip(lower=[4, 5, np.nan], axis=0)
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
# TODO: avoid this warning here? seems like we should never be upcasting
# in the first place?
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.clip(lower=[4, 5, np.nan], axis=0)
expected = DataFrame(
{"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
)
Expand All @@ -167,7 +171,8 @@ def test_clip_with_na_args(self, float_frame):
data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
df = DataFrame(data)
t = Series([2, -4, np.nan, 6, 3])
result = df.clip(lower=t, axis=0)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.clip(lower=t, axis=0)
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
tm.assert_frame_equal(result, expected)

Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/series/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,21 @@ def test_where_datetimelike_coerce(dtype):
expected = Series([10, 10])
mask = np.array([False, False])

rs = ser.where(mask, [10, 10])
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.where(mask, [10, 10])
tm.assert_series_equal(rs, expected)

rs = ser.where(mask, 10)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.where(mask, 10)
tm.assert_series_equal(rs, expected)

rs = ser.where(mask, 10.0)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.where(mask, 10.0)
tm.assert_series_equal(rs, expected)

rs = ser.where(mask, [10.0, 10.0])
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.where(mask, [10.0, 10.0])
tm.assert_series_equal(rs, expected)

rs = ser.where(mask, [10.0, np.nan])
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/series/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,15 @@ def test_clip_with_na_args(self):
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))

# GH#19992
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
# TODO: avoid this warning here? seems like we should never be upcasting
# in the first place?
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(lower=[0, 4, np.nan])
tm.assert_series_equal(res, Series([1, 4, 3]))
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(upper=[1, np.nan, 1])
tm.assert_series_equal(res, Series([1, 2, 1]))

# GH#40420
s = Series([1, 2, 3])
Expand Down

0 comments on commit 80a1a8b

Please sign in to comment.