Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: fillna downcasting from object dtype #54261

Merged
merged 14 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ contains NAs, an exception will be generated:
However, these can be filled in using :meth:`~DataFrame.fillna` and it will work fine:

.. ipython:: python
:okwarning:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be changed to the future usage?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be weird to have documented behavior that you can't actually get yet?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or rather, can this be changed to show the work-around? (Just trying to get ahead when we will have to change this example eventually)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough, I'll take a look in a bit. (Pretty deep in the weeds in the extension tests ATM)


reindexed[crit.fillna(False)]
reindexed[crit.fillna(True)]
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ Other Deprecations
- Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`)
- Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False returning -1; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases; call ``result.infer_objects(copy=False)`` on the result instead (:issue:`54261`)
-
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

.. ---------------------------------------------------------------------------
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10174,7 +10174,14 @@ def _where(

# make sure we are boolean
fill_value = bool(inplace)
cond = cond.fillna(fill_value)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
cond = cond.fillna(fill_value)
cond = cond.infer_objects(copy=False)

msg = "Boolean array expected for the condition, not {dtype}"

Expand Down
34 changes: 28 additions & 6 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,11 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:

@final
def _maybe_downcast(
self, blocks: list[Block], downcast=None, using_cow: bool = False
self,
blocks: list[Block],
downcast=None,
using_cow: bool = False,
caller: str | None = None,
) -> list[Block]:
if downcast is False:
return blocks
Expand All @@ -483,9 +487,23 @@ def _maybe_downcast(
# but ATM it breaks too much existing code.
# split and convert the blocks

return extend_blocks(
casted = extend_blocks(
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
)
if caller == "fillna":
if len(casted) != len(blocks) or not all(
x.dtype == y.dtype for x, y in zip(casted, blocks)
):
# GH#54261
warnings.warn(
"Downcasting object dtype arrays on .fillna, .ffill, .bfill "
"is deprecated and will change in a future version. "
"Call result.infer_objects(copy=False) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

return casted

if downcast is None:
return blocks
Expand Down Expand Up @@ -1367,7 +1385,9 @@ def fillna(
else:
# GH#45423 consistent downcasting on no-ops.
nb = self.copy(deep=not using_cow)
nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow)
nbs = nb._maybe_downcast(
[nb], downcast=downcast, using_cow=using_cow, caller="fillna"
)
return nbs

if limit is not None:
Expand All @@ -1385,7 +1405,9 @@ def fillna(
# different behavior in _maybe_downcast.
return extend_blocks(
[
blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
blk._maybe_downcast(
[blk], downcast=downcast, using_cow=using_cow, caller="fillna"
)
for blk in nbs
]
)
Expand Down Expand Up @@ -1426,7 +1448,7 @@ def pad_or_backfill(
data = extract_array(new_values, extract_numpy=True)

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow)
return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")

@final
def interpolate(
Expand Down Expand Up @@ -2000,7 +2022,7 @@ def fillna(
)

nb = self.make_block_same_class(new_values, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow=using_cow)
return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")

@cache_readonly
def shape(self) -> Shape:
Expand Down
9 changes: 8 additions & 1 deletion pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
TYPE_CHECKING,
Any,
)
import warnings

from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
Expand Down Expand Up @@ -202,7 +203,13 @@ def process_dataframe(self) -> dict[int | str, dict[str, Any]]:
df = df.reset_index()

if self.na_rep is not None:
df = df.fillna(self.na_rep)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
df = df.fillna(self.na_rep)

return df.to_dict(orient="index")

Expand Down
11 changes: 10 additions & 1 deletion pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,7 +1216,16 @@ def _try_convert_data(
if not self.dtype:
if all(notna(data)):
return data, False
return data.fillna(np.nan), True

with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
filled = data.fillna(np.nan)

return filled, True

elif self.dtype is True:
pass
Expand Down
9 changes: 8 additions & 1 deletion pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2912,7 +2912,14 @@ def _prepare_data(self) -> np.recarray:
for i, col in enumerate(data):
typ = typlist[i]
if typ <= self._max_string_length:
data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,))
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
dc = data[col].fillna("")
data[col] = dc.apply(_pad_bytes, args=(typ,))
stype = f"S{typ}"
dtypes[col] = stype
data[col] = data[col].astype(stype)
Expand Down
8 changes: 7 additions & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1538,7 +1538,13 @@ def _kind(self) -> Literal["area"]:

def __init__(self, data, **kwargs) -> None:
kwargs.setdefault("stacked", True)
data = data.fillna(value=0)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
data = data.fillna(value=0)
LinePlot.__init__(self, data, **kwargs)

if not self.stacked:
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/extension/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
be added to the array-specific tests in `pandas/tests/arrays/`.

"""
import warnings

import numpy as np
import pytest

Expand Down Expand Up @@ -136,7 +138,14 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
expected = self._combine(s, other, op)

if op_name in ("__rtruediv__", "__truediv__", "__div__"):
expected = expected.fillna(np.nan).astype("Float64")
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Downcasting object dtype arrays",
category=FutureWarning,
)
filled = expected.fillna(np.nan)
expected = filled.astype("Float64")
else:
# combine method result in 'biggest' (int64) dtype
expected = expected.astype(sdtype)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def test_where_upcasting(self):

tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
def test_where_alignment(self, where_frame, float_string_frame):
# aligning
def _check_align(df, cond, other, check_dtypes=True):
Expand Down Expand Up @@ -166,6 +167,7 @@ def test_where_invalid(self):
with pytest.raises(ValueError, match=msg):
df.mask(0)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
# where inplace

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,9 @@ def test_fillna_dtype_conversion(self):
expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
tm.assert_series_equal(result, expected)

result = df.fillna(1)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.fillna(1)
expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -817,7 +819,8 @@ def test_fillna_nones_inplace():
[[None, None], [None, None]],
columns=["A", "B"],
)
with tm.assert_produces_warning(False):
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.fillna(value={"A": 1, "B": 2}, inplace=True)

expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"])
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,9 @@ def test_operators_none_as_na(self, op):

# since filling converts dtypes from object, changed expected to be
# object
filled = df.fillna(np.nan)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
filled = df.fillna(np.nan)
result = op(df, 3)
expected = op(filled, 3).astype(object)
expected[pd.isna(expected)] = np.nan
Expand All @@ -1253,10 +1255,14 @@ def test_operators_none_as_na(self, op):
expected[pd.isna(expected)] = np.nan
tm.assert_frame_equal(result, expected)

result = op(df, df.fillna(7))
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = op(df, df.fillna(7))
tm.assert_frame_equal(result, expected)

result = op(df.fillna(7), df)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = op(df.fillna(7), df)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def _check_unary_op(op):

_check_unary_op(operator.inv) # TODO: belongs elsewhere

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
def test_logical_with_nas(self):
d = DataFrame({"a": [np.nan, False], "b": [True, True]})

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,7 @@ def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame):
def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na):
getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize("opname", ["any", "all"])
def test_any_all_bool_frame(self, opname, bool_frame_with_na):
# GH#12863: numpy gives back non-boolean data for object type
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,7 @@ def test_stack_preserve_categorical_dtype_values(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize(
"index, columns",
[
Expand All @@ -1136,6 +1137,7 @@ def test_stack_preserve_categorical_dtype_values(self):
)
def test_stack_multi_columns_non_unique_index(self, index, columns):
# GH-28301

df = DataFrame(index=index, columns=columns).fillna(1)
stacked = df.stack()
new_index = MultiIndex.from_tuples(stacked.index.to_numpy())
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
method(*args, **kwargs)


@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize("dtype", [bool, int, float, object])
def test_deprecate_numeric_only_series(dtype, groupby_func, request):
# GH#46560
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,19 @@ def test_reindex_inference():
# inference of new dtype
s = Series([True, False, False, True], index=list("abcd"))
new_index = "agc"
result = s.reindex(list(new_index)).ffill()
msg = "Downcasting object dtype arrays on"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.reindex(list(new_index)).ffill()
expected = Series([True, True, False], index=list(new_index))
tm.assert_series_equal(result, expected)


def test_reindex_downcasting():
# GH4618 shifted series downcasting
s = Series(False, index=range(0, 5))
result = s.shift(1).bfill()
msg = "Downcasting object dtype arrays on"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.shift(1).bfill()
expected = Series(False, index=range(0, 5))
tm.assert_series_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def test_series_datetimelike_attribute_access_invalid(self):
with pytest.raises(AttributeError, match=msg):
ser.weekday

@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize(
"kernel, has_numeric_only",
[
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/series/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,12 @@ def test_comparison_operators_with_nas(self, comparison_op):
result = comparison_op(ser, val)
expected = comparison_op(ser.dropna(), val).reindex(ser.index)

if comparison_op is operator.ne:
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)
msg = "Downcasting object dtype arrays"
with tm.assert_produces_warning(FutureWarning, match=msg):
if comparison_op is operator.ne:
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)

tm.assert_series_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@


class TestSeriesLogicalOps:
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
def test_bool_operators_with_nas(self, bool_op):
# boolean &, |, ^ should work with object arrays and propagate NAs
Expand Down