Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CoW: Deprecate copy keyword from first set of methods #57347

Merged
merged 8 commits into from
Feb 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,29 @@ Other API changes

Deprecations
~~~~~~~~~~~~

Copy keyword
^^^^^^^^^^^^

The ``copy`` keyword argument in the following methods is deprecated and
will be removed in a future version:

- :meth:`DataFrame.truncate` / :meth:`Series.truncate`
- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert`
- :meth:`DataFrame.tz_localize` / :meth:`Series.tz_localize`
- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects`
- :meth:`DataFrame.align` / :meth:`Series.align`
- :meth:`DataFrame.astype` / :meth:`Series.astype`
- :meth:`DataFrame.reindex` / :meth:`Series.reindex`
- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like`

Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until
necessary. Use ``.copy()`` to trigger an eager copy. The ``copy`` keyword has no
effect starting with 3.0, so it can be safely removed from your code.

Other Deprecations
^^^^^^^^^^^^^^^^^^

- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5085,7 +5085,7 @@ def reindex(
columns=None,
axis: Axis | None = None,
method: ReindexMethod | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
level: Level | None = None,
fill_value: Scalar | None = np.nan,
limit: int | None = None,
Expand All @@ -5101,6 +5101,7 @@ def reindex(
fill_value=fill_value,
limit=limit,
tolerance=tolerance,
copy=copy,
)

@overload
Expand Down
79 changes: 62 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4251,12 +4251,24 @@ def _is_view(self) -> bool:
"""Return boolean indicating if self is view of another array"""
return self._mgr.is_view

@staticmethod
def _check_copy_deprecation(copy):
if copy is not lib.no_default:
warnings.warn(
"The copy keyword is deprecated and will be removed in a future "
"version. Copy-on-Write is active in pandas since 3.0 which utilizes "
"a lazy copy mechanism that defers copies until necessary. Use "
".copy() to make an eager copy if necessary.",
DeprecationWarning,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DeprecationWarning because we're going to start following DeprecationWarning -> FutureWarning?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, my intention was to switch to FutureWarning in 3.2, but I suspect that copy=False is used by a lot of downstream libraries

stacklevel=find_stack_level(),
)

@final
def reindex_like(
self,
other,
method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
limit: int | None = None,
tolerance=None,
) -> Self:
Expand Down Expand Up @@ -4284,7 +4296,7 @@ def reindex_like(
* backfill / bfill: use next valid observation to fill gap
* nearest: use nearest valid observations to fill gap.

copy : bool, default True
copy : bool, default False
Return a new object, even if the passed indexes are the same.

.. note::
Expand All @@ -4298,6 +4310,8 @@ def reindex_like(

You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0
limit : int, default None
Maximum number of consecutive labels to fill for inexact matches.
tolerance : optional
Expand Down Expand Up @@ -4366,6 +4380,7 @@ def reindex_like(
2014-02-14 NaN NaN NaN
2014-02-15 35.1 NaN medium
"""
self._check_copy_deprecation(copy)
d = other._construct_axes_dict(
axes=self._AXIS_ORDERS,
method=method,
Expand Down Expand Up @@ -5011,7 +5026,7 @@ def reindex(
columns=None,
axis: Axis | None = None,
method: ReindexMethod | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
level: Level | None = None,
fill_value: Scalar | None = np.nan,
limit: int | None = None,
Expand All @@ -5038,7 +5053,7 @@ def reindex(
* backfill / bfill: Use next valid observation to fill gap.
* nearest: Use nearest valid observations to fill gap.

copy : bool, default True
copy : bool, default False
Return a new object, even if the passed indexes are the same.

.. note::
Expand All @@ -5052,6 +5067,8 @@ def reindex(

You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level.
Expand Down Expand Up @@ -5229,6 +5246,7 @@ def reindex(
"""
# TODO: Decide if we care about having different examples for different
# kinds
self._check_copy_deprecation(copy)

if index is not None and columns is not None and labels is not None:
raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.")
Expand Down Expand Up @@ -6135,7 +6153,10 @@ def dtypes(self):

@final
def astype(
self, dtype, copy: bool | None = None, errors: IgnoreRaise = "raise"
self,
dtype,
copy: bool | lib.NoDefault = lib.no_default,
errors: IgnoreRaise = "raise",
) -> Self:
"""
Cast a pandas object to a specified dtype ``dtype``.
Expand All @@ -6148,7 +6169,7 @@ def astype(
mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is
a numpy.dtype or Python type to cast one or more of the DataFrame's
columns to column-specific types.
copy : bool, default True
copy : bool, default False
Return a copy when ``copy=True`` (be very careful setting
``copy=False`` as changes to values then may propagate to other
pandas objects).
Expand All @@ -6164,6 +6185,8 @@ def astype(

You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0
errors : {'raise', 'ignore'}, default 'raise'
Control raising of exceptions on invalid data for provided dtype.

Expand Down Expand Up @@ -6253,6 +6276,7 @@ def astype(
2 2020-01-03
dtype: datetime64[ns]
"""
self._check_copy_deprecation(copy)
if is_dict_like(dtype):
if self.ndim == 1: # i.e. Series
if len(dtype) > 1 or self.name not in dtype:
Expand Down Expand Up @@ -6480,7 +6504,7 @@ def __deepcopy__(self, memo=None) -> Self:
return self.copy(deep=True)

@final
def infer_objects(self, copy: bool | None = None) -> Self:
def infer_objects(self, copy: bool | lib.NoDefault = lib.no_default) -> Self:
"""
Attempt to infer better dtypes for object columns.

Expand All @@ -6491,7 +6515,7 @@ def infer_objects(self, copy: bool | None = None) -> Self:

Parameters
----------
copy : bool, default True
copy : bool, default False
Whether to make a copy for non-object or non-inferable columns
or Series.

Expand All @@ -6507,6 +6531,8 @@ def infer_objects(self, copy: bool | None = None) -> Self:
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0

Returns
-------
same type as input object
Expand Down Expand Up @@ -6536,6 +6562,7 @@ def infer_objects(self, copy: bool | None = None) -> Self:
A int64
dtype: object
"""
self._check_copy_deprecation(copy)
new_mgr = self._mgr.convert()
res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
return res.__finalize__(self, method="infer_objects")
Expand Down Expand Up @@ -9405,7 +9432,7 @@ def align(
join: AlignJoin = "outer",
axis: Axis | None = None,
level: Level | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
fill_value: Hashable | None = None,
) -> tuple[Self, NDFrameT]:
"""
Expand All @@ -9430,7 +9457,7 @@ def align(
level : int or level name, default None
Broadcast across a level, matching Index values on the
passed MultiIndex level.
copy : bool, default True
copy : bool, default False
Always returns new objects. If copy=False and no reindexing is
required then original objects are returned.

Expand All @@ -9445,6 +9472,8 @@ def align(

You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0
fill_value : scalar, default np.nan
Value to use for missing values. Defaults to NaN, but can be any
"compatible" value.
Expand Down Expand Up @@ -9519,6 +9548,8 @@ def align(
3 60.0 70.0 80.0 90.0 NaN
4 600.0 700.0 800.0 900.0 NaN
"""
self._check_copy_deprecation(copy)

_right: DataFrame | Series
if axis is not None:
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -10342,7 +10373,7 @@ def truncate(
before=None,
after=None,
axis: Axis | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
) -> Self:
"""
Truncate a Series or DataFrame before and after some index value.
Expand All @@ -10359,7 +10390,7 @@ def truncate(
axis : {0 or 'index', 1 or 'columns'}, optional
Axis to truncate. Truncates the index (rows) by default.
For `Series` this parameter is unused and defaults to 0.
copy : bool, default is True,
copy : bool, default False
Return a copy of the truncated section.

.. note::
Expand All @@ -10374,6 +10405,8 @@ def truncate(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0

Returns
-------
type of caller
Expand Down Expand Up @@ -10479,6 +10512,8 @@ def truncate(
2016-01-10 23:59:58 1
2016-01-10 23:59:59 1
"""
self._check_copy_deprecation(copy)

if axis is None:
axis = 0
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -10517,7 +10552,11 @@ def truncate(
@final
@doc(klass=_shared_doc_kwargs["klass"])
def tz_convert(
self, tz, axis: Axis = 0, level=None, copy: bool | None = None
self,
tz,
axis: Axis = 0,
level=None,
copy: bool | lib.NoDefault = lib.no_default,
) -> Self:
"""
Convert tz-aware axis to target time zone.
Expand All @@ -10532,7 +10571,7 @@ def tz_convert(
level : int, str, default None
If axis is a MultiIndex, convert a specific level. Otherwise
must be None.
copy : bool, default True
copy : bool, default False
Also make a copy of the underlying data.

.. note::
Expand All @@ -10547,6 +10586,8 @@ def tz_convert(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0

Returns
-------
{klass}
Expand Down Expand Up @@ -10576,6 +10617,7 @@ def tz_convert(
2018-09-14 23:30:00 1
dtype: int64
"""
self._check_copy_deprecation(copy)
axis = self._get_axis_number(axis)
ax = self._get_axis(axis)

Expand Down Expand Up @@ -10613,7 +10655,7 @@ def tz_localize(
tz,
axis: Axis = 0,
level=None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
) -> Self:
Expand All @@ -10633,7 +10675,7 @@ def tz_localize(
level : int, str, default None
If axis is a MultiIndex, localize a specific level. Otherwise
must be None.
copy : bool, default True
copy : bool, default False
Also make a copy of the underlying data.

.. note::
Expand All @@ -10647,6 +10689,8 @@ def tz_localize(

You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``

.. deprecated:: 3.0.0
ambiguous : 'infer', bool, bool-ndarray, 'NaT', default 'raise'
When clocks moved backward due to DST, ambiguous times may arise.
For example in Central European Time (UTC+01), when going from
Expand Down Expand Up @@ -10772,6 +10816,7 @@ def tz_localize(
2015-03-29 03:30:00+02:00 1
dtype: int64
"""
self._check_copy_deprecation(copy)
nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
if nonexistent not in nonexistent_options and not isinstance(
nonexistent, dt.timedelta
Expand Down Expand Up @@ -11726,7 +11771,7 @@ def _inplace_method(self, other, op) -> Self:

# this makes sure that we are aligned like the input
# we are updating inplace
self._update_inplace(result.reindex_like(self, copy=False))
self._update_inplace(result.reindex_like(self))
return self

@final
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def from_dummies(

# index data with a list of all columns that are dummies
try:
data_to_decode = data.astype("boolean", copy=False)
data_to_decode = data.astype("boolean")
except TypeError as err:
raise TypeError("Passed DataFrame contains non-dummy data") from err

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4822,7 +4822,7 @@ def reindex( # type: ignore[override]
*,
axis: Axis | None = None,
method: ReindexMethod | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
level: Level | None = None,
fill_value: Scalar | None = None,
limit: int | None = None,
Expand All @@ -4835,6 +4835,7 @@ def reindex( # type: ignore[override]
fill_value=fill_value,
limit=limit,
tolerance=tolerance,
copy=copy,
)

@overload # type: ignore[override]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ def coerce(values):

# prevent overflow in case of int8 or int16
if is_integer_dtype(values.dtype):
values = values.astype("int64", copy=False)
values = values.astype("int64")
return values

values = (
Expand Down
Loading
Loading