Skip to content

Commit

Permalink
CoW: Deprecate copy keyword from first set of methods (pandas-dev#57347)
Browse files Browse the repository at this point in the history
* CoW: Remove a few copy=False statements

* CoW: Deprecate copy keyword from first set of methods

* Fixup

* Update

* Update

* Update
  • Loading branch information
phofl authored and pmhatre1 committed May 7, 2024
1 parent 0412e31 commit dc12f12
Show file tree
Hide file tree
Showing 23 changed files with 176 additions and 88 deletions.
23 changes: 23 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,29 @@ Other API changes

Deprecations
~~~~~~~~~~~~

Copy keyword
^^^^^^^^^^^^

The ``copy`` keyword argument in the following methods is deprecated and
will be removed in a future version:

- :meth:`DataFrame.truncate` / :meth:`Series.truncate`
- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert`
- :meth:`DataFrame.tz_localize` / :meth:`Series.tz_localize`
- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects`
- :meth:`DataFrame.align` / :meth:`Series.align`
- :meth:`DataFrame.astype` / :meth:`Series.astype`
- :meth:`DataFrame.reindex` / :meth:`Series.reindex`
- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like`

Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until
necessary. Use ``.copy()`` to trigger an eager copy. The ``copy`` keyword has no
effect starting with 3.0, so it can be safely removed from your code.

Other Deprecations
^^^^^^^^^^^^^^^^^^

- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5070,7 +5070,7 @@ def reindex(
columns=None,
axis: Axis | None = None,
method: ReindexMethod | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
level: Level | None = None,
fill_value: Scalar | None = np.nan,
limit: int | None = None,
Expand All @@ -5086,6 +5086,7 @@ def reindex(
fill_value=fill_value,
limit=limit,
tolerance=tolerance,
copy=copy,
)

@overload
Expand Down
79 changes: 62 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4251,12 +4251,24 @@ def _is_view(self) -> bool:
"""Return boolean indicating if self is view of another array"""
return self._mgr.is_view

@staticmethod
def _check_copy_deprecation(copy):
    """
    Warn when the deprecated ``copy`` keyword was passed explicitly.

    Parameters
    ----------
    copy : object
        The value received for a method's ``copy`` keyword. The sentinel
        ``lib.no_default`` means the caller did not pass the keyword, in
        which case nothing happens.

    Warns
    -----
    DeprecationWarning
        If ``copy`` is anything other than ``lib.no_default``. The
        ``stacklevel`` is whatever ``find_stack_level()`` returns.
    """
    # Keyword left at its sentinel default: nothing to report.
    if copy is lib.no_default:
        return

    message = (
        "The copy keyword is deprecated and will be removed in a future "
        "version. Copy-on-Write is active in pandas since 3.0 which utilizes "
        "a lazy copy mechanism that defers copies until necessary. Use "
        ".copy() to make an eager copy if necessary."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=find_stack_level())

@final
def reindex_like(
self,
other,
method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
limit: int | None = None,
tolerance=None,
) -> Self:
Expand Down Expand Up @@ -4284,7 +4296,7 @@ def reindex_like(
* backfill / bfill: use next valid observation to fill gap
* nearest: use nearest valid observations to fill gap.
copy : bool, default True
copy : bool, default False
Return a new object, even if the passed indexes are the same.
.. note::
Expand All @@ -4298,6 +4310,8 @@ def reindex_like(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
limit : int, default None
Maximum number of consecutive labels to fill for inexact matches.
tolerance : optional
Expand Down Expand Up @@ -4366,6 +4380,7 @@ def reindex_like(
2014-02-14 NaN NaN NaN
2014-02-15 35.1 NaN medium
"""
self._check_copy_deprecation(copy)
d = other._construct_axes_dict(
axes=self._AXIS_ORDERS,
method=method,
Expand Down Expand Up @@ -5011,7 +5026,7 @@ def reindex(
columns=None,
axis: Axis | None = None,
method: ReindexMethod | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
level: Level | None = None,
fill_value: Scalar | None = np.nan,
limit: int | None = None,
Expand All @@ -5038,7 +5053,7 @@ def reindex(
* backfill / bfill: Use next valid observation to fill gap.
* nearest: Use nearest valid observations to fill gap.
copy : bool, default True
copy : bool, default False
Return a new object, even if the passed indexes are the same.
.. note::
Expand All @@ -5052,6 +5067,8 @@ def reindex(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level.
Expand Down Expand Up @@ -5229,6 +5246,7 @@ def reindex(
"""
# TODO: Decide if we care about having different examples for different
# kinds
self._check_copy_deprecation(copy)

if index is not None and columns is not None and labels is not None:
raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.")
Expand Down Expand Up @@ -6136,7 +6154,10 @@ def dtypes(self):

@final
def astype(
self, dtype, copy: bool | None = None, errors: IgnoreRaise = "raise"
self,
dtype,
copy: bool | lib.NoDefault = lib.no_default,
errors: IgnoreRaise = "raise",
) -> Self:
"""
Cast a pandas object to a specified dtype ``dtype``.
Expand All @@ -6149,7 +6170,7 @@ def astype(
mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is
a numpy.dtype or Python type to cast one or more of the DataFrame's
columns to column-specific types.
copy : bool, default True
copy : bool, default False
Return a copy when ``copy=True`` (be very careful setting
``copy=False`` as changes to values then may propagate to other
pandas objects).
Expand All @@ -6165,6 +6186,8 @@ def astype(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
errors : {'raise', 'ignore'}, default 'raise'
Control raising of exceptions on invalid data for provided dtype.
Expand Down Expand Up @@ -6254,6 +6277,7 @@ def astype(
2 2020-01-03
dtype: datetime64[ns]
"""
self._check_copy_deprecation(copy)
if is_dict_like(dtype):
if self.ndim == 1: # i.e. Series
if len(dtype) > 1 or self.name not in dtype:
Expand Down Expand Up @@ -6481,7 +6505,7 @@ def __deepcopy__(self, memo=None) -> Self:
return self.copy(deep=True)

@final
def infer_objects(self, copy: bool | None = None) -> Self:
def infer_objects(self, copy: bool | lib.NoDefault = lib.no_default) -> Self:
"""
Attempt to infer better dtypes for object columns.
Expand All @@ -6492,7 +6516,7 @@ def infer_objects(self, copy: bool | None = None) -> Self:
Parameters
----------
copy : bool, default True
copy : bool, default False
Whether to make a copy for non-object or non-inferable columns
or Series.
Expand All @@ -6508,6 +6532,8 @@ def infer_objects(self, copy: bool | None = None) -> Self:
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
Returns
-------
same type as input object
Expand Down Expand Up @@ -6537,6 +6563,7 @@ def infer_objects(self, copy: bool | None = None) -> Self:
A int64
dtype: object
"""
self._check_copy_deprecation(copy)
new_mgr = self._mgr.convert()
res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
return res.__finalize__(self, method="infer_objects")
Expand Down Expand Up @@ -9404,7 +9431,7 @@ def align(
join: AlignJoin = "outer",
axis: Axis | None = None,
level: Level | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
fill_value: Hashable | None = None,
) -> tuple[Self, NDFrameT]:
"""
Expand All @@ -9429,7 +9456,7 @@ def align(
level : int or level name, default None
Broadcast across a level, matching Index values on the
passed MultiIndex level.
copy : bool, default True
copy : bool, default False
Always returns new objects. If copy=False and no reindexing is
required then original objects are returned.
Expand All @@ -9444,6 +9471,8 @@ def align(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
fill_value : scalar, default np.nan
Value to use for missing values. Defaults to NaN, but can be any
"compatible" value.
Expand Down Expand Up @@ -9518,6 +9547,8 @@ def align(
3 60.0 70.0 80.0 90.0 NaN
4 600.0 700.0 800.0 900.0 NaN
"""
self._check_copy_deprecation(copy)

_right: DataFrame | Series
if axis is not None:
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -10336,7 +10367,7 @@ def truncate(
before=None,
after=None,
axis: Axis | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
) -> Self:
"""
Truncate a Series or DataFrame before and after some index value.
Expand All @@ -10353,7 +10384,7 @@ def truncate(
axis : {0 or 'index', 1 or 'columns'}, optional
Axis to truncate. Truncates the index (rows) by default.
For `Series` this parameter is unused and defaults to 0.
copy : bool, default is True,
copy : bool, default is False,
Return a copy of the truncated section.
.. note::
Expand All @@ -10368,6 +10399,8 @@ def truncate(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
Returns
-------
type of caller
Expand Down Expand Up @@ -10473,6 +10506,8 @@ def truncate(
2016-01-10 23:59:58 1
2016-01-10 23:59:59 1
"""
self._check_copy_deprecation(copy)

if axis is None:
axis = 0
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -10511,7 +10546,11 @@ def truncate(
@final
@doc(klass=_shared_doc_kwargs["klass"])
def tz_convert(
self, tz, axis: Axis = 0, level=None, copy: bool | None = None
self,
tz,
axis: Axis = 0,
level=None,
copy: bool | lib.NoDefault = lib.no_default,
) -> Self:
"""
Convert tz-aware axis to target time zone.
Expand All @@ -10526,7 +10565,7 @@ def tz_convert(
level : int, str, default None
If axis is a MultiIndex, convert a specific level. Otherwise
must be None.
copy : bool, default True
copy : bool, default False
Also make a copy of the underlying data.
.. note::
Expand All @@ -10541,6 +10580,8 @@ def tz_convert(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
Returns
-------
{klass}
Expand Down Expand Up @@ -10570,6 +10611,7 @@ def tz_convert(
2018-09-14 23:30:00 1
dtype: int64
"""
self._check_copy_deprecation(copy)
axis = self._get_axis_number(axis)
ax = self._get_axis(axis)

Expand Down Expand Up @@ -10607,7 +10649,7 @@ def tz_localize(
tz,
axis: Axis = 0,
level=None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
) -> Self:
Expand All @@ -10627,7 +10669,7 @@ def tz_localize(
level : int, str, default None
If axis is a MultiIndex, localize a specific level. Otherwise
must be None.
copy : bool, default True
copy : bool, default False
Also make a copy of the underlying data.
.. note::
Expand All @@ -10641,6 +10683,8 @@ def tz_localize(
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
.. deprecated:: 3.0.0
ambiguous : 'infer', bool, bool-ndarray, 'NaT', default 'raise'
When clocks moved backward due to DST, ambiguous times may arise.
For example in Central European Time (UTC+01), when going from
Expand Down Expand Up @@ -10766,6 +10810,7 @@ def tz_localize(
2015-03-29 03:30:00+02:00 1
dtype: int64
"""
self._check_copy_deprecation(copy)
nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
if nonexistent not in nonexistent_options and not isinstance(
nonexistent, dt.timedelta
Expand Down Expand Up @@ -11720,7 +11765,7 @@ def _inplace_method(self, other, op) -> Self:

# this makes sure that we are aligned like the input
# we are updating inplace
self._update_inplace(result.reindex_like(self, copy=False))
self._update_inplace(result.reindex_like(self))
return self

@final
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def from_dummies(

# index data with a list of all columns that are dummies
try:
data_to_decode = data.astype("boolean", copy=False)
data_to_decode = data.astype("boolean")
except TypeError as err:
raise TypeError("Passed DataFrame contains non-dummy data") from err

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4844,7 +4844,7 @@ def reindex( # type: ignore[override]
*,
axis: Axis | None = None,
method: ReindexMethod | None = None,
copy: bool | None = None,
copy: bool | lib.NoDefault = lib.no_default,
level: Level | None = None,
fill_value: Scalar | None = None,
limit: int | None = None,
Expand All @@ -4857,6 +4857,7 @@ def reindex( # type: ignore[override]
fill_value=fill_value,
limit=limit,
tolerance=tolerance,
copy=copy,
)

@overload # type: ignore[override]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ def coerce(values):

# prevent overflow in case of int8 or int16
if is_integer_dtype(values.dtype):
values = values.astype("int64", copy=False)
values = values.astype("int64")
return values

values = (
Expand Down
Loading

0 comments on commit dc12f12

Please sign in to comment.