From dc12f12e2a0ceaedb276c01eef507d8f1c658833 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 24 Feb 2024 19:31:58 +0100 Subject: [PATCH] CoW: Deprecate copy keyword from first set of methods (#57347) * CoW: Remove a few copy=False statements * Cow: Deprecate copy keyword from first set of methods * Fixup * Update * Update * Update --- doc/source/whatsnew/v3.0.0.rst | 23 ++++++ pandas/core/frame.py | 3 +- pandas/core/generic.py | 79 +++++++++++++++---- pandas/core/reshape/encoding.py | 2 +- pandas/core/series.py | 3 +- pandas/core/tools/datetimes.py | 2 +- pandas/io/pytables.py | 2 +- pandas/io/sql.py | 4 +- pandas/tests/arithmetic/test_timedelta64.py | 4 +- pandas/tests/copy_view/test_astype.py | 2 +- .../tests/copy_view/test_copy_deprecation.py | 53 +++++++++++++ pandas/tests/copy_view/test_methods.py | 6 +- pandas/tests/frame/indexing/test_where.py | 2 +- pandas/tests/frame/methods/test_align.py | 2 +- pandas/tests/frame/methods/test_astype.py | 10 +-- pandas/tests/frame/methods/test_describe.py | 4 +- pandas/tests/frame/methods/test_reindex.py | 35 +------- pandas/tests/frame/methods/test_tz_convert.py | 5 +- .../tests/frame/methods/test_tz_localize.py | 5 +- pandas/tests/generic/test_finalize.py | 4 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/series/methods/test_align.py | 8 +- .../series/methods/test_infer_objects.py | 4 +- 23 files changed, 176 insertions(+), 88 deletions(-) create mode 100644 pandas/tests/copy_view/test_copy_deprecation.py diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 56babe234cc79..f11dbfa5a95ad 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,6 +102,29 @@ Other API changes Deprecations ~~~~~~~~~~~~ + +Copy keyword +^^^^^^^^^^^^ + +The ``copy`` keyword argument in the following methods is deprecated and +will be removed in a future version: + +- :meth:`DataFrame.truncate` / :meth:`Series.truncate` +- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert` +- :meth:`DataFrame.tz_localize` / :meth:`Series.tz_localize` +- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects` +- :meth:`DataFrame.align` / :meth:`Series.align` +- :meth:`DataFrame.astype` / :meth:`Series.astype` +- :meth:`DataFrame.reindex` / :meth:`Series.reindex` +- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` + +Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until +necessary. Use ``.copy`` to trigger an eager copy. The copy keyword has no effect +starting with 3.0, so it can be safely removed from your code. + +Other Deprecations +^^^^^^^^^^^^^^^^^^ + - Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`) - Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index abb043361483c..df413fda0255a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5070,7 +5070,7 @@ def reindex( columns=None, axis: Axis | None = None, method: ReindexMethod | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, level: Level | None = None, fill_value: Scalar | None = np.nan, limit: int | None = None, @@ -5086,6 +5086,7 @@ def reindex( fill_value=fill_value, limit=limit, tolerance=tolerance, + copy=copy, ) @overload diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0880e6730fd93..9424e421fd85f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4251,12 +4251,24 @@ def _is_view(self) -> bool: """Return boolean indicating if self is view of another array""" return self._mgr.is_view + @staticmethod + def _check_copy_deprecation(copy): + if copy is not lib.no_default: + warnings.warn( + "The copy keyword is deprecated and will be removed in a future " + "version. Copy-on-Write is active in pandas since 3.0 which utilizes " + "a lazy copy mechanism that defers copies until necessary. Use " + ".copy() to make an eager copy if necessary.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + @final def reindex_like( self, other, method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, limit: int | None = None, tolerance=None, ) -> Self: @@ -4284,7 +4296,7 @@ def reindex_like( * backfill / bfill: use next valid observation to fill gap * nearest: use nearest valid observations to fill gap. - copy : bool, default True + copy : bool, default False Return a new object, even if the passed indexes are the same. .. note:: @@ -4298,6 +4310,8 @@ def reindex_like( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 limit : int, default None Maximum number of consecutive labels to fill for inexact matches. tolerance : optional @@ -4366,6 +4380,7 @@ def reindex_like( 2014-02-14 NaN NaN NaN 2014-02-15 35.1 NaN medium """ + self._check_copy_deprecation(copy) d = other._construct_axes_dict( axes=self._AXIS_ORDERS, method=method, @@ -5011,7 +5026,7 @@ def reindex( columns=None, axis: Axis | None = None, method: ReindexMethod | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, level: Level | None = None, fill_value: Scalar | None = np.nan, limit: int | None = None, @@ -5038,7 +5053,7 @@ def reindex( * backfill / bfill: Use next valid observation to fill gap. * nearest: Use nearest valid observations to fill gap. - copy : bool, default True + copy : bool, default False Return a new object, even if the passed indexes are the same. .. note:: @@ -5052,6 +5067,8 @@ def reindex( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level. @@ -5229,6 +5246,7 @@ def reindex( """ # TODO: Decide if we care about having different examples for different # kinds + self._check_copy_deprecation(copy) if index is not None and columns is not None and labels is not None: raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.") @@ -6136,7 +6154,10 @@ def dtypes(self): @final def astype( - self, dtype, copy: bool | None = None, errors: IgnoreRaise = "raise" + self, + dtype, + copy: bool | lib.NoDefault = lib.no_default, + errors: IgnoreRaise = "raise", ) -> Self: """ Cast a pandas object to a specified dtype ``dtype``. @@ -6149,7 +6170,7 @@ def astype( mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is a numpy.dtype or Python type to cast one or more of the DataFrame's columns to column-specific types. - copy : bool, default True + copy : bool, default False Return a copy when ``copy=True`` (be very careful setting ``copy=False`` as changes to values then may propagate to other pandas objects). @@ -6165,6 +6186,8 @@ def astype( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 errors : {'raise', 'ignore'}, default 'raise' Control raising of exceptions on invalid data for provided dtype. @@ -6254,6 +6277,7 @@ def astype( 2 2020-01-03 dtype: datetime64[ns] """ + self._check_copy_deprecation(copy) if is_dict_like(dtype): if self.ndim == 1: # i.e. Series if len(dtype) > 1 or self.name not in dtype: @@ -6481,7 +6505,7 @@ def __deepcopy__(self, memo=None) -> Self: return self.copy(deep=True) @final - def infer_objects(self, copy: bool | None = None) -> Self: + def infer_objects(self, copy: bool | lib.NoDefault = lib.no_default) -> Self: """ Attempt to infer better dtypes for object columns. @@ -6492,7 +6516,7 @@ def infer_objects(self, copy: bool | None = None) -> Self: Parameters ---------- - copy : bool, default True + copy : bool, default False Whether to make a copy for non-object or non-inferable columns or Series. @@ -6508,6 +6532,8 @@ def infer_objects(self, copy: bool | None = None) -> Self: You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- same type as input object @@ -6537,6 +6563,7 @@ def infer_objects(self, copy: bool | None = None) -> Self: A int64 dtype: object """ + self._check_copy_deprecation(copy) new_mgr = self._mgr.convert() res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) return res.__finalize__(self, method="infer_objects") @@ -9404,7 +9431,7 @@ def align( join: AlignJoin = "outer", axis: Axis | None = None, level: Level | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, fill_value: Hashable | None = None, ) -> tuple[Self, NDFrameT]: """ @@ -9429,7 +9456,7 @@ def align( level : int or level name, default None Broadcast across a level, matching Index values on the passed MultiIndex level. - copy : bool, default True + copy : bool, default False Always returns new objects. If copy=False and no reindexing is required then original objects are returned. @@ -9444,6 +9471,8 @@ def align( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 fill_value : scalar, default np.nan Value to use for missing values. Defaults to NaN, but can be any "compatible" value. @@ -9518,6 +9547,8 @@ def align( 3 60.0 70.0 80.0 90.0 NaN 4 600.0 700.0 800.0 900.0 NaN """ + self._check_copy_deprecation(copy) + _right: DataFrame | Series if axis is not None: axis = self._get_axis_number(axis) @@ -10336,7 +10367,7 @@ def truncate( before=None, after=None, axis: Axis | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> Self: """ Truncate a Series or DataFrame before and after some index value. @@ -10353,7 +10384,7 @@ def truncate( axis : {0 or 'index', 1 or 'columns'}, optional Axis to truncate. Truncates the index (rows) by default. For `Series` this parameter is unused and defaults to 0. - copy : bool, default is True, + copy : bool, default is False, Return a copy of the truncated section. .. note:: @@ -10368,6 +10399,8 @@ def truncate( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- type of caller @@ -10473,6 +10506,8 @@ def truncate( 2016-01-10 23:59:58 1 2016-01-10 23:59:59 1 """ + self._check_copy_deprecation(copy) + if axis is None: axis = 0 axis = self._get_axis_number(axis) @@ -10511,7 +10546,11 @@ def truncate( @final @doc(klass=_shared_doc_kwargs["klass"]) def tz_convert( - self, tz, axis: Axis = 0, level=None, copy: bool | None = None + self, + tz, + axis: Axis = 0, + level=None, + copy: bool | lib.NoDefault = lib.no_default, ) -> Self: """ Convert tz-aware axis to target time zone. @@ -10526,7 +10565,7 @@ def tz_convert( level : int, str, default None If axis is a MultiIndex, convert a specific level. Otherwise must be None. - copy : bool, default True + copy : bool, default False Also make a copy of the underlying data. .. note:: @@ -10541,6 +10580,8 @@ def tz_convert( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- {klass} @@ -10570,6 +10611,7 @@ def tz_convert( 2018-09-14 23:30:00 1 dtype: int64 """ + self._check_copy_deprecation(copy) axis = self._get_axis_number(axis) ax = self._get_axis(axis) @@ -10607,7 +10649,7 @@ def tz_localize( tz, axis: Axis = 0, level=None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", ) -> Self: @@ -10627,7 +10669,7 @@ def tz_localize( level : int, str, default None If axis ia a MultiIndex, localize a specific level. Otherwise must be None. - copy : bool, default True + copy : bool, default False Also make a copy of the underlying data. .. note:: @@ -10641,6 +10683,8 @@ def tz_localize( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 ambiguous : 'infer', bool, bool-ndarray, 'NaT', default 'raise' When clocks moved backward due to DST, ambiguous times may arise. For example in Central European Time (UTC+01), when going from @@ -10766,6 +10810,7 @@ def tz_localize( 2015-03-29 03:30:00+02:00 1 dtype: int64 """ + self._check_copy_deprecation(copy) nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( nonexistent, dt.timedelta @@ -11720,7 +11765,7 @@ def _inplace_method(self, other, op) -> Self: # this makes sure that we are aligned like the input # we are updating inplace - self._update_inplace(result.reindex_like(self, copy=False)) + self._update_inplace(result.reindex_like(self)) return self @final diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index d6473f695a667..9d88e61951e99 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -499,7 +499,7 @@ def from_dummies( # index data with a list of all columns that are dummies try: - data_to_decode = data.astype("boolean", copy=False) + data_to_decode = data.astype("boolean") except TypeError as err: raise TypeError("Passed DataFrame contains non-dummy data") from err diff --git a/pandas/core/series.py b/pandas/core/series.py index 5956fa59528a7..5c9bc428e256f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4844,7 +4844,7 @@ def reindex( # type: ignore[override] *, axis: Axis | None = None, method: ReindexMethod | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, level: Level | None = None, fill_value: Scalar | None = None, limit: int | None = None, @@ -4857,6 +4857,7 @@ def reindex( # type: ignore[override] fill_value=fill_value, limit=limit, tolerance=tolerance, + copy=copy, ) @overload # type: ignore[override] diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c785f0c3a6985..e64b175cea218 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1148,7 +1148,7 @@ def coerce(values): # prevent overflow in case of int8 or int16 if is_integer_dtype(values.dtype): - values = values.astype("int64", copy=False) + values = values.astype("int64") return values values = ( diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1735cd3528276..c7ee1cac2e14a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3281,7 +3281,7 @@ def read( if len(dfs) > 0: out = concat(dfs, axis=1).copy() - return out.reindex(columns=items, copy=False) + return out.reindex(columns=items) return DataFrame(columns=axes[0], index=axes[1]) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9024961688c6b..5a16b1c8cbc24 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1311,12 +1311,12 @@ def _harmonize_columns( self.frame[col_name] = _handle_date_column(df_col, utc=utc) elif dtype_backend == "numpy" and col_type is float: # floats support NA, can always convert! - self.frame[col_name] = df_col.astype(col_type, copy=False) + self.frame[col_name] = df_col.astype(col_type) elif dtype_backend == "numpy" and len(df_col) == df_col.count(): # No NA values, can convert ints and bools if col_type is np.dtype("int64") or col_type is bool: - self.frame[col_name] = df_col.astype(col_type, copy=False) + self.frame[col_name] = df_col.astype(col_type) except KeyError: pass # this column not in results diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index d1820c3486b3a..0ecb8f9bef468 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1300,8 +1300,8 @@ def test_td64arr_add_sub_offset_index( ) tdi = tm.box_expected(tdi, box) - expected = tm.box_expected(expected, box).astype(object, copy=False) - expected_sub = tm.box_expected(expected_sub, box).astype(object, copy=False) + expected = tm.box_expected(expected, box).astype(object) + expected_sub = tm.box_expected(expected_sub, box).astype(object) with tm.assert_produces_warning(performance_warning): res = tdi + other diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 5a9b3463cf63f..2d959bb16e7d5 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -126,7 +126,7 @@ def test_astype_string_read_only_on_pickle_roundrip(): base = Series(np.array([(1, 2), None, 1], dtype="object")) base_copy = pickle.loads(pickle.dumps(base)) base_copy._values.flags.writeable = False - base_copy.astype("string[pyarrow]", copy=False) + base_copy.astype("string[pyarrow]") tm.assert_series_equal(base, base_copy) diff --git a/pandas/tests/copy_view/test_copy_deprecation.py b/pandas/tests/copy_view/test_copy_deprecation.py new file mode 100644 index 0000000000000..ca57c02112131 --- /dev/null +++ b/pandas/tests/copy_view/test_copy_deprecation.py @@ -0,0 +1,53 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "meth, kwargs", + [ + ("truncate", {}), + ("tz_convert", {"tz": "UTC"}), + ("tz_localize", {"tz": "UTC"}), + ("infer_objects", {}), + ("astype", {"dtype": "float64"}), + ("reindex", {"index": [2, 0, 1]}), + ], +) +def test_copy_deprecation(meth, kwargs): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + if meth in ("tz_convert", "tz_localize"): + tz = None if meth == "tz_localize" else "US/Eastern" + df.index = pd.date_range("2020-01-01", freq="D", periods=len(df), tz=tz) + + with tm.assert_produces_warning(DeprecationWarning, match="copy"): + getattr(df, meth)(copy=False, **kwargs) + + with tm.assert_produces_warning(DeprecationWarning, match="copy"): + getattr(df.a, meth)(copy=False, **kwargs) + + +def test_copy_deprecation_reindex_like_align(): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + # Somehow the stack level check is incorrect here + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + df.reindex_like(df, copy=False) + + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + df.a.reindex_like(df.a, copy=False) + + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + df.align(df, copy=False) + + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + df.a.align(df.a, copy=False) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 05207e13c8547..8bf0e81e74e25 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -60,6 +60,7 @@ def test_copy_shallow(): assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c")) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") @pytest.mark.parametrize("copy", [True, None, False]) @pytest.mark.parametrize( "method", @@ -116,6 +117,7 @@ def test_methods_copy_keyword(request, method, copy): assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") @pytest.mark.parametrize("copy", [True, None, False]) @pytest.mark.parametrize( "method", @@ -499,7 +501,7 @@ def test_align_series(): def test_align_copy_false(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() - df2, df3 = df.align(df, copy=False) + df2, df3 = df.align(df) assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) @@ -516,7 +518,7 @@ def test_align_with_series_copy_false(): ser = Series([1, 2, 3], name="x") ser_orig = ser.copy() df_orig = df.copy() - df2, ser2 = df.align(ser, copy=False, axis=0) + df2, ser2 = df.align(ser, axis=0) assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index ebd4136f2e451..8b3596debc0b8 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -177,7 +177,7 @@ def test_where_set(self, where_frame, float_string_frame, mixed_int_frame): def _check_set(df, cond, check_dtypes=True): dfi = df.copy() - econd = cond.reindex_like(df).fillna(True).infer_objects(copy=False) + econd = cond.reindex_like(df).fillna(True).infer_objects() expected = dfi.mask(~econd) return_value = dfi.where(cond, np.nan, inplace=True) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index d580be3550c03..2677b7332a65a 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -44,7 +44,7 @@ def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr - af, bf = float_frame.align(float_frame, copy=False) + af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr # axis = 0 diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 84e642f47417b..55f8052d05cf1 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -122,11 +122,11 @@ def test_astype_with_exclude_string(self, float_frame): def test_astype_with_view_float(self, float_frame): # this is the only real reason to do it this way tf = np.round(float_frame).astype(np.int32) - tf.astype(np.float32, copy=False) + tf.astype(np.float32) # TODO(wesm): verification? tf = float_frame.astype(np.float64) - tf.astype(np.int64, copy=False) + tf.astype(np.int64) def test_astype_with_view_mixed_float(self, mixed_float_frame): tf = mixed_float_frame.reindex(columns=["A", "B", "C"]) @@ -865,7 +865,7 @@ def construct_array_type(cls): def test_frame_astype_no_copy(): # GH 42501 df = DataFrame({"a": [1, 4, None, 5], "b": [6, 7, 8, 9]}, dtype=object) - result = df.astype({"a": Int16DtypeNoCopy()}, copy=False) + result = df.astype({"a": Int16DtypeNoCopy()}) assert result.a.dtype == pd.Int16Dtype() assert np.shares_memory(df.b.values, result.b.values) @@ -876,7 +876,7 @@ def test_astype_copies(dtype): # GH#50984 pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}, dtype=dtype) - result = df.astype("int64[pyarrow]", copy=True) + result = df.astype("int64[pyarrow]") df.iloc[0, 0] = 100 expected = DataFrame({"a": [1, 2, 3]}, dtype="int64[pyarrow]") tm.assert_frame_equal(result, expected) @@ -888,5 +888,5 @@ def test_astype_to_string_not_modifying_input(string_storage, val): df = DataFrame({"a": ["a", "b", val]}) expected = df.copy() with option_context("mode.string_storage", string_storage): - df.astype("string", copy=False) + df.astype("string") tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 5beb09940acf3..e9206e86b7b08 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -318,9 +318,7 @@ def test_describe_tz_values2(self): "max", "std", ] - expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).reindex( - idx, copy=False - ) + expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).reindex(idx) result = df.describe(include="all") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 229ca6e6b683a..45109991c4553 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -159,37 +159,6 @@ def test_reindex_tzaware_fill_value(self): expected[1] = expected[1].astype(res.dtypes[1]) tm.assert_frame_equal(res, expected) - def test_reindex_copies(self): - # based on asv time_reindex_axis1 - N = 10 - df = DataFrame(np.random.default_rng(2).standard_normal((N * 10, N))) - cols = np.arange(N) - np.random.default_rng(2).shuffle(cols) - - result = df.reindex(columns=cols, copy=True) - assert not np.shares_memory(result[0]._values, df[0]._values) - - # pass both columns and index - result2 = df.reindex(columns=cols, index=df.index, copy=True) - assert not np.shares_memory(result2[0]._values, df[0]._values) - - def test_reindex_copies_ea(self): - # https://github.com/pandas-dev/pandas/pull/51197 - # also ensure to honor copy keyword for ExtensionDtypes - N = 10 - df = DataFrame( - np.random.default_rng(2).standard_normal((N * 10, N)), dtype="Float64" - ) - cols = np.arange(N) - np.random.default_rng(2).shuffle(cols) - - result = df.reindex(columns=cols, copy=True) - assert np.shares_memory(result[0].array._data, df[0].array._data) - - # pass both columns and index - result2 = df.reindex(columns=cols, index=df.index, copy=True) - assert np.shares_memory(result2[0].array._data, df[0].array._data) - def test_reindex_date_fill_value(self): # passing date to dt64 is deprecated; enforced in 2.0 to cast to object arr = date_range("2016-01-01", periods=6).values.reshape(3, 2) @@ -635,9 +604,7 @@ def test_reindex(self, float_frame): tm.assert_index_equal(series.index, nonContigFrame.index) # corner cases - - # Same index, copies values but not index if copy=False - newFrame = float_frame.reindex(float_frame.index, copy=False) + newFrame = float_frame.reindex(float_frame.index) assert newFrame.index.is_(float_frame.index) # length zero diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py index 90bec4dfb5be6..e9209f218bca9 100644 --- a/pandas/tests/frame/methods/test_tz_convert.py +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -117,15 +117,14 @@ def test_tz_convert_and_localize_bad_input(self, fn): with pytest.raises(ValueError, match="not valid"): getattr(df, fn)("US/Pacific", level=1) - @pytest.mark.parametrize("copy", [True, False]) - def test_tz_convert_copy_inplace_mutate(self, copy, frame_or_series): + def test_tz_convert_copy_inplace_mutate(self, frame_or_series): # GH#6326 obj = frame_or_series( np.arange(0, 5), index=date_range("20131027", periods=5, freq="h", tz="Europe/Berlin"), ) orig = obj.copy() - result = obj.tz_convert("UTC", copy=copy) + result = obj.tz_convert("UTC") expected = frame_or_series(np.arange(0, 5), index=obj.index.tz_convert("UTC")) tm.assert_equal(result, expected) tm.assert_equal(obj, orig) diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py index b167afc17f484..c0f5a90a9d2ee 100644 --- a/pandas/tests/frame/methods/test_tz_localize.py +++ b/pandas/tests/frame/methods/test_tz_localize.py @@ -50,14 +50,13 @@ def test_tz_localize_naive(self, frame_or_series): with pytest.raises(TypeError, match="Already tz-aware"): ts.tz_localize("US/Eastern") - @pytest.mark.parametrize("copy", [True, False]) - def test_tz_localize_copy_inplace_mutate(self, copy, frame_or_series): + def test_tz_localize_copy_inplace_mutate(self, frame_or_series): # GH#6326 obj = frame_or_series( np.arange(0, 5), index=date_range("20131027", periods=5, freq="1h", tz=None) ) orig = obj.copy() - result = obj.tz_localize("UTC", copy=copy) + result = obj.tz_localize("UTC") expected = frame_or_series( np.arange(0, 5), index=date_range("20131027", periods=5, freq="1h", tz="UTC"), diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 7cf5ccc4ed24f..fd815c85a89b3 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -492,9 +492,9 @@ def test_binops(request, args, annotate, all_binary_operators): ] if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series): # in 2.0 silent alignment on comparisons was removed xref GH#28759 - left, right = left.align(right, axis=1, copy=False) + left, right = left.align(right, axis=1) elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame): - right, left = right.align(left, axis=1, copy=False) + right, left = right.align(left, axis=1) result = all_binary_operators(left, right) assert result.attrs == {"a": 1} diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f263f92b4f0eb..7208c688bd217 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -666,7 +666,7 @@ def test_loc_setitem_consistency_slice_column_len(self): df.loc[:, ("Respondent", "EndDate")] = to_datetime( df.loc[:, ("Respondent", "EndDate")] ) - df = df.infer_objects(copy=False) + df = df.infer_objects() # Adding a new key df.loc[:, ("Respondent", "Duration")] = ( diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index 3f5f8988a0ab4..620a8ae54fee3 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -63,7 +63,7 @@ def test_align_nocopy(datetime_series): # do not copy a = datetime_series.copy() - ra, _ = a.align(b, join="left", copy=False) + ra, _ = a.align(b, join="left") ra[:5] = 5 assert not (a[:5] == 5).any() @@ -77,17 +77,17 @@ def test_align_nocopy(datetime_series): # do not copy a = datetime_series.copy() b = datetime_series[:5].copy() - _, rb = a.align(b, join="right", copy=False) + _, rb = a.align(b, join="right") rb[:2] = 5 assert not (b[:2] == 5).any() def test_align_same_index(datetime_series): - a, b = datetime_series.align(datetime_series, copy=False) + a, b = datetime_series.align(datetime_series) assert a.index.is_(datetime_series.index) assert b.index.is_(datetime_series.index) - a, b = datetime_series.align(datetime_series, copy=True) + a, b = datetime_series.align(datetime_series) assert a.index is not datetime_series.index assert b.index is not datetime_series.index assert a.index.is_(datetime_series.index) diff --git a/pandas/tests/series/methods/test_infer_objects.py b/pandas/tests/series/methods/test_infer_objects.py index 29abac6b3780e..0d411e08a0107 100644 --- a/pandas/tests/series/methods/test_infer_objects.py +++ b/pandas/tests/series/methods/test_infer_objects.py @@ -13,12 +13,12 @@ def test_copy(self, index_or_series): # case where we don't need to do inference because it is already non-object obj = index_or_series(np.array([1, 2, 3], dtype="int64")) - result = obj.infer_objects(copy=False) + result = obj.infer_objects() assert tm.shares_memory(result, obj) # case where we try to do inference but can't do better than object obj2 = index_or_series(np.array(["foo", 2], dtype=object)) - result2 = obj2.infer_objects(copy=False) + result2 = obj2.infer_objects() assert tm.shares_memory(result2, obj2) def test_infer_objects_series(self, index_or_series):