From 8848692449cb2eab710ae94da8620df0552a7295 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 5 Feb 2024 02:00:06 +0000 Subject: [PATCH 1/7] CoW: Enforce some deprecations on the block level (#57253) --- pandas/core/frame.py | 1 - pandas/core/generic.py | 19 ++- pandas/core/internals/blocks.py | 202 ++++++++++++------------------ pandas/core/internals/managers.py | 50 +------- pandas/core/series.py | 2 +- 5 files changed, 94 insertions(+), 180 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index afa680d064c4a..614e8ff0232f5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10744,7 +10744,6 @@ def _series_round(ser: Series, decimals: int) -> Series: # type "Union[int, integer[Any]]"; expected "int" new_mgr = self._mgr.round( decimals=decimals, # type: ignore[arg-type] - using_cow=using_copy_on_write(), ) return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( self, method="round" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7bb07694c34a5..61fb757fafec4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6360,9 +6360,6 @@ def astype( 2 2020-01-03 dtype: datetime64[ns] """ - if copy and using_copy_on_write(): - copy = False - if is_dict_like(dtype): if self.ndim == 1: # i.e. Series if len(dtype) > 1 or self.name not in dtype: @@ -6371,7 +6368,7 @@ def astype( "the key in Series dtype mappings." ) new_type = dtype[self.name] - return self.astype(new_type, copy, errors) + return self.astype(new_type, errors=errors) # GH#44417 cast to Series so we can use .iat below, which will be # robust in case we @@ -6393,10 +6390,10 @@ def astype( for i, (col_name, col) in enumerate(self.items()): cdt = dtype_ser.iat[i] if isna(cdt): - res_col = col.copy(deep=copy) + res_col = col.copy(deep=False) else: try: - res_col = col.astype(dtype=cdt, copy=copy, errors=errors) + res_col = col.astype(dtype=cdt, errors=errors) except ValueError as ex: ex.args = ( f"{ex}: Error while type casting for column '{col_name}'", @@ -6410,22 +6407,20 @@ def astype( if isinstance(dtype, ExtensionDtype) and all( arr.dtype == dtype for arr in self._mgr.arrays ): - return self.copy(deep=copy) + return self.copy(deep=False) # GH 18099/22869: columnwise conversion to extension dtype # GH 24704: self.items handles duplicate column names - results = [ - ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items() - ] + results = [ser.astype(dtype, errors=errors) for _, ser in self.items()] else: # else, only a single dtype is given - new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) + new_data = self._mgr.astype(dtype=dtype, errors=errors) res = self._constructor_from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="astype") # GH 33113: handle empty frame or series if not results: - return self.copy(deep=None) + return self.copy(deep=False) # GH 19920: retain column metadata after concat result = concat(results, axis=1, copy=False) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bb65e7a4d0838..02296643acc3e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -527,14 +527,13 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: f"{self.values.dtype}. Please report a bug at " "https://github.com/pandas-dev/pandas/issues." ) - return self.astype(new_dtype, copy=False) + return self.astype(new_dtype) @final def _maybe_downcast( self, blocks: list[Block], downcast, - using_cow: bool, caller: str, ) -> list[Block]: if downcast is False: @@ -551,7 +550,7 @@ def _maybe_downcast( return blocks nbs = extend_blocks( - [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks] + [blk.convert(using_cow=True, copy=False) for blk in blocks] ) if caller == "fillna": if len(nbs) != len(blocks) or not all( @@ -576,7 +575,7 @@ def _maybe_downcast( elif caller == "where" and get_option("future.no_silent_downcasting") is True: return blocks else: - nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks]) + nbs = extend_blocks([b._downcast_2d(downcast, True) for b in blocks]) # When _maybe_downcast is called with caller="where", it is either # a) with downcast=False, which is a no-op (the desired future behavior) @@ -667,8 +666,6 @@ def convert( def convert_dtypes( self, - copy: bool, - using_cow: bool, infer_objects: bool = True, convert_string: bool = True, convert_integer: bool = True, @@ -677,14 +674,14 @@ def convert_dtypes( dtype_backend: DtypeBackend = "numpy_nullable", ) -> list[Block]: if infer_objects and self.is_object: - blks = self.convert(copy=False, using_cow=using_cow) + blks = self.convert(copy=False) else: blks = [self] if not any( [convert_floating, convert_integer, convert_boolean, convert_string] ): - return [b.copy(deep=copy) for b in blks] + return [b.copy(deep=False) for b in blks] rbs = [] for blk in blks: @@ -704,11 +701,11 @@ def convert_dtypes( ] if all(dtype == self.dtype for dtype in dtypes): # Avoid block splitting if no dtype changes - rbs.append(blk.copy(deep=copy)) + rbs.append(blk.copy(deep=False)) continue for dtype, b in zip(dtypes, sub_blks): - rbs.append(b.astype(dtype=dtype, copy=copy, squeeze=b.ndim != 1)) + rbs.append(b.astype(dtype=dtype, squeeze=b.ndim != 1)) return rbs # --------------------------------------------------------------------- @@ -723,9 +720,7 @@ def dtype(self) -> DtypeObj: def astype( self, dtype: DtypeObj, - copy: bool = False, errors: IgnoreRaise = "raise", - using_cow: bool = False, squeeze: bool = False, ) -> Block: """ @@ -734,13 +729,9 @@ def astype( Parameters ---------- dtype : np.dtype or ExtensionDtype - copy : bool, default False - copy if indicated errors : str, {'raise', 'ignore'}, default 'raise' - ``raise`` : allow exceptions to be raised - ``ignore`` : suppress exceptions. On error return original object - using_cow: bool, default False - Signaling if copy on write copy logic is used. squeeze : bool, default False squeeze values to ndim=1 if only one column is given @@ -754,18 +745,18 @@ def astype( raise ValueError("Can not squeeze with more than one column.") values = values[0, :] # type: ignore[call-overload] - new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) + new_values = astype_array_safe(values, dtype, errors=errors) new_values = maybe_coerce_values(new_values) refs = None - if (using_cow or not copy) and astype_is_view(values.dtype, new_values.dtype): + if astype_is_view(values.dtype, new_values.dtype): refs = self.refs newb = self.make_block(new_values, refs=refs) if newb.shape != self.shape: raise TypeError( - f"cannot set astype for copy = [{copy}] for dtype " + f"cannot set astype for dtype " f"({self.dtype.name} [{self.shape}]) to different shape " f"({newb.dtype.name} [{newb.shape}])" ) @@ -801,8 +792,16 @@ def copy(self, deep: bool = True) -> Self: # --------------------------------------------------------------------- # Copy-on-Write Helpers + def _maybe_copy(self, inplace: bool) -> Self: + if inplace: + deep = self.refs.has_reference() + return self.copy(deep=deep) + return self.copy() + @final - def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self: + def _maybe_copy_cow_check( + self, using_cow: bool = True, inplace: bool = True + ) -> Self: if using_cow and inplace: deep = self.refs.has_reference() blk = self.copy(deep=deep) @@ -811,7 +810,18 @@ def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self: return blk @final - def _get_refs_and_copy(self, using_cow: bool, inplace: bool): + def _get_refs_and_copy(self, inplace: bool): + refs = None + copy = not inplace + if inplace: + if self.refs.has_reference(): + copy = True + else: + refs = self.refs + return copy, refs + + @final + def _get_refs_and_copy_cow_check(self, using_cow: bool, inplace: bool): refs = None copy = not inplace if inplace: @@ -847,7 +857,7 @@ def replace( if isinstance(values, Categorical): # TODO: avoid special-casing # GH49404 - blk = self._maybe_copy(using_cow, inplace) + blk = self._maybe_copy_cow_check(using_cow, inplace) values = cast(Categorical, blk.values) values._replace(to_replace=to_replace, value=value, inplace=True) return [blk] @@ -875,7 +885,7 @@ def replace( elif self._can_hold_element(value): # TODO(CoW): Maybe split here as well into columns where mask has True # and rest? - blk = self._maybe_copy(using_cow, inplace) + blk = self._maybe_copy_cow_check(using_cow, inplace) putmask_inplace(blk.values, mask, value) if not (self.is_object and value is None): @@ -968,7 +978,7 @@ def _replace_regex( rx = re.compile(to_replace) - block = self._maybe_copy(using_cow, inplace) + block = self._maybe_copy_cow_check(using_cow, inplace) replace_regex(block.values, rx, value, mask) @@ -1005,7 +1015,7 @@ def replace_list( if isinstance(values, Categorical): # TODO: avoid special-casing # GH49404 - blk = self._maybe_copy(using_cow, inplace) + blk = self._maybe_copy_cow_check(using_cow, inplace) values = cast(Categorical, blk.values) values._replace(to_replace=src_list, value=dest_list, inplace=True) return [blk] @@ -1164,7 +1174,7 @@ def _replace_coerce( # gh-45601, gh-45836, gh-46634 if mask.any(): has_ref = self.refs.has_reference() - nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow) + nb = self.astype(np.dtype(object)) if (nb is self or using_cow) and not inplace: nb = nb.copy() elif inplace and has_ref and nb.refs.has_reference() and using_cow: @@ -1325,7 +1335,7 @@ def _unstack( # --------------------------------------------------------------------- - def setitem(self, indexer, value, using_cow: bool = False) -> Block: + def setitem(self, indexer, value) -> Block: """ Attempt self.values[indexer] = value, possibly creating a new array. @@ -1335,8 +1345,6 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: The subset of self.values to set value : object The value being set - using_cow: bool, default False - Signaling if CoW is used. Returns ------- @@ -1375,7 +1383,7 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: # test_iloc_setitem_custom_object casted = setitem_datetimelike_compat(values, len(vi), casted) - self = self._maybe_copy(using_cow, inplace=True) + self = self._maybe_copy(inplace=True) values = cast(np.ndarray, self.values.T) if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1: # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 @@ -1383,7 +1391,7 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: values[indexer] = casted return self - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask(self, mask, new) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1394,7 +1402,6 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: ---------- mask : np.ndarray[bool], SparseArray[bool], or BooleanArray new : a ndarray/object - using_cow: bool, default False Returns ------- @@ -1412,14 +1419,12 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: new = extract_array(new, extract_numpy=True) if noop: - if using_cow: - return [self.copy(deep=False)] - return [self] + return [self.copy(deep=False)] try: casted = np_can_hold_element(values.dtype, new) - self = self._maybe_copy(using_cow, inplace=True) + self = self._maybe_copy(inplace=True) values = cast(np.ndarray, self.values) putmask_without_repeat(values.T, mask, casted) @@ -1435,7 +1440,7 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: ).putmask(mask, new) else: indexer = mask.nonzero()[0] - nb = self.setitem(indexer, new[indexer], using_cow=using_cow) + nb = self.setitem(indexer, new[indexer]) return [nb] else: @@ -1450,13 +1455,11 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: n = new[:, i : i + 1] submask = orig_mask[:, i : i + 1] - rbs = nb.putmask(submask, n, using_cow=using_cow) + rbs = nb.putmask(submask, n) res_blocks.extend(rbs) return res_blocks - def where( - self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False - ) -> list[Block]: + def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: """ evaluate the block; return result block(s) from the result @@ -1487,9 +1490,7 @@ def where( icond, noop = validate_putmask(values, ~cond) if noop: # GH-39595: Always return a copy; short-circuit up/downcasting - if using_cow: - return [self.copy(deep=False)] - return [self.copy()] + return [self.copy(deep=False)] if other is lib.no_default: other = self.fill_value @@ -1508,10 +1509,8 @@ def where( # no need to split columns block = self.coerce_to_target_dtype(other) - blocks = block.where(orig_other, cond, using_cow=using_cow) - return self._maybe_downcast( - blocks, downcast=_downcast, using_cow=using_cow, caller="where" - ) + blocks = block.where(orig_other, cond) + return self._maybe_downcast(blocks, downcast=_downcast, caller="where") else: # since _maybe_downcast would split blocks anyway, we @@ -1528,9 +1527,7 @@ def where( oth = other[:, i : i + 1] submask = cond[:, i : i + 1] - rbs = nb.where( - oth, submask, _downcast=_downcast, using_cow=using_cow - ) + rbs = nb.where(oth, submask, _downcast=_downcast) res_blocks.extend(rbs) return res_blocks @@ -1579,7 +1576,6 @@ def fillna( limit: int | None = None, inplace: bool = False, downcast=None, - using_cow: bool = False, ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to @@ -1598,24 +1594,18 @@ def fillna( if noop: # we can't process the value, but nothing to do if inplace: - if using_cow: - return [self.copy(deep=False)] - # Arbitrarily imposing the convention that we ignore downcast - # on no-op when inplace=True - return [self] + return [self.copy(deep=False)] else: # GH#45423 consistent downcasting on no-ops. - nb = self.copy(deep=not using_cow) - nbs = nb._maybe_downcast( - [nb], downcast=downcast, using_cow=using_cow, caller="fillna" - ) + nb = self.copy(deep=False) + nbs = nb._maybe_downcast([nb], downcast=downcast, caller="fillna") return nbs if limit is not None: mask[mask.cumsum(self.ndim - 1) > limit] = False if inplace: - nbs = self.putmask(mask.T, value, using_cow=using_cow) + nbs = self.putmask(mask.T, value) else: # without _downcast, we would break # test_fillna_dtype_conversion_equiv_replace @@ -1626,9 +1616,7 @@ def fillna( # different behavior in _maybe_downcast. return extend_blocks( [ - blk._maybe_downcast( - [blk], downcast=downcast, using_cow=using_cow, caller="fillna" - ) + blk._maybe_downcast([blk], downcast=downcast, caller="fillna") for blk in nbs ] ) @@ -1642,15 +1630,12 @@ def pad_or_backfill( limit: int | None = None, limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, - using_cow: bool = False, ) -> list[Block]: if not self._can_hold_na: # If there are no NAs, then interpolate is a no-op - if using_cow: - return [self.copy(deep=False)] - return [self] if inplace else [self.copy()] + return [self.copy(deep=False)] - copy, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(inplace) # Dispatch to the NumpyExtensionArray method. # We know self.array_values is a NumpyExtensionArray bc EABlock overrides @@ -1669,7 +1654,7 @@ def pad_or_backfill( data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna") + return nb._maybe_downcast([nb], downcast, caller="fillna") @final def interpolate( @@ -1682,7 +1667,6 @@ def interpolate( limit_direction: Literal["forward", "backward", "both"] = "forward", limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, - using_cow: bool = False, **kwargs, ) -> list[Block]: inplace = validate_bool_kwarg(inplace, "inplace") @@ -1693,20 +1677,16 @@ def interpolate( if not self._can_hold_na: # If there are no NAs, then interpolate is a no-op - if using_cow: - return [self.copy(deep=False)] - return [self] if inplace else [self.copy()] + return [self.copy(deep=False)] # TODO(3.0): this case will not be reachable once GH#53638 is enforced if self.dtype == _dtype_obj: # only deal with floats # bc we already checked that can_hold_na, we don't have int dtype here # test_interp_basic checks that we make a copy here - if using_cow: - return [self.copy(deep=False)] - return [self] if inplace else [self.copy()] + return [self.copy(deep=False)] - copy, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(inplace) # Dispatch to the EA method. new_values = self.array_values.interpolate( @@ -1722,7 +1702,7 @@ def interpolate( data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") + return nb._maybe_downcast([nb], downcast, caller="interpolate") @final def diff(self, n: int) -> list[Block]: @@ -1797,7 +1777,7 @@ def quantile( return new_block_2d(result, placement=self._mgr_locs) @final - def round(self, decimals: int, using_cow: bool = False) -> Self: + def round(self, decimals: int) -> Self: """ Rounds the values. If the block is not of an integer or float dtype, nothing happens. @@ -1809,26 +1789,19 @@ def round(self, decimals: int, using_cow: bool = False) -> Self: decimals: int, Number of decimal places to round to. Caller is responsible for validating this - using_cow: bool, - Whether Copy on Write is enabled right now """ if not self.is_numeric or self.is_bool: - return self.copy(deep=not using_cow) - refs = None + return self.copy(deep=False) # TODO: round only defined on BaseMaskedArray # Series also does this, so would need to fix both places # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]" # has no attribute "round" values = self.values.round(decimals) # type: ignore[union-attr] + + refs = None if values is self.values: - if not using_cow: - # Normally would need to do this before, but - # numpy only returns same array when round operation - # is no-op - # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636 - values = values.copy() - else: - refs = self.refs + refs = self.refs + return self.make_block_same_class(values, refs=refs) # --------------------------------------------------------------------- @@ -1923,7 +1896,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]: return [self.make_block_same_class(new_values)] @final - def setitem(self, indexer, value, using_cow: bool = False): + def setitem(self, indexer, value): """ Attempt self.values[indexer] = value, possibly creating a new array. @@ -1936,8 +1909,6 @@ def setitem(self, indexer, value, using_cow: bool = False): The subset of self.values to set value : object The value being set - using_cow: bool, default False - Signaling if CoW is used. Returns ------- @@ -1980,9 +1951,7 @@ def setitem(self, indexer, value, using_cow: bool = False): return self @final - def where( - self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False - ) -> list[Block]: + def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: # _downcast private bc we only specify it when calling from fillna arr = self.values.T @@ -2000,9 +1969,7 @@ def where( if noop: # GH#44181, GH#45135 # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast - if using_cow: - return [self.copy(deep=False)] - return [self.copy()] + return [self.copy(deep=False)] try: res_values = arr._where(cond, other).T @@ -2011,19 +1978,15 @@ def where( if isinstance(self.dtype, IntervalDtype): # TestSetitemFloatIntervalWithIntIntervalValues blk = self.coerce_to_target_dtype(orig_other) - nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) - return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow, caller="where" - ) + nbs = blk.where(orig_other, orig_cond) + return self._maybe_downcast(nbs, downcast=_downcast, caller="where") elif isinstance(self, NDArrayBackedExtensionBlock): # NB: not (yet) the same as # isinstance(values, NDArrayBackedExtensionArray) blk = self.coerce_to_target_dtype(orig_other) - nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) - return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow, caller="where" - ) + nbs = blk.where(orig_other, orig_cond) + return self._maybe_downcast(nbs, downcast=_downcast, caller="where") else: raise @@ -2041,7 +2004,7 @@ def where( n = orig_other[:, i : i + 1] submask = orig_cond[:, i : i + 1] - rbs = nb.where(n, submask, using_cow=using_cow) + rbs = nb.where(n, submask) res_blocks.extend(rbs) return res_blocks @@ -2049,7 +2012,7 @@ def where( return [nb] @final - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask(self, mask, new) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -2063,11 +2026,9 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: mask = self._maybe_squeeze_arg(mask) if not mask.any(): - if using_cow: - return [self.copy(deep=False)] - return [self] + return [self.copy(deep=False)] - self = self._maybe_copy(using_cow, inplace=True) + self = self._maybe_copy(inplace=True) values = self.values if values.ndim == 2: values = values.T @@ -2149,7 +2110,6 @@ def pad_or_backfill( limit: int | None = None, limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, - using_cow: bool = False, ) -> list[Block]: values = self.values @@ -2191,7 +2151,6 @@ def fillna( limit: int | None = None, inplace: bool = False, downcast=None, - using_cow: bool = False, ) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Block.fillna handles coercion (test_fillna_interval) @@ -2200,13 +2159,12 @@ def fillna( limit=limit, inplace=inplace, downcast=downcast, - using_cow=using_cow, ) - if using_cow and self._can_hold_na and not self.values._hasna: + if self._can_hold_na and not self.values._hasna: refs = self.refs new_values = self.values else: - copy, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(inplace) try: new_values = self.values.fillna( @@ -2230,7 +2188,7 @@ def fillna( ) nb = self.make_block_same_class(new_values, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna") + return nb._maybe_downcast([nb], downcast, caller="fillna") @cache_readonly def shape(self) -> Shape: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5a8a14168d504..cda5575a2b04e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -470,7 +470,6 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: limit=limit, inplace=inplace, downcast=downcast, - using_cow=using_copy_on_write(), ) @final @@ -486,7 +485,6 @@ def where(self, other, cond, align: bool) -> Self: align_keys=align_keys, other=other, cond=cond, - using_cow=using_copy_on_write(), ) @final @@ -502,16 +500,11 @@ def putmask(self, mask, new, align: bool = True) -> Self: align_keys=align_keys, mask=mask, new=new, - using_cow=using_copy_on_write(), ) @final - def round(self, decimals: int, using_cow: bool = False) -> Self: - return self.apply( - "round", - decimals=decimals, - using_cow=using_cow, - ) + def round(self, decimals: int) -> Self: + return self.apply("round", decimals=decimals) @final def replace(self, to_replace, value, inplace: bool) -> Self: @@ -558,20 +551,10 @@ def replace_list( return bm def interpolate(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "interpolate", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - ) + return self.apply("interpolate", inplace=inplace, **kwargs) def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "pad_or_backfill", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - ) + return self.apply("pad_or_backfill", inplace=inplace, **kwargs) def shift(self, periods: int, fill_value) -> Self: if fill_value is lib.no_default: @@ -622,21 +605,7 @@ def diff(self, n: int) -> Self: return self.apply("diff", n=n) def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self: - if copy is None: - if using_copy_on_write(): - copy = False - else: - copy = True - elif using_copy_on_write(): - copy = False - - return self.apply( - "astype", - dtype=dtype, - copy=copy, - errors=errors, - using_cow=using_copy_on_write(), - ) + return self.apply("astype", dtype=dtype, errors=errors) def convert(self, copy: bool | None) -> Self: if copy is None: @@ -650,14 +619,7 @@ def convert(self, copy: bool | None) -> Self: return self.apply("convert", copy=copy, using_cow=using_copy_on_write()) def convert_dtypes(self, **kwargs): - if using_copy_on_write(): - copy = False - else: - copy = True - - return self.apply( - "convert_dtypes", copy=copy, using_cow=using_copy_on_write(), **kwargs - ) + return self.apply("convert_dtypes", **kwargs) def get_values_for_csv( self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None diff --git a/pandas/core/series.py b/pandas/core/series.py index d3c199286931f..78a3bdd2281ce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2671,7 +2671,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series: dtype: float64 """ nv.validate_round(args, kwargs) - new_mgr = self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()) + new_mgr = self._mgr.round(decimals=decimals) return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( self, method="round" ) From 46fd7114d6ded7dcd58499776175d4ccfd04d9da Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 5 Feb 2024 02:01:35 +0000 Subject: [PATCH 2/7] CoW: Remove copy-on-write conditions from frame tests (#57255) --- pandas/tests/frame/indexing/test_getitem.py | 9 +--- pandas/tests/frame/indexing/test_indexing.py | 47 ++++------------- pandas/tests/frame/indexing/test_insert.py | 13 ++--- pandas/tests/frame/indexing/test_setitem.py | 35 ++++--------- pandas/tests/frame/indexing/test_xs.py | 32 ++++-------- pandas/tests/frame/methods/test_align.py | 7 +-- pandas/tests/frame/methods/test_cov_corr.py | 13 ++--- pandas/tests/frame/methods/test_fillna.py | 39 +++----------- .../tests/frame/methods/test_interpolate.py | 47 ++++++----------- pandas/tests/frame/methods/test_quantile.py | 13 ++--- pandas/tests/frame/methods/test_reindex.py | 19 ++----- pandas/tests/frame/methods/test_rename.py | 7 +-- pandas/tests/frame/methods/test_set_axis.py | 29 ++--------- .../tests/frame/methods/test_sort_values.py | 13 ++--- .../frame/methods/test_to_dict_of_blocks.py | 21 ++------ pandas/tests/frame/methods/test_to_numpy.py | 15 ++---- pandas/tests/frame/methods/test_transpose.py | 15 ++---- pandas/tests/frame/methods/test_update.py | 16 ++---- pandas/tests/frame/methods/test_values.py | 26 +++------- pandas/tests/frame/test_api.py | 8 +-- pandas/tests/frame/test_arithmetic.py | 12 ++--- pandas/tests/frame/test_block_internals.py | 52 ++++--------------- pandas/tests/frame/test_constructors.py | 39 +++----------- 23 files changed, 129 insertions(+), 398 deletions(-) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 73683922bcc92..25d6e06a4c675 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -391,18 +391,13 @@ def test_getitem_empty_frame_with_boolean(self): df2 = df[df > 0] tm.assert_frame_equal(df, df2) - def test_getitem_returns_view_when_column_is_unique_in_df( - self, using_copy_on_write - ): + def test_getitem_returns_view_when_column_is_unique_in_df(self): # GH#45316 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) df_orig = df.copy() view = df["b"] view.loc[:] = 100 - if using_copy_on_write: - expected = df_orig - else: - expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"]) + expected = df_orig tm.assert_frame_equal(df, expected) def test_getitem_frozenset_unique_in_column(self): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b48ad7e3481b9..97176b20376ff 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -565,9 +565,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self): with pytest.raises(KeyError, match=r"^3$"): df2.loc[3:11] = 0 - def test_fancy_getitem_slice_mixed( - self, float_frame, float_string_frame, using_copy_on_write - ): + def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): sliced = float_string_frame.iloc[:, -3:] assert sliced["D"].dtype == np.float64 @@ -579,13 +577,7 @@ def test_fancy_getitem_slice_mixed( assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) sliced.loc[:, "C"] = 4.0 - if not using_copy_on_write: - assert (float_frame["C"] == 4).all() - - # with the enforcement of GH#45333 in 2.0, this remains a view - np.shares_memory(sliced["C"]._values, float_frame["C"]._values) - else: - tm.assert_frame_equal(float_frame, original) + tm.assert_frame_equal(float_frame, original) def test_getitem_setitem_non_ix_labels(self): df = DataFrame(range(20), index=date_range("2020-01-01", periods=20)) @@ -1053,7 +1045,7 @@ def test_iloc_row(self): expected = df.reindex(df.index[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_row_slice_view(self, using_copy_on_write): + def test_iloc_row_slice_view(self): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2) ) @@ -1067,11 +1059,6 @@ def test_iloc_row_slice_view(self, using_copy_on_write): exp_col = original[2].copy() subset.loc[:, 2] = 0.0 - if not using_copy_on_write: - exp_col._values[4:8] = 0.0 - - # With the enforcement of GH#45333 in 2.0, this remains a view - assert np.shares_memory(df[2], subset[2]) tm.assert_series_equal(df[2], exp_col) def test_iloc_col(self): @@ -1097,32 +1084,20 @@ def test_iloc_col(self): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_col_slice_view(self, using_copy_on_write): + def test_iloc_col_slice_view(self): df = DataFrame( np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) original = df.copy() subset = df.iloc[:, slice(4, 8)] - if not using_copy_on_write: - # verify slice is view - assert np.shares_memory(df[8]._values, subset[8]._values) - - subset.loc[:, 8] = 0.0 - - assert (df[8] == 0).all() - - # with the enforcement of GH#45333 in 2.0, this remains a view - assert np.shares_memory(df[8]._values, subset[8]._values) - else: - if using_copy_on_write: - # verify slice is view - assert np.shares_memory(df[8]._values, subset[8]._values) - subset[8] = 0.0 - # subset changed - assert (subset[8] == 0).all() - # but df itself did not change (setitem replaces full column) - tm.assert_frame_equal(df, original) + # verify slice is view + assert np.shares_memory(df[8]._values, subset[8]._values) + subset[8] = 0.0 + # subset changed + assert (subset[8] == 0).all() + # but df itself did not change (setitem replaces full column) + tm.assert_frame_equal(df, original) def test_loc_duplicates(self): # gh-17105 diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index b9fc5dc195026..2558e8314664a 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -71,7 +71,7 @@ def test_insert_with_columns_dups(self): ) tm.assert_frame_equal(df, exp) - def test_insert_item_cache(self, using_copy_on_write): + def test_insert_item_cache(self): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) ser = df[0] expected_warning = PerformanceWarning @@ -80,14 +80,9 @@ def test_insert_item_cache(self, using_copy_on_write): for n in range(100): df[n + 3] = df[1] * n - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df[0][0] - assert df.iloc[0, 0] != 99 - else: - ser.values[0] = 99 - assert df.iloc[0, 0] == df[0][0] - assert df.iloc[0, 0] == 99 + ser.iloc[0] = 99 + assert df.iloc[0, 0] == df[0][0] + assert df.iloc[0, 0] != 99 def test_insert_EA_no_warning(self): # PerformanceWarning about fragmented frame should not be raised when diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 2df01b2cdb721..20e7651f8af83 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -324,7 +324,7 @@ def test_frame_setitem_existing_datetime64_col_other_units(self, unit): df["dates"] = vals assert (df["dates"].values == ex_vals).all() - def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): + def test_setitem_dt64tz(self, timezone_frame): df = timezone_frame idx = df["B"].rename("foo") @@ -345,10 +345,7 @@ def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): tm.assert_extension_array_equal(v1, v2) v1base = v1._ndarray.base v2base = v2._ndarray.base - if not using_copy_on_write: - assert v1base is None or (id(v1base) != id(v2base)) - else: - assert id(v1base) == id(v2base) + assert id(v1base) == id(v2base) # with nan df2 = df.copy() @@ -844,7 +841,7 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): class TestDataFrameSetItemWithExpansion: - def test_setitem_listlike_views(self, using_copy_on_write): + def test_setitem_listlike_views(self): # GH#38148 df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) @@ -857,10 +854,7 @@ def test_setitem_listlike_views(self, using_copy_on_write): # edit in place the first column to check view semantics df.iloc[0, 0] = 100 - if using_copy_on_write: - expected = Series([1, 2, 3], name="a") - else: - expected = Series([100, 2, 3], name="a") + expected = Series([1, 2, 3], name="a") tm.assert_series_equal(ser, expected) def test_setitem_string_column_numpy_dtype_raising(self): @@ -870,7 +864,7 @@ def test_setitem_string_column_numpy_dtype_raising(self): expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"]) tm.assert_frame_equal(df, expected) - def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): + def test_setitem_empty_df_duplicate_columns(self): # GH#38521 df = DataFrame(columns=["a", "b", "b"], dtype="float64") df.loc[:, "a"] = list(range(2)) @@ -1199,7 +1193,7 @@ def test_setitem_always_copy(self, float_frame): assert notna(s[5:10]).all() @pytest.mark.parametrize("consolidate", [True, False]) - def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write): + def test_setitem_partial_column_inplace(self, consolidate): # This setting should be in-place, regardless of whether frame is # single-block or multi-block # GH#304 this used to be incorrectly not-inplace, in which case @@ -1215,18 +1209,11 @@ def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write): else: assert len(df._mgr.blocks) == 2 - zvals = df["z"]._values - df.loc[2:, "z"] = 42 expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z") tm.assert_series_equal(df["z"], expected) - # check setting occurred in-place - if not using_copy_on_write: - tm.assert_numpy_array_equal(zvals, expected.values) - assert np.shares_memory(zvals, df["z"]._values) - def test_setitem_duplicate_columns_not_inplace(self): # GH#39510 cols = ["A", "B"] * 2 @@ -1298,7 +1285,7 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer): df[indexer] = set_value tm.assert_frame_equal(view, expected) - def test_setitem_column_update_inplace(self, using_copy_on_write): + def test_setitem_column_update_inplace(self): # https://github.com/pandas-dev/pandas/issues/47172 labels = [f"c{i}" for i in range(10)] @@ -1308,12 +1295,8 @@ def test_setitem_column_update_inplace(self, using_copy_on_write): with tm.raises_chained_assignment_error(): for label in df.columns: df[label][label] = 1 - if not using_copy_on_write: - # diagonal values all updated - assert np.all(values[np.arange(10), np.arange(10)] == 1) - else: - # original dataframe not updated - assert np.all(values[np.arange(10), np.arange(10)] == 0) + # original dataframe not updated + assert np.all(values[np.arange(10), np.arange(10)] == 0) def test_setitem_column_frame_as_category(self): # GH31581 diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 96ae1050ed15a..4878f74bd152e 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -58,7 +58,7 @@ def test_xs_dt_error(self, datetime_frame): ): datetime_frame.xs(datetime_frame.index[0] - BDay()) - def test_xs_other(self, float_frame, using_copy_on_write): + def test_xs_other(self, float_frame): float_frame_orig = float_frame.copy() # xs get column series = float_frame.xs("A", axis=1) @@ -68,12 +68,9 @@ def test_xs_other(self, float_frame, using_copy_on_write): # view is returned if possible series = float_frame.xs("A", axis=1) series[:] = 5 - if using_copy_on_write: - # but with CoW the view shouldn't propagate mutations - tm.assert_series_equal(float_frame["A"], float_frame_orig["A"]) - assert not (expected == 5).all() - else: - assert (expected == 5).all() + # The view shouldn't propagate mutations + tm.assert_series_equal(float_frame["A"], float_frame_orig["A"]) + assert not (expected == 5).all() def test_xs_corner(self): # pathological mixed-type reordering case @@ -363,7 +360,7 @@ def test_xs_droplevel_false(self): expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - def test_xs_droplevel_false_view(self, using_copy_on_write): + def test_xs_droplevel_false_view(self): # GH#37832 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) @@ -371,26 +368,15 @@ def test_xs_droplevel_false_view(self, using_copy_on_write): assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values) df.iloc[0, 0] = 2 - if using_copy_on_write: - # with copy on write the subset is never modified - expected = DataFrame({"a": [1]}) - else: - # modifying original df also modifies result when having a single block - expected = DataFrame({"a": [2]}) + # The subset is never modified + expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - # with mixed dataframe, modifying the parent doesn't modify result - # TODO the "split" path behaves differently here as with single block df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) df.iloc[0, 0] = 2 - if using_copy_on_write: - # with copy on write the subset is never modified - expected = DataFrame({"a": [1]}) - else: - # FIXME: iloc does not update the array inplace using - # "split" path - expected = DataFrame({"a": [1]}) + # The subset is never modified + expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) def test_xs_list_indexer_droplevel_false(self): diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 1f5d960de40c1..aa539dd0b2dbe 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -48,15 +48,12 @@ def test_frame_align_aware(self): assert new1.index.tz is timezone.utc assert new2.index.tz is timezone.utc - def test_align_float(self, float_frame, using_copy_on_write): + def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - if not using_copy_on_write: - assert af._mgr is float_frame._mgr - else: - assert af._mgr is not float_frame._mgr + assert af._mgr is not float_frame._mgr # axis = 0 other = float_frame.iloc[:-5, :3] diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 2a50137c2d6ef..8e73fbf152e79 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -207,7 +207,7 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method): expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) - def test_corr_item_cache(self, using_copy_on_write): + def test_corr_item_cache(self): # Check that corr does not lead to incorrect entries in item_cache df = DataFrame({"A": range(10)}) @@ -218,15 +218,8 @@ def test_corr_item_cache(self, using_copy_on_write): _ = df.corr(numeric_only=True) - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.loc[0, "A"] == 0 - else: - # Check that the corr didn't break link between ser and df - ser.values[0] = 99 - assert df.loc[0, "A"] == 99 - assert df["A"] is ser - assert df.values[0, 0] == 99 + ser.iloc[0] = 99 + assert df.loc[0, "A"] == 0 @pytest.mark.parametrize("length", [2, 20, 200, 2000]) def test_corr_for_constant_columns(self, length): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index df38ddc6c3116..efb462416e132 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -20,7 +20,7 @@ class TestFillNA: - def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): + def test_fillna_dict_inplace_nonunique_columns(self): df = DataFrame( {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} ) @@ -35,27 +35,16 @@ def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): ) expected.columns = ["A", "A", "A"] tm.assert_frame_equal(df, expected) - - # TODO: what's the expected/desired behavior with CoW? - if not using_copy_on_write: - assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0]) assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1]) - if not using_copy_on_write: - assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2]) - def test_fillna_on_column_view(self, using_copy_on_write): + def test_fillna_on_column_view(self): # GH#46149 avoid unnecessary copies arr = np.full((40, 50), np.nan) df = DataFrame(arr, copy=False) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df[0].fillna(-1, inplace=True) - assert np.isnan(arr[:, 0]).all() - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df[0].fillna(-1, inplace=True) - assert (arr[:, 0] == -1).all() + with tm.raises_chained_assignment_error(): + df[0].fillna(-1, inplace=True) + assert np.isnan(arr[:, 0]).all() # i.e. we didn't create a new 49-column block assert len(df._mgr.arrays) == 1 @@ -107,17 +96,6 @@ def test_fillna_mixed_float(self, mixed_float_frame): result = mf.fillna(method="pad") _check_mixed_float(result, dtype={"C": None}) - def test_fillna_empty(self, using_copy_on_write): - if using_copy_on_write: - pytest.skip("condition is unnecessary complex and is deprecated anyway") - # empty frame (GH#2778) - df = DataFrame(columns=["x"]) - for m in ["pad", "backfill"]: - msg = "Series.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.x.fillna(method=m, inplace=True) - df.x.fillna(method=m) - def test_fillna_different_dtype(self, using_infer_string): # with different dtype (GH#3386) df = DataFrame( @@ -746,7 +724,7 @@ def test_fillna_inplace_with_columns_limit_and_value(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) - def test_inplace_dict_update_view(self, val, using_copy_on_write): + def test_inplace_dict_update_view(self, val): # GH#47188 df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) df_orig = df.copy() @@ -754,10 +732,7 @@ def test_inplace_dict_update_view(self, val, using_copy_on_write): df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected) - if using_copy_on_write: - tm.assert_frame_equal(result_view, df_orig) - else: - tm.assert_frame_equal(result_view, expected) + tm.assert_frame_equal(result_view, df_orig) def test_single_block_df_with_horizontal_axis(self): # GH 47713 diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 5eb9aee2ffb15..483194a46ce56 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -68,7 +68,7 @@ def test_interpolate_inplace(self, frame_or_series, request): @pytest.mark.xfail( using_pyarrow_string_dtype(), reason="interpolate doesn't work for string" ) - def test_interp_basic(self, using_copy_on_write): + def test_interp_basic(self): df = DataFrame( { "A": [1, 2, np.nan, 4], @@ -93,12 +93,8 @@ def test_interp_basic(self, using_copy_on_write): # check we didn't operate inplace GH#45791 cvalues = df["C"]._values dvalues = df["D"].values - if using_copy_on_write: - assert np.shares_memory(cvalues, result["C"]._values) - assert np.shares_memory(dvalues, result["D"]._values) - else: - assert not np.shares_memory(cvalues, result["C"]._values) - assert not np.shares_memory(dvalues, result["D"]._values) + assert np.shares_memory(cvalues, result["C"]._values) + assert np.shares_memory(dvalues, result["D"]._values) with tm.assert_produces_warning(FutureWarning, match=msg): res = df.interpolate(inplace=True) @@ -371,38 +367,25 @@ def test_interp_raise_on_all_object_dtype(self): with pytest.raises(TypeError, match=msg): df.interpolate() - def test_interp_inplace(self, using_copy_on_write): + def test_interp_inplace(self): df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) - expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) - expected_cow = df.copy() + expected = df.copy() result = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - return_value = result["a"].interpolate(inplace=True) - assert return_value is None - tm.assert_frame_equal(result, expected_cow) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - return_value = result["a"].interpolate(inplace=True) - assert return_value is None - tm.assert_frame_equal(result, expected) + with tm.raises_chained_assignment_error(): + return_value = result["a"].interpolate(inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) result = df.copy() msg = "The 'downcast' keyword in Series.interpolate is deprecated" - if using_copy_on_write: - with tm.assert_produces_warning( - (FutureWarning, ChainedAssignmentError), match=msg - ): - return_value = result["a"].interpolate(inplace=True, downcast="infer") - assert return_value is None - tm.assert_frame_equal(result, expected_cow) - else: - with tm.assert_produces_warning(FutureWarning, match=msg): - return_value = result["a"].interpolate(inplace=True, downcast="infer") - assert return_value is None - tm.assert_frame_equal(result, expected.astype("int64")) + with tm.assert_produces_warning( + (FutureWarning, ChainedAssignmentError), match=msg + ): + return_value = result["a"].interpolate(inplace=True, downcast="infer") + assert return_value is None + tm.assert_frame_equal(result, expected) def test_interp_inplace_row(self): # GH 10395 diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index e31e29b1b0cb2..48d55b2954360 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -723,7 +723,7 @@ def test_quantile_empty_no_columns(self, interp_method): expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) - def test_quantile_item_cache(self, interp_method, using_copy_on_write): + def test_quantile_item_cache(self, interp_method): # previous behavior incorrect retained an invalid _item_cache entry interpolation, method = interp_method df = DataFrame( @@ -735,14 +735,9 @@ def test_quantile_item_cache(self, interp_method, using_copy_on_write): df.quantile(numeric_only=False, interpolation=interpolation, method=method) - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] != 99 - else: - ser.values[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] == 99 + ser.iloc[0] = 99 + assert df.iloc[0, 0] == df["A"][0] + assert df.iloc[0, 0] != 99 def test_invalid_method(self): with pytest.raises(ValueError, match="Invalid method: foo"): diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index da6d69f36f900..76d80e87bdeb5 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -173,7 +173,7 @@ def test_reindex_copies(self): result2 = df.reindex(columns=cols, index=df.index, copy=True) assert not np.shares_memory(result2[0]._values, df[0]._values) - def test_reindex_copies_ea(self, using_copy_on_write): + def test_reindex_copies_ea(self): # https://github.com/pandas-dev/pandas/pull/51197 # also ensure to honor copy keyword for ExtensionDtypes N = 10 @@ -184,17 +184,11 @@ def test_reindex_copies_ea(self, using_copy_on_write): np.random.default_rng(2).shuffle(cols) result = df.reindex(columns=cols, copy=True) - if using_copy_on_write: - assert np.shares_memory(result[0].array._data, df[0].array._data) - else: - assert not np.shares_memory(result[0].array._data, df[0].array._data) + assert np.shares_memory(result[0].array._data, df[0].array._data) # pass both columns and index result2 = df.reindex(columns=cols, index=df.index, copy=True) - if using_copy_on_write: - assert np.shares_memory(result2[0].array._data, df[0].array._data) - else: - assert not np.shares_memory(result2[0].array._data, df[0].array._data) + assert np.shares_memory(result2[0].array._data, df[0].array._data) def test_reindex_date_fill_value(self): # passing date to dt64 is deprecated; enforced in 2.0 to cast to object @@ -602,7 +596,7 @@ def test_reindex_sparse(self): ) tm.assert_frame_equal(result, expected) - def test_reindex(self, float_frame, using_copy_on_write): + def test_reindex(self, float_frame): datetime_series = Series( np.arange(30, dtype=np.float64), index=date_range("2020-01-01", periods=30) ) @@ -644,10 +638,7 @@ def test_reindex(self, float_frame, using_copy_on_write): # Same index, copies values but not index if copy=False newFrame = float_frame.reindex(float_frame.index, copy=False) - if using_copy_on_write: - assert newFrame.index.is_(float_frame.index) - else: - assert newFrame.index is float_frame.index + assert newFrame.index.is_(float_frame.index) # length zero newFrame = float_frame.reindex([]) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index b965a5d973fb6..996fc30552bc4 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -164,16 +164,13 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, float_frame, using_copy_on_write): + def test_rename_nocopy(self, float_frame): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) renamed.loc[:, "foo"] = 1.0 - if using_copy_on_write: - assert not (float_frame["C"] == 1.0).all() - else: - assert (float_frame["C"] == 1.0).all() + assert not (float_frame["C"] == 1.0).all() def test_rename_inplace(self, float_frame): float_frame.rename(columns={"C": "foo"}) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 8d249bc7b7fa4..8c42498b45621 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -21,7 +21,7 @@ def test_set_axis(self, obj): result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) - def test_set_axis_copy(self, obj, using_copy_on_write): + def test_set_axis_copy(self, obj): # Test copy keyword GH#47932 new_index = list("abcd")[: len(obj)] @@ -32,16 +32,6 @@ def test_set_axis_copy(self, obj, using_copy_on_write): result = obj.set_axis(new_index, axis=0, copy=True) tm.assert_equal(expected, result) assert result is not obj - # check we DID make a copy - if not using_copy_on_write: - if obj.ndim == 1: - assert not tm.shares_memory(result, obj) - else: - assert not any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) - result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) assert result is not obj @@ -58,20 +48,11 @@ def test_set_axis_copy(self, obj, using_copy_on_write): result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) assert result is not obj - if using_copy_on_write: - # check we DID NOT make a copy - if obj.ndim == 1: - assert tm.shares_memory(result, obj) - else: - assert any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) - # check we DID make a copy - elif obj.ndim == 1: - assert not tm.shares_memory(result, obj) + # check we DID NOT make a copy + if obj.ndim == 1: + assert tm.shares_memory(result, obj) else: - assert not any( + assert any( tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 768c85644c977..c146dcc9c2d71 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -592,7 +592,7 @@ def test_sort_values_nat_na_position_default(self): result = expected.sort_values(["A", "date"]) tm.assert_frame_equal(result, expected) - def test_sort_values_item_cache(self, using_copy_on_write): + def test_sort_values_item_cache(self): # previous behavior incorrect retained an invalid _item_cache entry df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] @@ -603,14 +603,9 @@ def test_sort_values_item_cache(self, using_copy_on_write): df.sort_values(by="A") - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] != 99 - else: - ser.values[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] == 99 + ser.iloc[0] = 99 + assert df.iloc[0, 0] == df["A"][0] + assert df.iloc[0, 0] != 99 def test_sort_values_reshaping(self): # GH 39426 diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 19001f10e37e4..0f1f643209db0 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -10,8 +10,7 @@ class TestToDictOfBlocks: - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_no_copy_blocks(self, float_frame, using_copy_on_write): + def test_no_copy_blocks(self, float_frame): # GH#9607 df = DataFrame(float_frame, copy=True) column = df.columns[0] @@ -23,15 +22,10 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write): _last_df = _df if column in _df: _df.loc[:, column] = _df[column] + 1 + assert _last_df is not None and not _last_df[column].equals(df[column]) - if not using_copy_on_write: - # make sure we did change the original DataFrame - assert _last_df is not None and _last_df[column].equals(df[column]) - else: - assert _last_df is not None and not _last_df[column].equals(df[column]) - -def test_to_dict_of_blocks_item_cache(using_copy_on_write): +def test_to_dict_of_blocks_item_cache(): # Calling to_dict_of_blocks should not poison item_cache df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object)) @@ -42,15 +36,8 @@ def test_to_dict_of_blocks_item_cache(using_copy_on_write): df._to_dict_of_blocks() - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - ser.values[0] = "foo" - else: - # Check that the to_dict_of_blocks didn't break link between ser and df + with pytest.raises(ValueError, match="read-only"): ser.values[0] = "foo" - assert df.loc[0, "b"] == "foo" - - assert df["b"] is ser def test_set_change_dtype_slice(): diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index d92af2775922b..d38bc06260a0e 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -20,23 +20,16 @@ def test_to_numpy_dtype(self): result = df.to_numpy(dtype="int64") tm.assert_numpy_array_equal(result, expected) - def test_to_numpy_copy(self, using_copy_on_write): + def test_to_numpy_copy(self): arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame(arr) - if using_copy_on_write: - assert df.values.base is not arr - assert df.to_numpy(copy=False).base is df.values.base - else: - assert df.values.base is arr - assert df.to_numpy(copy=False).base is arr + assert df.values.base is not arr + assert df.to_numpy(copy=False).base is df.values.base assert df.to_numpy(copy=True).base is not arr # we still don't want a copy when na_value=np.nan is passed, # and that can be respected because we are already numpy-float - if using_copy_on_write: - assert df.to_numpy(copy=False).base is df.values.base - else: - assert df.to_numpy(copy=False, na_value=np.nan).base is arr + assert df.to_numpy(copy=False).base is df.values.base def test_to_numpy_mixed_dtype_to_str(self): # https://github.com/pandas-dev/pandas/issues/35455 diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 45bd8ff0268a8..495663ce135f9 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -124,16 +124,12 @@ def test_transpose_mixed(self): for col, s in mixed_T.items(): assert s.dtype == np.object_ - def test_transpose_get_view(self, float_frame, using_copy_on_write): + def test_transpose_get_view(self, float_frame): dft = float_frame.T dft.iloc[:, 5:10] = 5 + assert (float_frame.values[5:10] != 5).all() - if using_copy_on_write: - assert (float_frame.values[5:10] != 5).all() - else: - assert (float_frame.values[5:10] == 5).all() - - def test_transpose_get_view_dt64tzget_view(self, using_copy_on_write): + def test_transpose_get_view_dt64tzget_view(self): dti = date_range("2016-01-01", periods=6, tz="US/Pacific") arr = dti._data.reshape(3, 2) df = DataFrame(arr) @@ -143,10 +139,7 @@ def test_transpose_get_view_dt64tzget_view(self, using_copy_on_write): assert result._mgr.nblocks == 1 rtrip = result._mgr.blocks[0].values - if using_copy_on_write: - assert np.shares_memory(df._mgr.blocks[0].values._ndarray, rtrip._ndarray) - else: - assert np.shares_memory(arr._ndarray, rtrip._ndarray) + assert np.shares_memory(df._mgr.blocks[0].values._ndarray, rtrip._ndarray) def test_transpose_not_inferring_dt(self): # GH#51546 diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 7ff8508c3b799..788c6220b2477 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -138,7 +138,7 @@ def test_update_datetime_tz(self): expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) - def test_update_datetime_tz_in_place(self, using_copy_on_write): + def test_update_datetime_tz_in_place(self): # https://github.com/pandas-dev/pandas/issues/56227 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) orig = result.copy() @@ -146,12 +146,9 @@ def test_update_datetime_tz_in_place(self, using_copy_on_write): result.update(result + pd.Timedelta(days=1)) expected = DataFrame([pd.Timestamp("2019-01-02", tz="UTC")]) tm.assert_frame_equal(result, expected) - if not using_copy_on_write: - tm.assert_frame_equal(view, expected) - else: - tm.assert_frame_equal(view, orig) + tm.assert_frame_equal(view, orig) - def test_update_with_different_dtype(self, using_copy_on_write): + def test_update_with_different_dtype(self): # GH#3217 df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) df["c"] = np.nan @@ -167,7 +164,7 @@ def test_update_with_different_dtype(self, using_copy_on_write): ) tm.assert_frame_equal(df, expected) - def test_update_modify_view(self, using_copy_on_write, using_infer_string): + def test_update_modify_view(self, using_infer_string): # GH#47188 df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) @@ -176,10 +173,7 @@ def test_update_modify_view(self, using_copy_on_write, using_infer_string): df2.update(df) expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) tm.assert_frame_equal(df2, expected) - if using_copy_on_write or using_infer_string: - tm.assert_frame_equal(result_view, df2_orig) - else: - tm.assert_frame_equal(result_view, expected) + tm.assert_frame_equal(result_view, df2_orig) def test_update_dt_column_with_NaT_create_column(self): # GH#16713 diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index f1230e55f9054..dfece3fc7552b 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -13,14 +13,10 @@ class TestDataFrameValues: - def test_values(self, float_frame, using_copy_on_write): - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - float_frame.values[:, 0] = 5.0 - assert (float_frame.values[:, 0] != 5).all() - else: + def test_values(self, float_frame): + with pytest.raises(ValueError, match="read-only"): float_frame.values[:, 0] = 5.0 - assert (float_frame.values[:, 0] == 5).all() + assert (float_frame.values[:, 0] != 5).all() def test_more_values(self, float_string_frame): values = float_string_frame.values @@ -228,34 +224,26 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame): class TestPrivateValues: - def test_private_values_dt64tz(self, using_copy_on_write): + def test_private_values_dt64tz(self): dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1) df = DataFrame(dta, columns=["A"]) tm.assert_equal(df._values, dta) - if using_copy_on_write: - assert not np.shares_memory(df._values._ndarray, dta._ndarray) - else: - # we have a view - assert np.shares_memory(df._values._ndarray, dta._ndarray) + assert not np.shares_memory(df._values._ndarray, dta._ndarray) # TimedeltaArray tda = dta - dta df2 = df - df tm.assert_equal(df2._values, tda) - def test_private_values_dt64tz_multicol(self, using_copy_on_write): + def test_private_values_dt64tz_multicol(self): dta = date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2) df = DataFrame(dta, columns=["A", "B"]) tm.assert_equal(df._values, dta) - if using_copy_on_write: - assert not np.shares_memory(df._values._ndarray, dta._ndarray) - else: - # we have a view - assert np.shares_memory(df._values._ndarray, dta._ndarray) + assert not np.shares_memory(df._values._ndarray, dta._ndarray) # TimedeltaArray tda = dta - dta diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 0112e0093c102..b849baa8cab62 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -325,7 +325,6 @@ def test_set_flags( self, allows_duplicate_labels, frame_or_series, - using_copy_on_write, ): obj = DataFrame({"A": [1, 2]}) key = (0, 0) @@ -354,12 +353,7 @@ def test_set_flags( assert np.may_share_memory(obj["A"].values, result["A"].values) result.iloc[key] = 0 - if using_copy_on_write: - assert obj.iloc[key] == 1 - else: - assert obj.iloc[key] == 0 - # set back to 1 for test below - result.iloc[key] = 1 + assert obj.iloc[key] == 1 # Now we do copy. result = obj.set_flags( diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 4fb0bbafc6879..fc40fd5329118 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2006,7 +2006,7 @@ def test_arith_list_of_arraylike_raise(to_add): to_add + df -def test_inplace_arithmetic_series_update(using_copy_on_write): +def test_inplace_arithmetic_series_update(): # https://github.com/pandas-dev/pandas/issues/36373 df = DataFrame({"A": [1, 2, 3]}) df_orig = df.copy() @@ -2014,14 +2014,8 @@ def test_inplace_arithmetic_series_update(using_copy_on_write): vals = series._values series += 1 - if using_copy_on_write: - assert series._values is not vals - tm.assert_frame_equal(df, df_orig) - else: - assert series._values is vals - - expected = DataFrame({"A": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) + assert series._values is not vals + tm.assert_frame_equal(df, df_orig) def test_arithmetic_multiindex_align(): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 36013e1ac949f..78365ad4a0004 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -81,25 +81,10 @@ def test_consolidate_inplace(self, float_frame): for letter in range(ord("A"), ord("Z")): float_frame[chr(letter)] = chr(letter) - def test_modify_values(self, float_frame, using_copy_on_write): - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - float_frame.values[5] = 5 - assert (float_frame.values[5] != 5).all() - return - - float_frame.values[5] = 5 - assert (float_frame.values[5] == 5).all() - - # unconsolidated - float_frame["E"] = 7.0 - col = float_frame["E"] - float_frame.values[6] = 6 - # as of 2.0 .values does not consolidate, so subsequent calls to .values - # does not share data - assert not (float_frame.values[6] == 6).all() - - assert (col == 7).all() + def test_modify_values(self, float_frame): + with pytest.raises(ValueError, match="read-only"): + float_frame.values[5] = 5 + assert (float_frame.values[5] != 5).all() def test_boolean_set_uncons(self, float_frame): float_frame["E"] = 7.0 @@ -332,7 +317,7 @@ def test_is_mixed_type(self, float_frame, float_string_frame): assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type - def test_stale_cached_series_bug_473(self, using_copy_on_write): + def test_stale_cached_series_bug_473(self): # this is chained, but ok with option_context("chained_assignment", None): Y = DataFrame( @@ -347,13 +332,9 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): repr(Y) Y.sum() Y["g"].sum() - if using_copy_on_write: - assert not pd.isna(Y["g"]["c"]) - else: - assert pd.isna(Y["g"]["c"]) + assert not pd.isna(Y["g"]["c"]) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_strange_column_corruption_issue(self, using_copy_on_write): + def test_strange_column_corruption_issue(self): # TODO(wesm): Unclear how exactly this is related to internal matters df = DataFrame(index=[0, 1]) df[0] = np.nan @@ -367,10 +348,7 @@ def test_strange_column_corruption_issue(self, using_copy_on_write): if col not in wasCol: wasCol[col] = 1 df[col] = np.nan - if using_copy_on_write: - df.loc[dt, col] = i - else: - df[col][dt] = i + df.loc[dt, col] = i myid = 100 @@ -408,25 +386,17 @@ def test_add_column_with_pandas_array(self): tm.assert_frame_equal(df, df2) -def test_update_inplace_sets_valid_block_values(using_copy_on_write): +def test_update_inplace_sets_valid_block_values(): # https://github.com/pandas-dev/pandas/issues/33457 df = DataFrame({"a": Series([1, 2, None], dtype="category")}) # inplace update of a single column - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].fillna(1, inplace=True) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["a"].fillna(1, inplace=True) + with tm.raises_chained_assignment_error(): + df["a"].fillna(1, inplace=True) # check we haven't put a Series into any block.values assert isinstance(df._mgr.blocks[0].values, Categorical) - if not using_copy_on_write: - # smoketest for OP bug from GH#35731 - assert df.isnull().sum().sum() == 0 - def test_nonconsolidated_item_cache_take(): # https://github.com/pandas-dev/pandas/issues/35521 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 20f147e94c514..2bbb20c842dba 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -287,25 +287,16 @@ def test_constructor_dtype_copy(self): new_df["col1"] = 200.0 assert orig_df["col1"][0] == 1.0 - def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write): + def test_constructor_dtype_nocast_view_dataframe(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) - if using_copy_on_write: - should_be_view.iloc[0, 0] = 99 - assert df.values[0, 0] == 1 - else: - should_be_view.iloc[0, 0] = 99 - assert df.values[0, 0] == 99 + should_be_view.iloc[0, 0] = 99 + assert df.values[0, 0] == 1 - def test_constructor_dtype_nocast_view_2d_array(self, using_copy_on_write): + def test_constructor_dtype_nocast_view_2d_array(self): df = DataFrame([[1, 2], [3, 4]], dtype="int64") - if not using_copy_on_write: - should_be_view = DataFrame(df.values, dtype=df[0].dtype) - should_be_view.iloc[0, 0] = 97 - assert df.values[0, 0] == 97 - else: - df2 = DataFrame(df.values, dtype=df[0].dtype) - assert df2._mgr.arrays[0].flags.c_contiguous + df2 = DataFrame(df.values, dtype=df[0].dtype) + assert df2._mgr.arrays[0].flags.c_contiguous @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies") def test_1d_object_array_does_not_copy(self): @@ -2127,16 +2118,12 @@ def test_constructor_frame_shallow_copy(self, float_frame): cop.index = np.arange(len(cop)) tm.assert_frame_equal(float_frame, orig) - def test_constructor_ndarray_copy(self, float_frame, using_copy_on_write): + def test_constructor_ndarray_copy(self, float_frame): arr = float_frame.values.copy() df = DataFrame(arr) arr[5] = 5 - if using_copy_on_write: - assert not (df.values[5] == 5).all() - else: - assert (df.values[5] == 5).all() - + assert not (df.values[5] == 5).all() df = DataFrame(arr, copy=True) arr[6] = 6 assert not (df.values[6] == 6).all() @@ -2473,7 +2460,6 @@ def test_dict_nocopy( copy, any_numeric_ea_dtype, any_numpy_dtype, - using_copy_on_write, ): a = np.array([1, 2], dtype=any_numpy_dtype) b = np.array([3, 4], dtype=any_numpy_dtype) @@ -2541,9 +2527,6 @@ def check_views(c_only: bool = False): # view, so we have to check in the other direction df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) assert df.dtypes.iloc[2] == c.dtype - if not copy and not using_copy_on_write: - check_views(True) - if copy: if a.dtype.kind == "M": assert a[0] == a.dtype.type(1, "ns") @@ -2553,12 +2536,6 @@ def check_views(c_only: bool = False): assert b[0] == b.dtype.type(3) # FIXME(GH#35417): enable after GH#35417 assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c - elif not using_copy_on_write: - # TODO: we can call check_views if we stop consolidating - # in setitem_with_indexer - assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c - # TODO: we can check b[0] == 0 if we stop consolidating in - # setitem_with_indexer (except for datetimelike?) def test_construct_from_dict_ea_series(self): # GH#53744 - default of copy=True should also apply for Series with From c0d235fce67d0ece8da269332b0147dc062b091a Mon Sep 17 00:00:00 2001 From: Xiao Yuan Date: Tue, 6 Feb 2024 01:24:50 +0800 Subject: [PATCH 3/7] DOC: fix PR02 errors in docstrings of Index subclasses (#57261) * DOC: fix PR02 errors in docstrings of Index subclasses * mypy --- ci/code_checks.sh | 20 -------------------- pandas/core/indexes/extension.py | 2 ++ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cebc8e976425b..5b65182d1c253 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -71,26 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (PR02)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \ - pandas.CategoricalIndex.rename_categories\ - pandas.CategoricalIndex.reorder_categories\ - pandas.CategoricalIndex.add_categories\ - pandas.CategoricalIndex.remove_categories\ - pandas.CategoricalIndex.set_categories\ - pandas.IntervalIndex.set_closed\ - pandas.IntervalIndex.contains\ - pandas.IntervalIndex.overlaps\ - pandas.IntervalIndex.to_tuples\ - pandas.DatetimeIndex.round\ - pandas.DatetimeIndex.floor\ - pandas.DatetimeIndex.ceil\ - pandas.DatetimeIndex.month_name\ - pandas.DatetimeIndex.day_name\ - pandas.DatetimeIndex.to_period\ - pandas.DatetimeIndex.std\ - pandas.TimedeltaIndex.round\ - pandas.TimedeltaIndex.floor\ - pandas.TimedeltaIndex.ceil\ - pandas.PeriodIndex.strftime\ pandas.Series.dt.to_period\ pandas.Series.dt.tz_localize\ pandas.Series.dt.tz_convert\ diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 61949531f37df..d6fbeb9043bc6 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +from inspect import signature from typing import ( TYPE_CHECKING, Callable, @@ -104,6 +105,7 @@ def method(self, *args, **kwargs): # type: ignore[misc] # error: "property" has no attribute "__name__" method.__name__ = name # type: ignore[attr-defined] method.__doc__ = attr.__doc__ + method.__signature__ = signature(attr) # type: ignore[attr-defined] return method From 3c535422cc41198fb55304ccfd6672940bb1a5f4 Mon Sep 17 00:00:00 2001 From: jrmylow <33999325+jrmylow@users.noreply.github.com> Date: Tue, 6 Feb 2024 01:29:51 +0800 Subject: [PATCH 4/7] DOC: Updated docstring for set_option (#57235) * Updated docstring and exceptions raised * updated code_checks.sh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * testing fix for doc build --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- ci/code_checks.sh | 1 - pandas/_config/config.py | 25 ++++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 5b65182d1c253..f8f63de2c3cda 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -136,7 +136,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.Milli\ pandas.tseries.offsets.Micro\ pandas.tseries.offsets.Nano\ - pandas.set_option\ pandas.Timestamp.max\ pandas.Timestamp.min\ pandas.Timestamp.resolution\ diff --git a/pandas/_config/config.py b/pandas/_config/config.py index bc9d289ddbaed..7612739531695 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -326,9 +326,9 @@ def __doc__(self) -> str: # type: ignore[override] """ _set_option_tmpl = """ -set_option(pat, value) +set_option(*args, **kwargs) -Sets the value of the specified option. +Sets the value of the specified option or options. Available options: @@ -336,13 +336,18 @@ def __doc__(self) -> str: # type: ignore[override] Parameters ---------- -pat : str - Regexp which should match a single option. - Note: partial matches are supported for convenience, but unless you use the - full option name (e.g. x.y.z.option_name), your code may break in future - versions if new options with similar names are introduced. -value : object - New value of option. +*args : str | object + Arguments provided in pairs, which will be interpreted as (pattern, value) + pairs. + pattern: str + Regexp which should match a single option + value: object + New value of option + Note: partial pattern matches are supported for convenience, but unless you + use the full option name (e.g. x.y.z.option_name), your code may break in + future versions if new options with similar names are introduced. +**kwargs : str + Keyword arguments are not currently supported. Returns ------- @@ -350,6 +355,8 @@ def __doc__(self) -> str: # type: ignore[override] Raises ------ +ValueError if odd numbers of non-keyword arguments are provided +TypeError if keyword arguments are provided OptionError if no such option exists Notes From 8a8c408364c0cd82b6a4b7ee4770f2e383cdd57f Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Tue, 6 Feb 2024 00:31:48 +0700 Subject: [PATCH 5/7] Use ruff to detect banned import (#57184) * Use ruff to detect banned import * Combine rules --- .pre-commit-config.yaml | 7 --- pandas/io/common.py | 2 +- pyproject.toml | 3 + scripts/tests/test_use_io_common_urlopen.py | 23 ------- scripts/use_io_common_urlopen.py | 67 --------------------- 5 files changed, 4 insertions(+), 98 deletions(-) delete mode 100644 scripts/tests/test_use_io_common_urlopen.py delete mode 100644 scripts/use_io_common_urlopen.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccd01abc4affe..b7e43404b86bd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -299,13 +299,6 @@ repos: files: ^pandas/core/ exclude: ^pandas/core/api\.py$ types: [python] - - id: use-io-common-urlopen - name: Use pandas.io.common.urlopen instead of urllib.request.urlopen - language: python - entry: python scripts/use_io_common_urlopen.py - files: ^pandas/ - exclude: ^pandas/tests/ - types: [python] - id: no-bool-in-core-generic name: Use bool_t instead of bool in pandas/core/generic.py entry: python scripts/no_bool_in_generic.py diff --git a/pandas/io/common.py b/pandas/io/common.py index 16d7cb76f9ce9..682780a409a8b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -286,7 +286,7 @@ def urlopen(*args, **kwargs): """ import urllib.request - return urllib.request.urlopen(*args, **kwargs) + return urllib.request.urlopen(*args, **kwargs) # noqa: TID251 def is_fsspec_url(url: FilePath | BaseBuffer) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 934f66136f601..7614ceecbd8ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -330,6 +330,9 @@ exclude = [ "env", ] +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"urllib.request.urlopen".msg = "Use pandas.io.common.urlopen instead of urllib.request.urlopen" + [tool.ruff.per-file-ignores] # relative imports allowed for asv_bench "asv_bench/*" = ["TID", "NPY002"] diff --git a/scripts/tests/test_use_io_common_urlopen.py b/scripts/tests/test_use_io_common_urlopen.py deleted file mode 100644 index c2c4a7fe9cb58..0000000000000 --- a/scripts/tests/test_use_io_common_urlopen.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest - -from scripts.use_io_common_urlopen import use_io_common_urlopen - -PATH = "t.py" - - -def test_inconsistent_usage(capsys) -> None: - content = "from urllib.request import urlopen" - result_msg = ( - "t.py:1:0: Don't use urllib.request.urlopen, " - "use pandas.io.common.urlopen instead\n" - ) - with pytest.raises(SystemExit, match=None): - use_io_common_urlopen(content, PATH) - expected_msg, _ = capsys.readouterr() - assert result_msg == expected_msg - - -def test_consistent_usage() -> None: - # should not raise - content = "from pandas.io.common import urlopen" - use_io_common_urlopen(content, PATH) diff --git a/scripts/use_io_common_urlopen.py b/scripts/use_io_common_urlopen.py deleted file mode 100644 index ade97f53cd827..0000000000000 --- a/scripts/use_io_common_urlopen.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Check that pandas/core imports pandas.array as pd_array. - -This makes it easier to grep for usage of pandas array. - -This is meant to be run as a pre-commit hook - to run it manually, you can do: - - pre-commit run use-io-common-urlopen --all-files - -""" - -from __future__ import annotations - -import argparse -import ast -import sys -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Sequence - - -ERROR_MESSAGE = ( - "{path}:{lineno}:{col_offset}: " - "Don't use urllib.request.urlopen, use pandas.io.common.urlopen instead\n" -) - - -class Visitor(ast.NodeVisitor): - def __init__(self, path: str) -> None: - self.path = path - - def visit_ImportFrom(self, node: ast.ImportFrom) -> None: - # Check that pandas.io.common.urlopen is used instead of - # urllib.request.urlopen - if ( - node.module is not None - and node.module.startswith("urllib.request") - and any(i.name == "urlopen" for i in node.names) - ): - msg = ERROR_MESSAGE.format( - path=self.path, lineno=node.lineno, col_offset=node.col_offset - ) - sys.stdout.write(msg) - sys.exit(1) - super().generic_visit(node) - - -def use_io_common_urlopen(content: str, path: str) -> None: - tree = ast.parse(content) - visitor = Visitor(path) - visitor.visit(tree) - - -def main(argv: Sequence[str] | None = None) -> None: - parser = argparse.ArgumentParser() - parser.add_argument("paths", nargs="*") - args = parser.parse_args(argv) - - for path in args.paths: - with open(path, encoding="utf-8") as fd: - content = fd.read() - use_io_common_urlopen(content, path) - - -if __name__ == "__main__": - main() From 9d50b3cc69da1dd9f6bf1fd4764aaeffc52c503a Mon Sep 17 00:00:00 2001 From: koushik-rout-samsung <146946876+koushik-rout-samsung@users.noreply.github.com> Date: Mon, 5 Feb 2024 23:02:53 +0530 Subject: [PATCH 6/7] WEB: Using Bootstrap icon instead of font awesome icons (#57226) * Using bootstrap icon in place of font awesome * bootstrap icon added * class update * class update --- web/pandas/_templates/layout.html | 12 ++++++------ web/pandas/index.html | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html index c8025aeef3791..aa4bfc92ce8a8 100644 --- a/web/pandas/_templates/layout.html +++ b/web/pandas/_templates/layout.html @@ -14,7 +14,7 @@ {% endfor %} - +
@@ -64,27 +64,27 @@ diff --git a/web/pandas/index.html b/web/pandas/index.html index 3d2e5363a2a2d..a9f5c35458bc8 100644 --- a/web/pandas/index.html +++ b/web/pandas/index.html @@ -74,17 +74,17 @@

Follow us

From 77b4824095735ee156a551427a56e2b6a85de861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <6618166+twoertwein@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:14:29 -0500 Subject: [PATCH 7/7] TYP: misc Index return types (#57256) * TYP: misc Index return types * add IndexT to ignore list --- pandas/_typing.py | 1 + pandas/core/generic.py | 2 +- pandas/core/indexes/accessors.py | 5 +++-- pandas/core/indexes/base.py | 7 ++++--- pandas/core/indexes/datetimelike.py | 4 ++-- pandas/core/indexes/datetimes.py | 6 ++++-- pandas/core/indexes/multi.py | 3 ++- pandas/core/indexes/range.py | 2 +- pandas/core/indexes/timedeltas.py | 6 +++++- pyproject.toml | 2 +- 10 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 1fec41463904c..8646b7425894d 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -189,6 +189,7 @@ def __reversed__(self) -> Iterator[_T_co]: # passed in, a DataFrame is always returned. NDFrameT = TypeVar("NDFrameT", bound="NDFrame") +IndexT = TypeVar("IndexT", bound="Index") NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index") AxisInt = int diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 61fb757fafec4..bbe499aad695f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10740,7 +10740,7 @@ def _shift_with_freq(self, periods: int, axis: int, freq) -> Self: f"does not match PeriodIndex freq " f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}" ) - new_ax = index.shift(periods) + new_ax: Index = index.shift(periods) else: new_ax = index.shift(periods, freq) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 1a24ae8530c12..a91fb0a8d718d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, + NoReturn, cast, ) import warnings @@ -108,7 +109,7 @@ def _delegate_property_get(self, name: str): # return the result as a Series return Series(result, index=index, name=self.name).__finalize__(self._parent) - def _delegate_property_set(self, name: str, value, *args, **kwargs): + def _delegate_property_set(self, name: str, value, *args, **kwargs) -> NoReturn: raise ValueError( "modifications to a property of a datetimelike object are not supported. " "Change values on the original." @@ -483,7 +484,7 @@ def to_pytimedelta(self) -> np.ndarray: return self._get_values().to_pytimedelta() @property - def components(self): + def components(self) -> DataFrame: """ Return a Dataframe of the components of the Timedeltas. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87135ce9e0dd0..42613ca4c6573 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -55,6 +55,7 @@ F, IgnoreRaise, IndexLabel, + IndexT, JoinHow, Level, NaPosition, @@ -2027,7 +2028,7 @@ def sortlevel( ascending: bool | list[bool] = True, sort_remaining=None, na_position: NaPosition = "first", - ): + ) -> tuple[Self, np.ndarray]: """ For internal compatibility with the Index API. @@ -4432,7 +4433,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool): target = self._maybe_preserve_names(target, preserve_names) return target - def _maybe_preserve_names(self, target: Index, preserve_names: bool): + def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT: if preserve_names and target.nlevels == 1 and target.name != self.name: target = target.copy(deep=False) target.name = self.name @@ -5987,7 +5988,7 @@ def sort(self, *args, **kwargs): """ raise TypeError("cannot sort an Index object in-place, use sort_values instead") - def shift(self, periods: int = 1, freq=None): + def shift(self, periods: int = 1, freq=None) -> Self: """ Shift index by desired number of time frequency increments. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ae13edab3a35a..a5670536c74f7 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -273,7 +273,7 @@ def _can_partial_date_slice(self, reso: Resolution) -> bool: def _parsed_string_to_bounds(self, reso: Resolution, parsed): raise NotImplementedError - def _parse_with_reso(self, label: str): + def _parse_with_reso(self, label: str) -> tuple[datetime, Resolution]: # overridden by TimedeltaIndex try: if self.freq is None or hasattr(self.freq, "rule_code"): @@ -295,7 +295,7 @@ def _parse_with_reso(self, label: str): reso = Resolution.from_attrname(reso_str) return parsed, reso - def _get_string_slice(self, key: str): + def _get_string_slice(self, key: str) -> slice | npt.NDArray[np.intp]: # overridden by TimedeltaIndex parsed, reso = self._parse_with_reso(key) try: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c978abd8c2427..3cf3352e64f27 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -518,7 +518,9 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex: # -------------------------------------------------------------------- # Indexing Methods - def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime): + def _parsed_string_to_bounds( + self, reso: Resolution, parsed: dt.datetime + ) -> tuple[Timestamp, Timestamp]: """ Calculate datetime bounds for parsed time string and its resolution. @@ -555,7 +557,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime): # which localizes parsed. return start, end - def _parse_with_reso(self, label: str): + def _parse_with_reso(self, label: str) -> tuple[Timestamp, Resolution]: parsed, reso = super()._parse_with_reso(label) parsed = Timestamp(parsed) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0495f23508c09..f4bf4f3b2f275 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -37,6 +37,7 @@ F, IgnoreRaise, IndexLabel, + IndexT, Scalar, Self, Shape, @@ -2727,7 +2728,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool): target = self._maybe_preserve_names(target, preserve_names) return target - def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: + def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT: if ( preserve_names and target.nlevels == self.nlevels diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 16b203931c073..2edf6057442b6 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -876,7 +876,7 @@ def _difference(self, other, sort=None): def symmetric_difference( self, other, result_name: Hashable | None = None, sort=None - ): + ) -> Index: if not isinstance(other, RangeIndex) or sort is not None: return super().symmetric_difference(other, result_name, sort) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 08a265ba47648..db813b047b2bb 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -32,6 +32,7 @@ from pandas.core.indexes.extension import inherit_names if TYPE_CHECKING: + from pandas._libs import NaTType from pandas._typing import DtypeObj @@ -245,7 +246,10 @@ def get_loc(self, key): return Index.get_loc(self, key) - def _parse_with_reso(self, label: str): + # error: Return type "tuple[Timedelta | NaTType, None]" of "_parse_with_reso" + # incompatible with return type "tuple[datetime, Resolution]" in supertype + # "DatetimeIndexOpsMixin" + def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # type: ignore[override] # the "with_reso" is a no-op for TimedeltaIndex parsed = Timedelta(label) return parsed, None diff --git a/pyproject.toml b/pyproject.toml index 7614ceecbd8ca..a7cb87bbca4b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -761,5 +761,5 @@ exclude_lines = [ directory = "coverage_html_report" [tool.codespell] -ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse, nin, timere, expec, expecs" +ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse, nin, timere, expec, expecs, indext" ignore-regex = 'https://([\w/\.])+'