From 57b44187ee5e3846fe026e83f9fbe1e18b83f4c1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 16 Feb 2022 10:36:34 +0100 Subject: [PATCH 1/9] BUG: remove usage of _constructor._get_axis_number (fix when _constructor properties return callables) --- pandas/_testing/__init__.py | 8 ++++---- pandas/core/reshape/concat.py | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index f0b7498225a76..e60700b31c433 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -803,11 +803,11 @@ class SubclassedSeries(Series): @property def _constructor(self): - return SubclassedSeries + return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) @property def _constructor_expanddim(self): - return SubclassedDataFrame + return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) class SubclassedDataFrame(DataFrame): @@ -815,11 +815,11 @@ class SubclassedDataFrame(DataFrame): @property def _constructor(self): - return SubclassedDataFrame + return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) @property def _constructor_sliced(self): - return SubclassedSeries + return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) class SubclassedCategorical(Categorical): diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 71b53d50273e0..67644f9c1bae2 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -467,7 +467,9 @@ def __init__( # Standardize axis parameter to int if isinstance(sample, ABCSeries): - axis = sample._constructor_expanddim._get_axis_number(axis) + from pandas import DataFrame + + axis = DataFrame._get_axis_number(axis) else: axis = sample._get_axis_number(axis) From babbeca331f88045b1452995190ce378b94bd87a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 16 Feb 2022 10:50:41 +0100 Subject: [PATCH 2/9] fix typing --- pandas/core/groupby/groupby.py | 2 +- pandas/core/reshape/concat.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dbff541e9568b..50e1482c4474e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1733,7 +1733,7 @@ def _cumcount_array(self, ascending: bool = True) -> np.ndarray: @final @property - def _obj_1d_constructor(self) -> type[Series]: + def _obj_1d_constructor(self) -> Callable: # GH28330 preserve subclassed Series/DataFrames if isinstance(self.obj, DataFrame): return self.obj._constructor_sliced diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 67644f9c1bae2..cfe44808974a8 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -6,6 +6,7 @@ from collections import abc from typing import ( TYPE_CHECKING, + Callable, Hashable, Iterable, Literal, @@ -541,7 +542,7 @@ def __init__( self.new_axes = self._get_new_axes() def get_result(self): - cons: type[DataFrame | Series] + cons: Callable sample: DataFrame | Series # series only From 800e3213ec4b80b75e7048a6536537ba931e50d3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 16 Feb 2022 11:05:27 +0100 Subject: [PATCH 3/9] also fix _constructor_sliced._replace_single in replace() --- pandas/core/generic.py | 4 +++- pandas/tests/frame/test_subclass.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bd5a114769cfb..809fdd4ef438f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6608,8 +6608,10 @@ def replace( if isinstance(to_replace, (tuple, list)): if isinstance(self, ABCDataFrame): + from pandas import Series + result = self.apply( - self._constructor_sliced._replace_single, + Series._replace_single, args=(to_replace, method, inplace, limit), ) if inplace: diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 0d6cf39f801db..9ec4179bf83fd 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -737,3 +737,11 @@ def test_equals_subclass(self): df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]}) assert df1.equals(df2) assert df2.equals(df1) + + def test_replace_list_method(self): + # https://github.com/pandas-dev/pandas/pull/46018 + df = tm.SubclassedDataFrame({"A": [0, 1, 2]}) + result = df.replace([1, 2], method="ffill") + expected = tm.SubclassedDataFrame({"A": [0, 0, 0]}) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) From 57ab39c0ab5776fc4ae3ce27653366e1214d348e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 16 Feb 2022 14:59:16 +0100 Subject: [PATCH 4/9] fix groupby subclass check + fix test --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_groupby_subclass.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 50e1482c4474e..22e5557028af1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2158,7 +2158,7 @@ def size(self) -> DataFrame | Series: ) # GH28330 preserve subclassed Series/DataFrames through calls - if issubclass(self.obj._constructor, Series): + if isinstance(self.obj, Series): result = self._obj_1d_constructor(result, name=self.obj.name) else: result = self._obj_1d_constructor(result) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 3f83bc06e6c38..64b4daf49ac09 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -46,7 +46,7 @@ def test_groupby_preserves_subclass(obj, groupby_func): # Reduction or transformation kernels should preserve type slices = {"ngroup", "cumcount", "size"} if isinstance(obj, DataFrame) and groupby_func in slices: - assert isinstance(result1, obj._constructor_sliced) + assert isinstance(result1, tm.SubclassedSeries) else: assert isinstance(result1, type(obj)) From 01be021a0172da7fdd69b171ea6c4839c50f10be Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 16 Feb 2022 15:33:30 +0100 Subject: [PATCH 5/9] adds whatsnew --- doc/source/whatsnew/v1.5.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 71394a858aefe..3ebe95b9828a1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -399,6 +399,9 @@ Styler Other ^^^^^ +- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`) + + .. ***DO NOT USE THIS SECTION*** - From 165b7623413895856eb0ef853c9aa3e407f5645e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 16 Feb 2022 22:54:14 +0100 Subject: [PATCH 6/9] try fixing type annotations --- pandas/core/frame.py | 4 ++-- pandas/core/generic.py | 32 ++++++-------------------------- pandas/core/reshape/concat.py | 2 +- pandas/core/series.py | 4 ++-- 4 files changed, 11 insertions(+), 31 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 082a5814c2fc7..3c5de7c2b97d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -581,10 +581,10 @@ class DataFrame(NDFrame, OpsMixin): _mgr: BlockManager | ArrayManager @property - def _constructor(self) -> type[DataFrame]: + def _constructor(self) -> Callable[..., DataFrame]: return DataFrame - _constructor_sliced: type[Series] = Series + _constructor_sliced: Callable[..., Series] = Series # ---------------------------------------------------------------------- # Constructors diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b005f9850c074..6476cdd623207 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -443,7 +443,7 @@ def _validate_dtype(cls, dtype) -> DtypeObj | None: # Construction @property - def _constructor(self: NDFrameT) -> type[NDFrameT]: + def _constructor(self: NDFrameT) -> Callable[..., NDFrameT]: """ Used when a manipulation result has the same dimensions as the original. @@ -778,17 +778,9 @@ def swapaxes(self: NDFrameT, axis1, axis2, copy=True) -> NDFrameT: if copy: new_values = new_values.copy() - # ignore needed because of NDFrame constructor is different than - # DataFrame/Series constructors. return self._constructor( - # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; expected - # "Union[ArrayManager, BlockManager]" - # error: Argument 2 to "NDFrame" has incompatible type "*Generator[Index, - # None, None]"; expected "bool" [arg-type] - # error: Argument 2 to "NDFrame" has incompatible type "*Generator[Index, - # None, None]"; expected "Optional[Mapping[Hashable, Any]]" - new_values, # type: ignore[arg-type] - *new_axes, # type: ignore[arg-type] + new_values, + *new_axes, ).__finalize__(self, method="swapaxes") @final @@ -2087,11 +2079,7 @@ def __array_wrap__( # ptp also requires the item_from_zerodim return res d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) - # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; - # expected "BlockManager" - return self._constructor(res, **d).__finalize__( # type: ignore[arg-type] - self, method="__array_wrap__" - ) + return self._constructor(res, **d).__finalize__(self, method="__array_wrap__") @final def __array_ufunc__( @@ -5922,11 +5910,7 @@ def astype( # GH 19920: retain column metadata after concat result = concat(results, axis=1, copy=False) # GH#40810 retain subclass - # Incompatible types in assignment (expression has type "NDFrameT", - # variable has type "DataFrame") - # Argument 1 to "NDFrame" has incompatible type "DataFrame"; expected - # "Union[ArrayManager, SingleArrayManager, BlockManager, SingleBlockManager]" - result = self._constructor(result) # type: ignore[arg-type,assignment] + result = self._constructor(result) # type: ignore[assignment] result.columns = self.columns result = result.__finalize__(self, method="astype") # https://github.com/python/mypy/issues/8354 @@ -9140,11 +9124,7 @@ def _where( # we are the same shape, so create an actual object for alignment else: - # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; - # expected "BlockManager" - other = self._constructor( - other, **self._construct_axes_dict() # type: ignore[arg-type] - ) + other = self._constructor(other, **self._construct_axes_dict()) if axis is None: axis = 0 diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index cfe44808974a8..278977b0018b2 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -542,7 +542,7 @@ def __init__( self.new_axes = self._get_new_axes() def get_result(self): - cons: Callable + cons: Callable[..., DataFrame | Series] sample: DataFrame | Series # series only diff --git a/pandas/core/series.py b/pandas/core/series.py index efbf8ecf74f1f..a43139fadf484 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -526,11 +526,11 @@ def _init_dict( # ---------------------------------------------------------------------- @property - def _constructor(self) -> type[Series]: + def _constructor(self) -> Callable[..., Series]: return Series @property - def _constructor_expanddim(self) -> type[DataFrame]: + def _constructor_expanddim(self) -> Callable[..., DataFrame]: """ Used when a manipulation result has one higher dimension as the original, such as Series.to_frame() From af63c87a855a622cee7daab14c10a7030b1ee724 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 22 Feb 2022 14:51:37 +0100 Subject: [PATCH 7/9] move whatsnew to 1.4.2 --- doc/source/whatsnew/v1.4.2.rst | 2 +- doc/source/whatsnew/v1.5.0.rst | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index 68999b9bba50f..a88e487f8ce18 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`) - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d990ef7b6690a..e3f772ac026ab 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -404,9 +404,6 @@ Styler Other ^^^^^ -- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`) - - .. ***DO NOT USE THIS SECTION*** - From 5b924fcf5131983eff5ea59bbb5078b2badeeefe Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 24 Feb 2022 09:34:44 +0100 Subject: [PATCH 8/9] add mypy comment --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6476cdd623207..22897a08c9a45 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5910,6 +5910,8 @@ def astype( # GH 19920: retain column metadata after concat result = concat(results, axis=1, copy=False) # GH#40810 retain subclass + # error: Incompatible types in assignment + # (expression has type "NDFrameT", variable has type "DataFrame") result = self._constructor(result) # type: ignore[assignment] result.columns = self.columns result = result.__finalize__(self, method="astype") From 43b29405c878b9e1ca1e33279c19bf5b0900294b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 24 Feb 2022 09:38:03 +0100 Subject: [PATCH 9/9] add comment --- pandas/_testing/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index e60700b31c433..1b236cdc99bed 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -803,6 +803,11 @@ class SubclassedSeries(Series): @property def _constructor(self): + # For testing, those properties return a generic callable, and not + # the actual class. In this case that is equivalent, but it is to + # ensure we don't rely on the property returning a class + # See https://github.com/pandas-dev/pandas/pull/46018 and + # https://github.com/pandas-dev/pandas/issues/32638 and linked issues return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) @property