diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index ed93503388893..9dbbe3f9e2b77 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -295,8 +295,9 @@ Other Enhancements
 - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
 - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)
 - Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`)
+- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`)
 - :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`)
 
 .. _whatsnew_0230.api_breaking:
 
 Backwards incompatible API changes
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index c65943fbbb201..9056f78ee02ed 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -162,7 +162,7 @@ def apply_empty_result(self):
                 pass
 
         if reduce:
-            return Series(np.nan, index=self.agg_axis)
+            return self.obj._constructor_sliced(np.nan, index=self.agg_axis)
         else:
             return self.obj.copy()
 
@@ -175,11 +175,13 @@ def apply_raw(self):
             result = np.apply_along_axis(self.f, self.axis, self.values)
 
         # TODO: mixed type case
-        from pandas import DataFrame, Series
         if result.ndim == 2:
-            return DataFrame(result, index=self.index, columns=self.columns)
+            return self.obj._constructor(result,
+                                         index=self.index,
+                                         columns=self.columns)
         else:
-            return Series(result, index=self.agg_axis)
+            return self.obj._constructor_sliced(result,
+                                                index=self.agg_axis)
 
     def apply_broadcast(self, target):
         result_values = np.empty_like(target.values)
@@ -232,7 +234,7 @@ def apply_standard(self):
                                           axis=self.axis,
                                           dummy=dummy,
                                           labels=labels)
-                return Series(result, index=labels)
+                return self.obj._constructor_sliced(result, index=labels)
             except Exception:
                 pass
 
@@ -291,8 +293,7 @@ def wrap_results(self):
             return self.wrap_results_for_axis()
 
         # dict of scalars
-        from pandas import Series
-        result = Series(results)
+        result = self.obj._constructor_sliced(results)
         result.index = self.res_index
 
         return result
@@ -379,7 +380,6 @@ def wrap_results_for_axis(self):
         # we have a non-series and don't want inference
         elif not isinstance(results[0], ABCSeries):
             from pandas import Series
-
             result = Series(results)
             result.index = self.res_index
 
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index c52b512c2930a..caaa311e9ee96 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -514,3 +514,65 @@ def test_subclassed_wide_to_long(self):
         long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")
 
         tm.assert_frame_equal(long_frame, expected)
+
+    def test_subclassed_apply(self):
+        # GH 19822
+        # the applied function must receive SubclassedSeries objects and
+        # frame-shaped results must remain SubclassedDataFrame
+
+        def check_row_subclass(row):
+            assert isinstance(row, tm.SubclassedSeries)
+
+        def stretch(row):
+            if row["variable"] == "height":
+                row["value"] += 0.5
+            return row
+
+        df = tm.SubclassedDataFrame([
+            ['John', 'Doe', 'height', 5.5],
+            ['Mary', 'Bo', 'height', 6.0],
+            ['John', 'Doe', 'weight', 130],
+            ['Mary', 'Bo', 'weight', 150]],
+            columns=['first', 'last', 'variable', 'value'])
+
+        # the subclass is passed along both axes
+        df.apply(check_row_subclass)
+        df.apply(check_row_subclass, axis=1)
+
+        expected = tm.SubclassedDataFrame([
+            ['John', 'Doe', 'height', 6.0],
+            ['Mary', 'Bo', 'height', 6.5],
+            ['John', 'Doe', 'weight', 130],
+            ['Mary', 'Bo', 'weight', 150]],
+            columns=['first', 'last', 'variable', 'value'])
+
+        result = df.apply(stretch, axis=1)
+        assert isinstance(result, tm.SubclassedDataFrame)
+        tm.assert_frame_equal(result, expected)
+
+        expected = tm.SubclassedDataFrame([
+            [1, 2, 3],
+            [1, 2, 3],
+            [1, 2, 3],
+            [1, 2, 3]])
+
+        # returning a SubclassedSeries per row gives a SubclassedDataFrame
+        result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1)
+        assert isinstance(result, tm.SubclassedDataFrame)
+        tm.assert_frame_equal(result, expected)
+
+        # expanding list results also gives a SubclassedDataFrame
+        result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
+        assert isinstance(result, tm.SubclassedDataFrame)
+        tm.assert_frame_equal(result, expected)
+
+        expected = tm.SubclassedSeries([
+            [1, 2, 3],
+            [1, 2, 3],
+            [1, 2, 3],
+            [1, 2, 3]])
+
+        # list results without expansion do not produce a frame
+        result = df.apply(lambda x: [1, 2, 3], axis=1)
+        assert not isinstance(result, tm.SubclassedDataFrame)
+        tm.assert_series_equal(result, expected)