From a22652791876e62336d2939cb9ae339398384493 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 20 Sep 2023 00:25:36 +0200 Subject: [PATCH] Revert "DEPR: Deprecate returning a DataFrame in SeriesApply.apply_standard" (#55189) * Revert "DEPR: Deprecate returning a DataFrame in SeriesApply.apply_standard (#52123)" This reverts commit fe415f55 * Fix tests * Add whatsnew * Add whatsnew * Add test --- doc/source/user_guide/cookbook.rst | 10 +++---- doc/source/user_guide/groupby.rst | 13 +++++++++ doc/source/whatsnew/v0.10.0.rst | 29 +++++++------------- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.1.1.rst | 2 +- pandas/core/apply.py | 8 ------ pandas/core/series.py | 5 ---- pandas/tests/apply/test_series_apply.py | 36 ++++++++++++------------- 8 files changed, 45 insertions(+), 59 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 002e88533ab938..2d2c0a4db4df60 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -794,12 +794,12 @@ Apply index=["I", "II", "III"], ) - def make_df(ser): - new_vals = [pd.Series(value, name=name) for name, value in ser.items()] - return pd.DataFrame(new_vals) - - df_orgz = pd.concat({ind: row.pipe(make_df) for ind, row in df.iterrows()}) + def SeriesFromSubList(aList): + return pd.Series(aList) + df_orgz = pd.concat( + {ind: row.apply(SeriesFromSubList) for ind, row in df.iterrows()} + ) df_orgz `Rolling apply with a DataFrame returning a Series diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 5dd14e243fbb32..4be62090ec6457 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1213,6 +1213,19 @@ The dimension of the returned result can also change: grouped.apply(f) +``apply`` on a Series can operate on a returned value from the applied function +that is itself a series, and possibly upcast the result to a
DataFrame: + +.. ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=["x", "x^2"]) + + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) + Similar to :ref:`groupby.aggregate.agg`, the resulting dtype will reflect that of the apply function. If the results from different groups have different dtypes, then a common dtype will be determined in the same way as ``DataFrame`` construction. diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst index 422efc1b369460..be50c34d7d14cc 100644 --- a/doc/source/whatsnew/v0.10.0.rst +++ b/doc/source/whatsnew/v0.10.0.rst @@ -261,26 +261,15 @@ Convenience methods ``ffill`` and ``bfill`` have been added: function, that is itself a series, and possibly upcast the result to a DataFrame - .. code-block:: python - - >>> def f(x): - ... return pd.Series([x, x ** 2], index=["x", "x^2"]) - >>> - >>> s = pd.Series(np.random.rand(5)) - >>> s - 0 0.340445 - 1 0.984729 - 2 0.919540 - 3 0.037772 - 4 0.861549 - dtype: float64 - >>> s.apply(f) - x x^2 - 0 0.340445 0.115903 - 1 0.984729 0.969691 - 2 0.919540 0.845555 - 3 0.037772 0.001427 - 4 0.861549 0.742267 + .. 
ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=["x", "x^2"]) + + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) - New API functions for working with pandas options (:issue:`2097`): diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 040ca048d12242..18054e0b01191b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -555,7 +555,6 @@ Other Deprecations - Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`) - Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :attr:`Series.dt` properties (:issue:`20306`) - Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or NumPy array before operating instead (:issue:`51521`) -- Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. 
This pattern was very slow and it's recommended to use alternative methods to archive the same goal (:issue:`52116`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`) - Deprecated the ``fastpath`` keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`) diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index 6d5da7cdff3b33..c9ab496295d858 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -43,7 +43,7 @@ Bug fixes Other ~~~~~ -- +- Reverted the deprecation that disallowed :meth:`Series.apply` returning a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object (:issue:`52116`) .. --------------------------------------------------------------------------- .. _whatsnew_211.contributors: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9748d4fe667395..1525e316f345fc 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1326,14 +1326,6 @@ def curried(x): ) if len(mapped) and isinstance(mapped[0], ABCSeries): - warnings.warn( - "Returning a DataFrame from Series.apply when the supplied function " - "returns a Series is deprecated and will be removed in a future " - "version.", - FutureWarning, - stacklevel=find_stack_level(), - ) # GH52116 - # GH#43986 Need to do list(mapped) in order to get treated as nested # See also GH#25959 regarding EA support return obj._constructor_expanddim(list(mapped), index=obj.index) diff --git a/pandas/core/series.py b/pandas/core/series.py index 78ec1554198df5..3c7270107d71d3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4635,11 +4635,6 @@ def apply( """ Invoke function on values of Series. - .. 
deprecated:: 2.1.0 - - If the result from ``func`` is a ``Series``, wrapping the output in a - ``DataFrame`` instead of a ``Series`` has been deprecated. - Can be ufunc (a NumPy function that applies to the entire Series) or a Python function that only works on single values. diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index d3e5ac1b4ca7a4..aeb6a01eb587a0 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -420,8 +420,9 @@ def test_agg_evaluate_lambdas(string_series): def test_with_nested_series(datetime_series, op_name): # GH 2316 # .agg with a reducer and a transform, what to do - msg = "Returning a DataFrame from Series.apply when the supplied function" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "cannot aggregate" + warning = FutureWarning if op_name == "agg" else None + with tm.assert_produces_warning(warning, match=msg): # GH52123 result = getattr(datetime_series, op_name)( lambda x: Series([x, x**2], index=["x", "x^2"]) @@ -429,6 +430,10 @@ def test_with_nested_series(datetime_series, op_name): expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) + tm.assert_frame_equal(result, expected) + def test_replicate_describe(string_series): # this also tests a result set that is all scalars @@ -512,10 +517,7 @@ def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): index = dti.tz_localize("UTC").index else: index = dti.index - msg = "Returning a DataFrame from Series.apply when the supplied function" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = Series(index).apply(lambda x: Series([1, 2])) + result = Series(index).apply(lambda x: Series([1, 2])) tm.assert_frame_equal(result, exp) @@ -662,19 
+664,7 @@ def test_apply_dictlike_lambda(ops, by_row, expected): def test_apply_retains_column_name(by_row): # GH 16380 df = DataFrame({"x": range(3)}, Index(range(3), name="x")) - func = lambda x: Series(range(x + 1), Index(range(x + 1), name="y")) - - if not by_row: - # GH53400 - msg = "'Series' object cannot be interpreted as an integer" - with pytest.raises(TypeError, match=msg): - df.x.apply(func, by_row=by_row) - return - - msg = "Returning a DataFrame from Series.apply when the supplied function" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = df.x.apply(func, by_row=by_row) + result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y"))) expected = DataFrame( [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], columns=Index(range(3), name="y"), @@ -689,3 +679,11 @@ def test_apply_type(): result = s.apply(type) expected = Series([int, str, type], index=["a", "b", "c"]) tm.assert_series_equal(result, expected) + + +def test_series_apply_unpack_nested_data(): + # GH#55189 + ser = Series([[1, 2, 3], [4, 5, 6, 7]]) + result = ser.apply(lambda x: Series(x)) + expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}) + tm.assert_frame_equal(result, expected)