diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 78ceeacbff85e..9ea01b18d0d28 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -221,6 +221,7 @@ Removal of prior version deprecations/changes - Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`) - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) - Removed 'fastpath' keyword in :class:`Categorical` constructor (:issue:`20110`) +- Removed 'kind' keyword in :meth:`Series.resample` and :meth:`DataFrame.resample` (:issue:`58125`) - Removed alias :class:`arrays.PandasArray` for :class:`arrays.NumpyExtensionArray` (:issue:`53694`) - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) - Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f09676c874cf1..dbe2006642484 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8662,7 +8662,6 @@ def resample( closed: Literal["right", "left"] | None = None, label: Literal["right", "left"] | None = None, convention: Literal["start", "end", "s", "e"] | lib.NoDefault = lib.no_default, - kind: Literal["timestamp", "period"] | None | lib.NoDefault = lib.no_default, on: Level | None = None, level: Level | None = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -8695,14 +8694,6 @@ def resample( .. deprecated:: 2.2.0 Convert PeriodIndex to DatetimeIndex before resampling instead. - kind : {{'timestamp', 'period'}}, optional, default None - Pass 'timestamp' to convert the resulting index to a - `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. - By default the input representation is retained. - - .. deprecated:: 2.2.0 - Convert index to desired type explicitly instead. - on : str, optional For a DataFrame, column to use instead of index for resampling. Column must be datetime-like. @@ -8994,18 +8985,6 @@ def resample( """ from pandas.core.resample import get_resampler - if kind is not lib.no_default: - # GH#55895 - warnings.warn( - f"The 'kind' keyword in {type(self).__name__}.resample is " - "deprecated and will be removed in a future version. " - "Explicitly cast the index to the desired type instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - kind = None - if convention is not lib.no_default: warnings.warn( f"The 'convention' keyword in {type(self).__name__}.resample is " @@ -9023,7 +9002,6 @@ def resample( freq=rule, label=label, closed=closed, - kind=kind, convention=convention, key=on, level=level, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4392f54d9c442..86d1f55f38c05 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -130,8 +130,6 @@ class Resampler(BaseGroupBy, PandasObject): ---------- obj : Series or DataFrame groupby : TimeGrouper - kind : str or None - 'period', 'timestamp' to override default index treatment Returns ------- @@ -154,7 +152,6 @@ class Resampler(BaseGroupBy, PandasObject): "closed", "label", "convention", - "kind", "origin", "offset", ] @@ -163,7 +160,6 @@ def __init__( self, obj: NDFrame, timegrouper: TimeGrouper, - kind=None, *, gpr_index: Index, group_keys: bool = False, @@ -173,7 +169,6 @@ def __init__( self._timegrouper = timegrouper self.keys = None self.sort = True - self.kind = kind self.group_keys = group_keys self.as_index = True self.include_groups = include_groups @@ -1580,7 +1575,7 @@ def _resampler_for_grouping(self) -> type[DatetimeIndexResamplerGroupby]: def _get_binner_for_time(self): # this is how we are actually creating the bins - if self.kind == "period": + if isinstance(self.ax, PeriodIndex): return self._timegrouper._get_time_period_bins(self.ax) return self._timegrouper._get_time_bins(self.ax) @@ -1678,7 +1673,9 @@ def _wrap_result(self, result): # we may have a different kind that we were asked originally # convert if needed - if self.kind == "period" and not isinstance(result.index, PeriodIndex): + if isinstance(self.ax, PeriodIndex) and not isinstance( + result.index, PeriodIndex + ): if isinstance(result.index, MultiIndex): # GH 24103 - e.g. groupby resample if not isinstance(result.index.levels[-1], PeriodIndex): @@ -1719,7 +1716,7 @@ def _resampler_for_grouping(self): return PeriodIndexResamplerGroupby def _get_binner_for_time(self): - if self.kind == "timestamp": + if isinstance(self.ax, DatetimeIndex): return super()._get_binner_for_time() return self._timegrouper._get_period_bins(self.ax) @@ -1736,7 +1733,7 @@ def _convert_obj(self, obj: NDFrameT) -> NDFrameT: raise NotImplementedError(msg) # convert to timestamp - if self.kind == "timestamp": + if isinstance(obj, DatetimeIndex): obj = obj.to_timestamp(how=self.convention) return obj @@ -1751,7 +1748,7 @@ def _downsample(self, how, **kwargs): **kwargs : kw args passed to how function """ # we may need to actually resample as if we are timestamps - if self.kind == "timestamp": + if isinstance(self.ax, DatetimeIndex): return super()._downsample(how, **kwargs) ax = self.ax @@ -1788,7 +1785,7 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): Value to use for missing values. """ # we may need to actually resample as if we are timestamps - if self.kind == "timestamp": + if isinstance(self.ax, DatetimeIndex): return super()._upsample(method, limit=limit, fill_value=fill_value) ax = self.ax @@ -1860,12 +1857,12 @@ def _resampler_cls(self): return TimedeltaIndexResampler -def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler: +def get_resampler(obj: Series | DataFrame, **kwds) -> Resampler: """ Create a TimeGrouper and return our resampler. """ tg = TimeGrouper(obj, **kwds) # type: ignore[arg-type] - return tg._get_resampler(obj, kind=kind) + return tg._get_resampler(obj) get_resampler.__doc__ = Resampler.__doc__ @@ -1877,7 +1874,6 @@ def get_resampler_for_grouping( how=None, fill_method=None, limit: int | None = None, - kind=None, on=None, include_groups: bool = True, **kwargs, @@ -1887,7 +1883,7 @@ def get_resampler_for_grouping( """ # .resample uses 'on' similar to how .groupby uses 'key' tg = TimeGrouper(freq=rule, key=on, **kwargs) - resampler = tg._get_resampler(groupby.obj, kind=kind) + resampler = tg._get_resampler(groupby.obj) return resampler._get_resampler_for_grouping( groupby=groupby, include_groups=include_groups, key=tg.key ) @@ -1910,7 +1906,6 @@ class TimeGrouper(Grouper): "closed", "label", "how", - "kind", "convention", "origin", "offset", @@ -1928,7 +1923,6 @@ def __init__( how: str = "mean", fill_method=None, limit: int | None = None, - kind: str | None = None, convention: Literal["start", "end", "e", "s"] | None = None, origin: Literal["epoch", "start", "start_day", "end", "end_day"] | TimestampConvertibleTypes = "start_day", @@ -1986,7 +1980,6 @@ def __init__( self.closed = closed self.label = label - self.kind = kind self.convention = convention if convention is not None else "e" self.how = how self.fill_method = fill_method @@ -2024,15 +2017,13 @@ def __init__( super().__init__(freq=freq, key=key, **kwargs) - def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: + def _get_resampler(self, obj: NDFrame) -> Resampler: """ Return my resampler or raise if we have an invalid axis. Parameters ---------- obj : Series or DataFrame - kind : string, optional - 'period','timestamp','timedelta' are valid Returns ------- @@ -2048,11 +2039,10 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: return DatetimeIndexResampler( obj, timegrouper=self, - kind=kind, group_keys=self.group_keys, gpr_index=ax, ) - elif isinstance(ax, PeriodIndex) or kind == "period": + elif isinstance(ax, PeriodIndex): if isinstance(ax, PeriodIndex): # GH#53481 warnings.warn( @@ -2061,17 +2051,9 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: FutureWarning, stacklevel=find_stack_level(), ) - else: - warnings.warn( - "Resampling with kind='period' is deprecated. " - "Use datetime paths instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) return PeriodIndexResampler( obj, timegrouper=self, - kind=kind, group_keys=self.group_keys, gpr_index=ax, ) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index fecd24c9a4b40..5ee9b65ba9ae7 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -441,19 +441,6 @@ def test_resample_frame_basic_M_A(freq, unit): tm.assert_series_equal(result["A"], df["A"].resample(freq).mean()) -@pytest.mark.parametrize("freq", ["W-WED", "ME"]) -def test_resample_frame_basic_kind(freq, unit): - df = DataFrame( - np.random.default_rng(2).standard_normal((10, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=10, freq="B"), - ) - df.index = df.index.as_unit(unit) - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.resample(freq, kind="period").mean() - - def test_resample_upsample(unit): # from daily dti = date_range( @@ -665,9 +652,7 @@ def test_resample_timestamp_to_period( ts = simple_date_range_series("1/1/1990", "1/1/2000") ts.index = ts.index.as_unit(unit) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample(freq, kind="period").mean() + result = ts.resample(freq).mean().to_period() expected = ts.resample(freq).mean() expected.index = period_range(**expected_kwargs) tm.assert_series_equal(result, expected) @@ -985,9 +970,7 @@ def test_resample_to_period_monthly_buglet(unit): rng = date_range("1/1/2000", "12/31/2000").as_unit(unit) ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("ME", kind="period").mean() + result = ts.resample("ME").mean().to_period() exp_index = period_range("Jan-2000", "Dec-2000", freq="M") tm.assert_index_equal(result.index, exp_index) @@ -1109,18 +1092,15 @@ def test_resample_anchored_intraday(unit): df = DataFrame(rng.month, index=rng) result = df.resample("ME").mean() - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.resample("ME", kind="period").mean().to_timestamp(how="end") + expected = df.resample("ME").mean().to_period() + expected = expected.to_timestamp(how="end") expected.index += Timedelta(1, "ns") - Timedelta(1, "D") expected.index = expected.index.as_unit(unit)._with_freq("infer") assert expected.index.freq == "ME" tm.assert_frame_equal(result, expected) result = df.resample("ME", closed="left").mean() - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - exp = df.shift(1, freq="D").resample("ME", kind="period").mean() + exp = df.shift(1, freq="D").resample("ME").mean().to_period() exp = exp.to_timestamp(how="end") exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") @@ -1134,9 +1114,8 @@ def test_resample_anchored_intraday2(unit): df = DataFrame(rng.month, index=rng) result = df.resample("QE").mean() - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.resample("QE", kind="period").mean().to_timestamp(how="end") + expected = df.resample("QE").mean().to_period() + expected = expected.to_timestamp(how="end") expected.index += Timedelta(1, "ns") - Timedelta(1, "D") expected.index._data.freq = "QE" expected.index._freq = lib.no_default @@ -1144,11 +1123,8 @@ def test_resample_anchored_intraday2(unit): tm.assert_frame_equal(result, expected) result = df.resample("QE", closed="left").mean() - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = ( - df.shift(1, freq="D").resample("QE", kind="period", closed="left").mean() - ) + expected = df.shift(1, freq="D").resample("QE").mean() + expected = expected.to_period() expected = expected.to_timestamp(how="end") expected.index += Timedelta(1, "ns") - Timedelta(1, "D") expected.index._data.freq = "QE" @@ -1205,9 +1181,7 @@ def test_corner_cases_date(simple_date_range_series, unit): # resample to periods ts = simple_date_range_series("2000-04-28", "2000-04-30 11:00", freq="h") ts.index = ts.index.as_unit(unit) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("ME", kind="period").mean() + result = ts.resample("ME").mean().to_period() assert len(result) == 1 assert result.index[0] == Period("2000-04", freq="M") diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index dd058ada60974..67db427a2cdb7 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -59,22 +59,24 @@ def _simple_period_range_series(start, end, freq="D"): class TestPeriodIndex: @pytest.mark.parametrize("freq", ["2D", "1h", "2h"]) - @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) - def test_asfreq(self, frame_or_series, freq, kind): + def test_asfreq(self, frame_or_series, freq): # GH 12884, 15944 - # make sure .asfreq() returns PeriodIndex (except kind='timestamp') obj = frame_or_series(range(5), index=period_range("2020-01-01", periods=5)) - if kind == "timestamp": - expected = obj.to_timestamp().resample(freq).asfreq() - else: - start = obj.index[0].to_timestamp(how="start") - end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") - new_index = date_range(start=start, end=end, freq=freq, inclusive="left") - expected = obj.to_timestamp().reindex(new_index).to_period(freq) - msg = "The 'kind' keyword in (Series|DataFrame).resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = obj.resample(freq, kind=kind).asfreq() + + expected = obj.to_timestamp().resample(freq).asfreq() + result = obj.to_timestamp().resample(freq).asfreq() + tm.assert_almost_equal(result, expected) + + start = obj.index[0].to_timestamp(how="start") + end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") + new_index = date_range(start=start, end=end, freq=freq, inclusive="left") + expected = obj.to_timestamp().reindex(new_index).to_period(freq) + + result = obj.resample(freq).asfreq() + tm.assert_almost_equal(result, expected) + + result = obj.resample(freq).asfreq().to_timestamp().to_period() tm.assert_almost_equal(result, expected) def test_asfreq_fill_value(self): @@ -88,9 +90,7 @@ def test_asfreq_fill_value(self): freq="1h", ) expected = s.to_timestamp().reindex(new_index, fill_value=4.0) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample("1h", kind="timestamp").asfreq(fill_value=4.0) + result = s.to_timestamp().resample("1h").asfreq(fill_value=4.0) tm.assert_series_equal(result, expected) frame = s.to_frame("value") @@ -100,15 +100,12 @@ def test_asfreq_fill_value(self): freq="1h", ) expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = frame.resample("1h", kind="timestamp").asfreq(fill_value=3.0) + result = frame.to_timestamp().resample("1h").asfreq(fill_value=3.0) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("freq", ["h", "12h", "2D", "W"]) - @pytest.mark.parametrize("kind", [None, "period", "timestamp"]) @pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}]) - def test_selection(self, freq, kind, kwargs): + def test_selection(self, freq, kwargs): # This is a bug, these should be implemented # GH 14008 index = period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") @@ -122,10 +119,8 @@ def test_selection(self, freq, kind, kwargs): r"not currently supported, use \.set_index\(\.\.\.\) to " "explicitly set index" ) - depr_msg = "The 'kind' keyword in DataFrame.resample is deprecated" with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.resample(freq, kind=kind, **kwargs) + df.resample(freq, **kwargs) @pytest.mark.parametrize("month", MONTHS) @pytest.mark.parametrize("meth", ["ffill", "bfill"]) @@ -268,12 +263,9 @@ def test_resample_basic(self): name="idx", ) expected = Series([34.5, 79.5], index=index) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.to_period().resample("min", kind="period").mean() + result = s.to_period().resample("min").mean() tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - result2 = s.resample("min", kind="period").mean() + result2 = s.resample("min").mean().to_period() tm.assert_series_equal(result2, expected) @pytest.mark.parametrize( @@ -328,9 +320,9 @@ def test_with_local_timezone(self, tz): series = Series(1, index=index) series = series.tz_convert(local_timezone) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = series.resample("D", kind="period").mean() + msg = "Converting to PeriodArray/Index representation will drop timezone" + with tm.assert_produces_warning(UserWarning, match=msg): + result = series.resample("D").mean().to_period() # Create the expected series # Index is moved back a day with the timezone conversion from UTC to @@ -432,10 +424,8 @@ def test_weekly_upsample(self, day, target, convention, simple_period_range_seri def test_resample_to_timestamps(self, simple_period_range_series): ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("Y-DEC", kind="timestamp").mean() - expected = ts.to_timestamp(how="start").resample("YE-DEC").mean() + result = ts.resample("Y-DEC").mean().to_timestamp() + expected = ts.resample("Y-DEC").mean().to_timestamp(how="start") tm.assert_series_equal(result, expected) @pytest.mark.parametrize("month", MONTHS) @@ -488,17 +478,17 @@ def test_cant_fill_missing_dups(self): with pytest.raises(InvalidIndexError, match=msg): s.resample("Y").ffill() - @pytest.mark.parametrize("freq", ["5min"]) - @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) - def test_resample_5minute(self, freq, kind): + def test_resample_5minute(self): rng = period_range("1/1/2000", "1/5/2000", freq="min") ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - expected = ts.to_timestamp().resample(freq).mean() - if kind != "timestamp": - expected = expected.to_period(freq) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample(freq, kind=kind).mean() + expected = ts.to_timestamp().resample("5min").mean() + result = ts.resample("5min").mean().to_timestamp() + tm.assert_series_equal(result, expected) + + expected = expected.to_period("5min") + result = ts.resample("5min").mean() + tm.assert_series_equal(result, expected) + result = ts.resample("5min").mean().to_timestamp().to_period() tm.assert_series_equal(result, expected) def test_upsample_daily_business_daily(self, simple_period_range_series): @@ -572,9 +562,9 @@ def test_resample_tz_localized2(self): tm.assert_series_equal(result, expected) # for good measure - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample("D", kind="period").mean() + msg = "Converting to PeriodArray/Index representation will drop timezone " + with tm.assert_produces_warning(UserWarning, match=msg): + result = s.resample("D").mean().to_period() ex_index = period_range("2001-09-20", periods=1, freq="D") expected = Series([1.5], index=ex_index) tm.assert_series_equal(result, expected) @@ -808,8 +798,7 @@ def test_evenly_divisible_with_no_extra_bins2(self): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("freq, period_mult", [("h", 24), ("12h", 2)]) - @pytest.mark.parametrize("kind", [None, "period"]) - def test_upsampling_ohlc(self, freq, period_mult, kind): + def test_upsampling_ohlc(self, freq, period_mult): # GH 13083 pi = period_range(start="2000", freq="D", periods=10) s = Series(range(len(pi)), index=pi) @@ -819,9 +808,10 @@ def test_upsampling_ohlc(self, freq, period_mult, kind): # of the last original period, so extend accordingly: new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi)) expected = expected.reindex(new_index) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample(freq, kind=kind).ohlc() + result = s.resample(freq).ohlc() + tm.assert_frame_equal(result, expected) + + result = s.resample(freq).ohlc().to_timestamp().to_period() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index b312d708ade1e..520ef40153ecd 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -686,29 +686,3 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): rs = gb.resample("2D") with pytest.raises(KeyError, match="Columns not found"): rs[["val_not_in_dataframe"]] - - -@pytest.mark.parametrize("kind", ["datetime", "period"]) -def test_groupby_resample_kind(kind): - # GH 24103 - df = DataFrame( - { - "datetime": pd.to_datetime( - ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"] - ), - "group": ["A", "B", "A", "B"], - "value": [1, 2, 3, 4], - } - ) - df = df.set_index("datetime") - result = df.groupby("group")["value"].resample("D", kind=kind).last() - - dt_level = pd.DatetimeIndex(["2018-11-01", "2018-11-02"]) - if kind == "period": - dt_level = dt_level.to_period(freq="D") - expected_index = pd.MultiIndex.from_product( - [["A", "B"], dt_level], - names=["group", "datetime"], - ) - expected = Series([1, 3, 2, 4], index=expected_index, name="value") - tm.assert_series_equal(result, expected)