From 2abed4aecbd954a66e8e5246b0cd27eeb474f20e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Nov 2023 15:23:55 -0800 Subject: [PATCH 1/3] DEPR: unit keyword in TimedeltaIndex --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/indexes/timedeltas.py | 14 +++++++++++++- pandas/core/tools/timedeltas.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 6 +++--- pandas/tests/indexes/test_datetimelike.py | 2 +- .../indexes/timedeltas/test_constructors.py | 18 +++++++++++++----- .../indexes/timedeltas/test_scalar_compat.py | 4 ++-- .../tests/reductions/test_stat_reductions.py | 3 ++- pandas/tests/resample/test_timedelta.py | 5 ++++- .../scalar/timedelta/test_constructors.py | 3 ++- 10 files changed, 42 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 292e6534a46e4..37ca935492fb8 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -306,6 +306,7 @@ Other Deprecations - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) - Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`) - Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`) +- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) - Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9d8ef5b0a69cd..fbbb790a6c626 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -150,7 +150,7 @@ def _resolution_obj(self) -> Resolution | None: # type: ignore[override] def __new__( cls, data=None, - unit=None, + unit=lib.no_default, freq=lib.no_default, closed=lib.no_default, dtype=None, @@ -166,6 +166,18 @@ def __new__( stacklevel=find_stack_level(), ) + if unit is not lib.no_default: + # GH#55499 + warnings.warn( + f"The 'unit' keyword in {cls.__name__} construction is " + "deprecated and will be removed in a future version. " + "Use pd.to_timedelta instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + unit = None + name = maybe_extract_name(name, data, cls) if is_scalar(data): diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 8909776d91369..d9e1a448f6841 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -279,5 +279,5 @@ def _convert_listlike( from pandas import TimedeltaIndex - value = TimedeltaIndex(td64arr, unit="ns", name=name) + value = TimedeltaIndex(td64arr, name=name) return value diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 9718381e58fcb..5b1e8938138fa 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -217,7 +217,7 @@ def test_concat_same_type(self, arr1d): result = arr._concat_same_type([arr[:-1], arr[1:], arr]) arr2 = arr.astype(object) - expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) + expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2])) tm.assert_index_equal(self.index_cls(result), expected) @@ -1241,7 +1241,7 @@ def test_to_numpy_extra(arr): "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), - TimedeltaIndex([1, 2], unit="D"), + pd.to_timedelta([1, 2], unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) @@ -1272,7 +1272,7 @@ def test_searchsorted_datetimelike_with_listlike(values, klass, as_index): "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), - TimedeltaIndex([1, 2], unit="D"), + pd.to_timedelta([1, 2], unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 5ad2e9b2f717e..27e01427006ec 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -165,5 +165,5 @@ def test_diff(self, unit): # GH 55080 dti = pd.to_datetime([10, 20, 30], unit=unit).as_unit(unit) result = dti.diff(1) - expected = pd.TimedeltaIndex([pd.NaT, 10, 10], unit=unit).as_unit(unit) + expected = pd.to_timedelta([pd.NaT, 10, 10], unit=unit).as_unit(unit) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index ccbd61c4d6693..c71089d6f2db3 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -39,8 +39,10 @@ def test_array_of_dt64_nat_raises(self): @pytest.mark.parametrize("unit", ["Y", "y", "M"]) def test_unit_m_y_raises(self, unit): msg = "Units 'M', 'Y', and 'y' are no longer supported" + depr_msg = "The 'unit' keyword in TimedeltaIndex construction is deprecated" with pytest.raises(ValueError, match=msg): - TimedeltaIndex([1, 3, 7], unit) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + TimedeltaIndex([1, 3, 7], unit) def test_int64_nocopy(self): # GH#23539 check that a copy isn't made when we pass int64 data @@ -120,7 +122,7 @@ def test_float64_ns_rounded(self): def test_float64_unit_conversion(self): # GH#23539 - tdi = TimedeltaIndex([1.5, 2.25], unit="D") + tdi = to_timedelta([1.5, 2.25], unit="D") expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) tm.assert_index_equal(tdi, expected) @@ -133,6 +135,9 @@ def test_construction_base_constructor(self): tm.assert_index_equal(pd.Index(arr), TimedeltaIndex(arr)) tm.assert_index_equal(pd.Index(np.array(arr)), TimedeltaIndex(np.array(arr))) + @pytest.mark.filterwarnings( + "ignore:The 'unit' keyword in TimedeltaIndex construction:FutureWarning" + ) def test_constructor(self): expected = TimedeltaIndex( [ @@ -157,15 +162,18 @@ def test_constructor(self): expected = TimedeltaIndex( ["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"] ) - tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected) + result = TimedeltaIndex(range(3), unit="s") + tm.assert_index_equal(result, expected) expected = TimedeltaIndex( ["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"] ) - tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected) + result = TimedeltaIndex([0, 5, 9], unit="s") + tm.assert_index_equal(result, expected) expected = TimedeltaIndex( ["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"] ) - tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected) + result = TimedeltaIndex([400, 450, 1200], unit="ms") + tm.assert_index_equal(result, expected) def test_constructor_iso(self): # GH #21877 diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py index 63db5c1b9c91d..6713cb567208d 100644 --- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -100,7 +100,7 @@ def test_round(self): t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") t2 = -1 * t1 t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s") - t1c = TimedeltaIndex([1, 1, 1], unit="D") + t1c = TimedeltaIndex(np.array([1, 1, 1], "m8[D]")).as_unit("ns") # note that negative times round DOWN! so don't give whole numbers for freq, s1, s2 in [ @@ -122,7 +122,7 @@ def test_round(self): ), ("12min", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), ("h", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), - ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")), + ("d", t1c, -1 * t1c), ]: r1 = t1.round(freq) tm.assert_index_equal(r1, s1) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 74e521ab71f41..76313f5b50ebb 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -68,7 +68,8 @@ def test_period_mean(self, box, freq): @pytest.mark.parametrize("box", [Series, pd.Index, TimedeltaArray]) def test_td64_mean(self, box): - tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D") + m8values = np.array([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], "m8[D]") + tdi = pd.TimedeltaIndex(m8values).as_unit("ns") tdarr = tdi._data obj = box(tdarr, copy=False) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 79f9492062063..5d6876343a0c9 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -98,9 +98,12 @@ def test_resample_categorical_data_with_timedeltaindex(): df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s")) df["Group"] = df["Group_obj"].astype("category") result = df.resample("10s").agg(lambda x: (x.value_counts().index[0])) + exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit( + "ns" + ) expected = DataFrame( {"Group_obj": ["A", "A"], "Group": ["A", "A"]}, - index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s"), + index=exp_tdi, ) expected = expected.reindex(["Group_obj", "Group"], axis=1) expected["Group"] = expected["Group_obj"].astype("category") diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index e45e0ac3c6a45..4663f8cb71961 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -144,7 +144,8 @@ def test_unit_parser(self, unit, np_unit, wrapper): if (unit, np_unit) in (("u", "us"), ("U", "us"), ("n", "ns"), ("N", "ns")): warn = FutureWarning else: - warn = None + warn = FutureWarning + msg = "The 'unit' keyword in TimedeltaIndex construction is deprecated" with tm.assert_produces_warning(warn, match=msg): result = to_timedelta(wrapper(range(5)), unit=unit) tm.assert_index_equal(result, expected) From cb8ca75889031c590c5b30cb355643859c2cbb1c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Nov 2023 09:05:56 -0800 Subject: [PATCH 2/3] update docstring --- pandas/core/indexes/timedeltas.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index fbbb790a6c626..b1afe524bb363 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -64,6 +64,10 @@ class TimedeltaIndex(DatetimeTimedeltaMixin): Optional timedelta-like data to construct index with. unit : {'D', 'h', 'm', 's', 'ms', 'us', 'ns'}, optional The unit of ``data``. + + .. deprecated:: 2.2.0 + Use ``pd.to_timedelta`` instead. + freq : str or pandas offset object, optional One of pandas date offset strings or corresponding objects. The string ``'infer'`` can be passed in order to set the frequency of the index as @@ -114,10 +118,6 @@ class TimedeltaIndex(DatetimeTimedeltaMixin): TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) - >>> pd.TimedeltaIndex([1, 2, 4, 8], unit='D') - TimedeltaIndex(['1 days', '2 days', '4 days', '8 days'], - dtype='timedelta64[ns]', freq=None) - We can also let pandas infer the frequency when possible. >>> pd.TimedeltaIndex(range(5), unit='D', freq='infer') From 52c910bc22dd4c85bc6a1f0a06b1b97383c2c0c9 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Nov 2023 10:50:25 -0800 Subject: [PATCH 3/3] update doctest --- pandas/core/indexes/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b1afe524bb363..bc91666eefad5 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -120,7 +120,7 @@ class TimedeltaIndex(DatetimeTimedeltaMixin): We can also let pandas infer the frequency when possible. - >>> pd.TimedeltaIndex(range(5), unit='D', freq='infer') + >>> pd.TimedeltaIndex(np.arange(5) * 24 * 3600 * 1e9, freq='infer') TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq='D') """