Skip to content

Commit

Permalink
DEPR: deprecate strings T, S, L, U, and N in offsets frequencies, resolution abbreviations, _attrname_to_abbrevs (pandas-dev#54061)
Browse files Browse the repository at this point in the history
  • Loading branch information
natmokval authored and mroeschke committed Sep 11, 2023
1 parent b90a8fb commit 866dc08
Show file tree
Hide file tree
Showing 118 changed files with 1,074 additions and 861 deletions.
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ class Timeseries:
def setup(self, tz):
N = 10**6
halfway = (N // 2) - 1
self.s = Series(date_range("20010101", periods=N, freq="T", tz=tz))
self.s = Series(date_range("20010101", periods=N, freq="min", tz=tz))
self.ts = self.s[halfway]

self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))
Expand Down Expand Up @@ -460,7 +460,7 @@ class OffsetArrayArithmetic:

def setup(self, offset):
N = 10000
rng = date_range(start="1/1/2000", periods=N, freq="T")
rng = date_range(start="1/1/2000", periods=N, freq="min")
self.rng = rng
self.ser = Series(rng)

Expand All @@ -479,7 +479,7 @@ class ApplyIndex:

def setup(self, offset):
N = 10000
rng = date_range(start="1/1/2000", periods=N, freq="T")
rng = date_range(start="1/1/2000", periods=N, freq="min")
self.rng = rng

def time_apply_index(self, offset):
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Query:
def setup(self):
N = 10**6
halfway = (N // 2) - 1
index = pd.date_range("20010101", periods=N, freq="T")
index = pd.date_range("20010101", periods=N, freq="min")
s = pd.Series(index)
self.ts = s.iloc[halfway]
self.df = pd.DataFrame({"a": np.random.randn(N), "dates": index}, index=index)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/gil.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def time_kth_smallest(self):
class ParallelDatetimeFields:
def setup(self):
N = 10**6
self.dti = date_range("1900-01-01", periods=N, freq="T")
self.dti = date_range("1900-01-01", periods=N, freq="min")
self.period = self.dti.to_period("D")

def time_datetime_field_year(self):
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/index_cached_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def setup(self, index_type):
N = 10**5
if index_type == "MultiIndex":
self.idx = pd.MultiIndex.from_product(
[pd.date_range("1/1/2000", freq="T", periods=N // 2), ["a", "b"]]
[pd.date_range("1/1/2000", freq="min", periods=N // 2), ["a", "b"]]
)
elif index_type == "DatetimeIndex":
self.idx = pd.date_range("1/1/2000", freq="T", periods=N)
self.idx = pd.date_range("1/1/2000", freq="min", periods=N)
elif index_type == "Int64Index":
self.idx = pd.Index(range(N), dtype="int64")
elif index_type == "PeriodIndex":
self.idx = pd.period_range("1/1/2000", freq="T", periods=N)
self.idx = pd.period_range("1/1/2000", freq="min", periods=N)
elif index_type == "RangeIndex":
self.idx = pd.RangeIndex(start=0, stop=N)
elif index_type == "IntervalIndex":
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/index_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SetOperations:

def setup(self, index_structure, dtype, method):
N = 10**5
dates_left = date_range("1/1/2000", periods=N, freq="T")
dates_left = date_range("1/1/2000", periods=N, freq="min")
fmt = "%Y-%m-%d %H:%M:%S"
date_str_left = Index(dates_left.strftime(fmt))
int_left = Index(np.arange(N))
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def time_float_longint_str_lines(self):
class ToJSONMem:
def setup_cache(self):
df = DataFrame([[1]])
df2 = DataFrame(range(8), date_range("1/1/2000", periods=8, freq="T"))
df2 = DataFrame(range(8), date_range("1/1/2000", periods=8, freq="min"))
frames = {"int": df, "float": df.astype(float), "datetime": df2}

return frames
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ class JoinNonUnique:
# outer join of non-unique
# GH 6329
def setup(self):
date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="T")
date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="min")
daily_dates = date_index.to_period("D").to_timestamp("S", "S")
self.fracofday = date_index.values - daily_dates.values
self.fracofday = self.fracofday.astype("timedelta64[ns]")
Expand Down Expand Up @@ -338,7 +338,7 @@ class MergeDatetime:
def setup(self, units, tz):
unit_left, unit_right = units
N = 10_000
keys = Series(date_range("2012-01-01", freq="T", periods=N, tz=tz))
keys = Series(date_range("2012-01-01", freq="min", periods=N, tz=tz))
self.left = DataFrame(
{
"key": keys.sample(N * 10, replace=True).dt.as_unit(unit_left),
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class SparseSeriesToFrame:
def setup(self):
K = 50
N = 50001
rng = date_range("1/1/2000", periods=N, freq="T")
rng = date_range("1/1/2000", periods=N, freq="min")
self.series = {}
for i in range(1, K):
data = np.random.randn(N)[:-i]
Expand Down
16 changes: 8 additions & 8 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def time_infer_freq(self, freq):
class TimeDatetimeConverter:
def setup(self):
N = 100000
self.rng = date_range(start="1/1/2000", periods=N, freq="T")
self.rng = date_range(start="1/1/2000", periods=N, freq="min")

def time_convert(self):
DatetimeConverter.convert(self.rng, None, None)
Expand All @@ -129,9 +129,9 @@ class Iteration:
def setup(self, time_index):
N = 10**6
if time_index is timedelta_range:
self.idx = time_index(start=0, freq="T", periods=N)
self.idx = time_index(start=0, freq="min", periods=N)
else:
self.idx = time_index(start="20140101", freq="T", periods=N)
self.idx = time_index(start="20140101", freq="min", periods=N)
self.exit = 10000

def time_iter(self, time_index):
Expand All @@ -149,7 +149,7 @@ class ResampleDataFrame:
param_names = ["method"]

def setup(self, method):
rng = date_range(start="20130101", periods=100000, freq="50L")
rng = date_range(start="20130101", periods=100000, freq="50ms")
df = DataFrame(np.random.randn(100000, 2), index=rng)
self.resample = getattr(df.resample("1s"), method)

Expand All @@ -163,8 +163,8 @@ class ResampleSeries:

def setup(self, index, freq, method):
indexes = {
"period": period_range(start="1/1/2000", end="1/1/2001", freq="T"),
"datetime": date_range(start="1/1/2000", end="1/1/2001", freq="T"),
"period": period_range(start="1/1/2000", end="1/1/2001", freq="min"),
"datetime": date_range(start="1/1/2000", end="1/1/2001", freq="min"),
}
idx = indexes[index]
ts = Series(np.random.randn(len(idx)), index=idx)
Expand All @@ -178,7 +178,7 @@ class ResampleDatetetime64:
# GH 7754
def setup(self):
rng3 = date_range(
start="2000-01-01 00:00:00", end="2000-01-01 10:00:00", freq="555000U"
start="2000-01-01 00:00:00", end="2000-01-01 10:00:00", freq="555000us"
)
self.dt_ts = Series(5, rng3, dtype="datetime64[ns]")

Expand Down Expand Up @@ -270,7 +270,7 @@ class DatetimeAccessor:

def setup(self, tz):
N = 100000
self.series = Series(date_range(start="1/1/2000", periods=N, freq="T", tz=tz))
self.series = Series(date_range(start="1/1/2000", periods=N, freq="min", tz=tz))

def time_dt_accessor(self, tz):
self.series.dt
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/tslibs/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ def time_to_julian_date(self, tz):
self.ts.to_julian_date()

def time_floor(self, tz):
self.ts.floor("5T")
self.ts.floor("5min")

def time_ceil(self, tz):
self.ts.ceil("5T")
self.ts.ceil("5min")


class TimestampAcrossDst:
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ financial applications. See the :ref:`Time Series section <timeseries>`.

.. ipython:: python
rng = pd.date_range("1/1/2012", periods=100, freq="S")
rng = pd.date_range("1/1/2012", periods=100, freq="s")
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.resample("5Min").sum()
Expand Down
6 changes: 3 additions & 3 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Suppose our raw dataset on disk has many columns.
return df
timeseries = [
make_timeseries(freq="1T", seed=i).rename(columns=lambda x: f"{x}_{i}")
make_timeseries(freq="1min", seed=i).rename(columns=lambda x: f"{x}_{i}")
for i in range(10)
]
ts_wide = pd.concat(timeseries, axis=1)
Expand Down Expand Up @@ -87,7 +87,7 @@ can store larger datasets in memory.
.. ipython:: python
:okwarning:
ts = make_timeseries(freq="30S", seed=0)
ts = make_timeseries(freq="30s", seed=0)
ts.to_parquet("timeseries.parquet")
ts = pd.read_parquet("timeseries.parquet")
ts
Expand Down Expand Up @@ -173,7 +173,7 @@ files. Each file in the directory represents a different year of the entire data
pathlib.Path("data/timeseries").mkdir(exist_ok=True)
for i, (start, end) in enumerate(zip(starts, ends)):
ts = make_timeseries(start=start, end=end, freq="1T", seed=i)
ts = make_timeseries(start=start, end=end, freq="1min", seed=i)
ts.to_parquet(f"data/timeseries/ts-{i:0>2d}.parquet")
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/timedeltas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ The ``freq`` parameter can passed a variety of :ref:`frequency aliases <timeseri

.. ipython:: python
pd.timedelta_range(start="1 days", end="2 days", freq="30T")
pd.timedelta_range(start="1 days", end="2 days", freq="30min")
pd.timedelta_range(start="1 days", periods=5, freq="2D5H")
Expand Down
62 changes: 36 additions & 26 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ would include matching times on an included date:
dft = pd.DataFrame(
np.random.randn(100000, 1),
columns=["A"],
index=pd.date_range("20130101", periods=100000, freq="T"),
index=pd.date_range("20130101", periods=100000, freq="min"),
)
dft
dft.loc["2013"]
Expand Down Expand Up @@ -905,11 +905,11 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
:class:`~pandas.tseries.offsets.CustomBusinessHour`, ``'CBH'``, "custom business hour"
:class:`~pandas.tseries.offsets.Day`, ``'D'``, "one absolute day"
:class:`~pandas.tseries.offsets.Hour`, ``'H'``, "one hour"
:class:`~pandas.tseries.offsets.Minute`, ``'T'`` or ``'min'``,"one minute"
:class:`~pandas.tseries.offsets.Second`, ``'S'``, "one second"
:class:`~pandas.tseries.offsets.Milli`, ``'L'`` or ``'ms'``, "one millisecond"
:class:`~pandas.tseries.offsets.Micro`, ``'U'`` or ``'us'``, "one microsecond"
:class:`~pandas.tseries.offsets.Nano`, ``'N'``, "one nanosecond"
:class:`~pandas.tseries.offsets.Minute`, ``'min'``,"one minute"
:class:`~pandas.tseries.offsets.Second`, ``'s'``, "one second"
:class:`~pandas.tseries.offsets.Milli`, ``'ms'``, "one millisecond"
:class:`~pandas.tseries.offsets.Micro`, ``'us'``, "one microsecond"
:class:`~pandas.tseries.offsets.Nano`, ``'ns'``, "one nanosecond"

``DateOffsets`` additionally have :meth:`rollforward` and :meth:`rollback`
methods for moving a date forward or backward respectively to a valid offset
Expand Down Expand Up @@ -1264,11 +1264,16 @@ frequencies. We will refer to these aliases as *offset aliases*.
"BAS, BYS", "business year start frequency"
"BH", "business hour frequency"
"H", "hourly frequency"
"T, min", "minutely frequency"
"S", "secondly frequency"
"L, ms", "milliseconds"
"U, us", "microseconds"
"N", "nanoseconds"
"min", "minutely frequency"
"s", "secondly frequency"
"ms", "milliseconds"
"us", "microseconds"
"ns", "nanoseconds"

.. deprecated:: 2.2.0

Aliases ``T``, ``S``, ``L``, ``U``, and ``N`` are deprecated in favour of the aliases
``min``, ``s``, ``ms``, ``us``, and ``ns``.

.. note::

Expand Down Expand Up @@ -1318,11 +1323,16 @@ frequencies. We will refer to these aliases as *period aliases*.
"Q", "quarterly frequency"
"A, Y", "yearly frequency"
"H", "hourly frequency"
"T, min", "minutely frequency"
"S", "secondly frequency"
"L, ms", "milliseconds"
"U, us", "microseconds"
"N", "nanoseconds"
"min", "minutely frequency"
"s", "secondly frequency"
"ms", "milliseconds"
"us", "microseconds"
"ns", "nanoseconds"

.. deprecated:: 2.2.0

Aliases ``T``, ``S``, ``L``, ``U``, and ``N`` are deprecated in favour of the aliases
``min``, ``s``, ``ms``, ``us``, and ``ns``.


Combining aliases
Expand All @@ -1343,7 +1353,7 @@ You can combine together day and intraday offsets:
pd.date_range(start, periods=10, freq="2h20min")
pd.date_range(start, periods=10, freq="1D10U")
pd.date_range(start, periods=10, freq="1D10us")
Anchored offsets
~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -1635,7 +1645,7 @@ Basics

.. ipython:: python
rng = pd.date_range("1/1/2012", periods=100, freq="S")
rng = pd.date_range("1/1/2012", periods=100, freq="s")
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
Expand Down Expand Up @@ -1725,11 +1735,11 @@ For upsampling, you can specify a way to upsample and the ``limit`` parameter to
# from secondly to every 250 milliseconds
ts[:2].resample("250L").asfreq()
ts[:2].resample("250ms").asfreq()
ts[:2].resample("250L").ffill()
ts[:2].resample("250ms").ffill()
ts[:2].resample("250L").ffill(limit=2)
ts[:2].resample("250ms").ffill(limit=2)
Sparse resampling
~~~~~~~~~~~~~~~~~
Expand All @@ -1752,7 +1762,7 @@ If we want to resample to the full range of the series:

.. ipython:: python
ts.resample("3T").sum()
ts.resample("3min").sum()
We can instead only resample those groups where we have points as follows:

Expand All @@ -1766,7 +1776,7 @@ We can instead only resample those groups where we have points as follows:
freq = to_offset(freq)
return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value)
ts.groupby(partial(round, freq="3T")).sum()
ts.groupby(partial(round, freq="3min")).sum()
.. _timeseries.aggregate:

Expand All @@ -1783,10 +1793,10 @@ Resampling a ``DataFrame``, the default will be to act on all columns with the s
df = pd.DataFrame(
np.random.randn(1000, 3),
index=pd.date_range("1/1/2012", freq="S", periods=1000),
index=pd.date_range("1/1/2012", freq="s", periods=1000),
columns=["A", "B", "C"],
)
r = df.resample("3T")
r = df.resample("3min")
r.mean()
We can select a specific column or columns using standard getitem.
Expand Down Expand Up @@ -2155,7 +2165,7 @@ Passing a string representing a lower frequency than ``PeriodIndex`` returns par
dfp = pd.DataFrame(
np.random.randn(600, 1),
columns=["A"],
index=pd.period_range("2013-01-01 9:00", periods=600, freq="T"),
index=pd.period_range("2013-01-01 9:00", periods=600, freq="min"),
)
dfp
dfp.loc["2013-01-01 10H"]
Expand Down
11 changes: 9 additions & 2 deletions doc/source/whatsnew/v0.13.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -669,9 +669,16 @@ Enhancements
Period conversions in the range of seconds and below were reworked and extended
up to nanoseconds. Periods in the nanosecond range are now available.

.. ipython:: python
.. code-block:: python
pd.date_range('2013-01-01', periods=5, freq='5N')
In [79]: pd.date_range('2013-01-01', periods=5, freq='5N')
Out[79]:
DatetimeIndex([ '2013-01-01 00:00:00',
'2013-01-01 00:00:00.000000005',
'2013-01-01 00:00:00.000000010',
'2013-01-01 00:00:00.000000015',
'2013-01-01 00:00:00.000000020'],
dtype='datetime64[ns]', freq='5N')
or with frequency as offset

Expand Down
Loading

0 comments on commit 866dc08

Please sign in to comment.