rapidsai · isVoid · Nov 16, 2021 · Nov 16, 2021 · Nov 18, 2021 · Nov 24, 2021
@@ -38,6 +38,8 @@
     "min": "m",
     "s": "s",
     "S": "s",
+    "L": "ms",
+    "ms": "ms",
     "U": "us",
     "us": "us",
     "N": "ns",
@@ -448,7 +450,6 @@ class DateOffset:
         "ns": "nanoseconds",
         "us": "microseconds",
         "ms": "milliseconds",
-        "L": "milliseconds",
         "s": "seconds",
         "m": "minutes",
         "h": "hours",
@@ -458,7 +459,7 @@ class DateOffset:
         "Y": "years",
     }
 
-    _FREQSTR_REGEX = re.compile("([0-9]*)([a-zA-Z]+)")
+    _FREQSTR_REGEX = re.compile(r"([+-])?(\d*)\s*([A-Za-z]+)")
 
     def __init__(self, n=1, normalize=False, **kwds):
         if normalize:
@@ -629,27 +630,54 @@ def __repr__(self):
         return repr_str
 
     @classmethod
-    def _from_freqstr(cls: Type[_T], freqstr: str) -> _T:
+    def _from_str(cls: Type[_T], freqstr: str) -> _T:
         """
-        Parse a string and return a DateOffset object
-        expects strings of the form 3D, 25W, 10ms, 42ns, etc.
-        """
-        match = cls._FREQSTR_REGEX.match(freqstr)
+        Parse a string and return a DateOffset object.
+
+        A string can be a pandas `offset alias <https://pandas.pydata.org/
+        pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ or a
+        numpy `date/time unit code <https://numpy.org/doc/stable/reference/
+        arrays.datetime.html#datetime-units>`_.
+
+        Note that ``m`` (lower case) is ambiguous and is not accepted in this
+        function. Use ``T``/``min`` for minutely frequency and ``M`` (upper
+        case) for monthly frequency.
+
+        Expects strings of the form 3D, 25W, -10ms, 42ns, etc.
 
+        Not all offset aliases are supported. See `_offset_alias_to_code` and
+        `_CODES_TO_UNITS` for the list of supported strings.
+        """
+        match = cls._FREQSTR_REGEX.fullmatch(freqstr)
         if match is None:
             raise ValueError(f"Invalid frequency string: {freqstr}")
 
-        numeric_part = match.group(1)
-        if numeric_part == "":
-            numeric_part = "1"
-        freq_part = match.group(2)
+        # Decompose the string into separate components
+        sign_part, numeric_part, freq_part = match.groups()
+
+        # Handle various offset strings and normalize as codes
+        if freq_part == "m":
+            raise ValueError(
+                "Lower cased `m` is ambiguous. Use 'T'/'min' to specify "
+                "minutely frequency or upper cased `M` to specify monthly "
+                "frequency."
+            )
 
-        if freq_part not in cls._CODES_TO_UNITS:
+        if freq_part in _offset_alias_to_code:
+            code = _offset_alias_to_code[freq_part]
+        elif freq_part in cls._CODES_TO_UNITS:
+            code = freq_part
+        else:
             raise ValueError(f"Cannot interpret frequency str: {freqstr}")
 
-        return cls(**{cls._CODES_TO_UNITS[freq_part]: int(numeric_part)})
+        # Handle sign and numerics; the sign group is None when absent
+        sign = -1 if sign_part == "-" else 1
+        n = int(numeric_part) if numeric_part else 1
+
+        # Construct the kwds dictionary
+        return cls(**{cls._CODES_TO_UNITS[code]: n * sign})
 
-    def _maybe_as_fast_pandas_offset(self):
+    def _maybe_as_fast_pandas_offset(self) -> pd.DateOffset:
         if (
             len(self.kwds) == 1
             and _has_fixed_frequency(self)
@@ -814,23 +842,18 @@ def date_range(
     if isinstance(freq, DateOffset):
         offset = freq
     elif isinstance(freq, str):
-        # Map pandas `offset alias` into cudf DateOffset `CODE`, only
-        # fixed-frequency, non-anchored offset aliases are supported.
-        mo = re.fullmatch(
-            rf'(-)*(\d*)({"|".join(_offset_alias_to_code.keys())})', freq
-        )
-        if mo is None:
+        if (
+            any(
+                x in freq.upper()
+                for x in {"Y", "A", "Q", "B", "SM", "SMS", "CBMS", "M"}
+            )
+            or "MS" in freq
+        ):
             raise ValueError(
-                f"Unrecognized or unsupported offset alias {freq}."
+                "date_range does not yet support month, quarter, year-anchored"
+                " or business-date frequency."
             )
-
-        sign, n, offset_alias = mo.groups()
-        code = _offset_alias_to_code[offset_alias]
-
-        freq = "".join([n, code])
-        offset = DateOffset._from_freqstr(freq)
-        if sign:
-            offset.kwds.update({s: -i for s, i in offset.kwds.items()})
+        offset = DateOffset._from_str(freq)
     else:
         raise TypeError("`freq` must be a `str` or cudf.DateOffset object.")
 

@@ -1583,6 +1583,46 @@ def test_date_range_raise_overflow():
         cudf.date_range(start=start, periods=periods, freq=freq)
 
 
+@pytest.mark.parametrize(
+    "freqstr_unsupported",
+    [
+        "1M",
+        "2SM",
+        "3MS",
+        "4BM",
+        "5CBM",
+        "6SMS",
+        "7BMS",
+        "8CBMS",
+        "Q",
+        "2BQ",
+        "3BQS",
+        "10A",
+        "10Y",
+        "9BA",
+        "9BY",
+        "8AS",
+        "8YS",
+        "7BAS",
+        "7BYS",
+        "BH",
+        "B",
+    ],
+)
+def test_date_range_raise_unsupported(freqstr_unsupported):
+    s, e = "2001-01-01", "2008-01-31"
+    pd.date_range(start=s, end=e, freq=freqstr_unsupported)
+    with pytest.raises(ValueError, match="does not yet support"):
+        cudf.date_range(start=s, end=e, freq=freqstr_unsupported)
+
+    # Lowercase "3ms" means 3 milliseconds, not month start, so skip it
+    if not freqstr_unsupported == "3MS":
+        freqstr_unsupported = freqstr_unsupported.lower()
+        pd.date_range(start=s, end=e, freq=freqstr_unsupported)
+        with pytest.raises(ValueError, match="does not yet support"):
+            cudf.date_range(start=s, end=e, freq=freqstr_unsupported)
+
+
 ##################################################################
 #                    End of Date Range Test                      #
 ##################################################################