From a9867397c02d2cdba6344860a35465d08436e918 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 9 May 2024 22:19:28 +0200 Subject: [PATCH 1/3] fix bug dti.is_year_start breaks on freq custom business day with digit --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/tslibs/fields.pyi | 2 +- pandas/_libs/tslibs/fields.pyx | 13 ++++++------- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/core/arrays/datetimes.py | 6 +++++- .../tests/indexes/datetimes/test_scalar_compat.py | 7 +++++++ 6 files changed, 21 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 53b6179dbae93..d839c62003d73 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -419,6 +419,7 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) +- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on custom business day frequencies bigger than "1C" (:issue:`58664`) - Missing diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index c6cfd44e9f6ab..bc55e34f3d208 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -16,7 +16,7 @@ def get_date_name_field( def get_start_end_field( dtindex: npt.NDArray[np.int64], field: str, - freqstr: str | None = ..., + freq_name: str | None = ..., month_kw: int = ..., reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.bool_]: ... 
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index ff4fb4d635d17..8f8060b2a5f83 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -210,7 +210,7 @@ cdef bint _is_on_month(int month, int compare_month, int modby) noexcept nogil: def get_start_end_field( const int64_t[:] dtindex, str field, - str freqstr=None, + str freq_name=None, int month_kw=12, NPY_DATETIMEUNIT reso=NPY_FR_ns, ): @@ -223,7 +223,7 @@ def get_start_end_field( ---------- dtindex : ndarray[int64] field : str - frestr : str or None, default None + freq_name : str or None, default None month_kw : int, default 12 reso : NPY_DATETIMEUNIT, default NPY_FR_ns @@ -243,18 +243,17 @@ def get_start_end_field( out = np.zeros(count, dtype="int8") - if freqstr: - if freqstr == "C": + if freq_name: + if freq_name == "C": raise ValueError(f"Custom business days is not supported by {field}") - is_business = freqstr[0] == "B" + is_business = freq_name[0] == "B" # YearBegin(), BYearBegin() use month = starting month of year. # QuarterBegin(), BQuarterBegin() use startingMonth = starting # month of year. Other offsets use month, startingMonth as ending # month of year. 
- if (freqstr[0:2] in ["MS", "QS", "YS"]) or ( - freqstr[1:3] in ["MS", "QS", "YS"]): + if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7e499d219243d..9812b65580710 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -579,7 +579,7 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - freqstr = freq.freqstr + freqstr = freq.name else: month_kw = 12 freqstr = None diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3961b0a7672f4..b075e3d299ed0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -145,8 +145,12 @@ def f(self): kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) + if freq is not None: + freq_name = freq.name + else: + freq_name = None result = fields.get_start_end_field( - values, field, self.freqstr, month_kw, reso=self._creso + values, field, freq_name, month_kw, reso=self._creso ) else: result = fields.get_date_field(values, field, reso=self._creso) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index f766894a993a0..4dba518226ec1 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -328,3 +328,10 @@ def test_dti_is_month_start_custom(self): msg = "Custom business days is not supported by is_month_start" with pytest.raises(ValueError, match=msg): dti.is_month_start + + def test_dti_is_year_start_freq_custom_business_day_with_digit(self): + # GH#58664 + dr = date_range("2020-01-01", periods=2, freq="2C") + msg = "Custom business days is not supported by is_year_start" + with pytest.raises(ValueError, match=msg): + dr.is_year_start 
From 6bbba80e7b714ac7355a12fb9822f9fbb282b578 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 10 May 2024 16:59:56 +0200 Subject: [PATCH 2/3] rename the variable freqstr --- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9812b65580710..0010497425c02 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -579,15 +579,15 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - freqstr = freq.name + freq_name = freq.name else: month_kw = 12 - freqstr = None + freq_name = None val = self._maybe_convert_value_to_local() out = get_start_end_field(np.array([val], dtype=np.int64), - field, freqstr, month_kw, self._creso) + field, freq_name, month_kw, self._creso) return out[0] @property From 12355064eaac239facb01961153d47f29382a68e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 11 May 2024 15:49:58 +0200 Subject: [PATCH 3/3] delete double space --- pandas/_libs/tslibs/fields.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 8f8060b2a5f83..399a5c2e96cd5 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -253,7 +253,7 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]: + if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: