From ba1299dfc03e87f11cf021a67d01531ed6afd7f7 Mon Sep 17 00:00:00 2001
From: Brian Tepera
Date: Mon, 3 Jun 2024 13:45:09 -0400
Subject: [PATCH] Implement day_name and month_name to match pandas (#15479)

This PR implements the `month_name` and `day_name` datetime methods, matching the equivalent [month_name](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.month_name.html) and [day_name](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.day_name.html) methods from pandas. Currently this is implemented just for English locale, though it could be expanded to include additional languages in the future.

Closes #12407

Authors:
  - Brian Tepera (https://github.com/btepera)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15479
---
 python/cudf/cudf/core/column/datetime.py | 37 ++++++++++++
 python/cudf/cudf/core/index.py           | 39 +++++++++++++
 python/cudf/cudf/core/series.py          | 71 ++++++++++++++++++++++++
 python/cudf/cudf/tests/test_datetime.py  | 39 +++++++++++++
 4 files changed, 186 insertions(+)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index d92a3a00641..27f31c8f500 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import calendar
 import datetime
 import functools
 import locale
@@ -339,6 +340,42 @@ def element_indexing(self, index: int):
     def get_dt_field(self, field: str) -> ColumnBase:
         return libcudf.datetime.extract_datetime_component(self, field)
 
+    def _get_field_names(
+        self,
+        field: Literal["month", "weekday"],
+        labels: list[str],
+        locale: str | None = None,
+    ) -> ColumnBase:
+        """
+        Look up each element's ``field`` value in ``labels``.
+
+        ``locale`` is accepted for API compatibility only; any value
+        other than ``None`` raises ``NotImplementedError``.
+        """
+        if locale is not None:
+            raise NotImplementedError(
+                "Setting a locale is currently not supported. "
+                "Omit `locale` to get results in your current locale."
+            )
+        col_labels = as_column(labels)
+        indices = self.get_dt_field(field)
+        has_nulls = indices.has_nulls()
+        # Send nulls to an out-of-range index; presumably take(nullify=True)
+        # then maps them back to nulls (nulls are exercised in the tests).
+        if has_nulls:
+            indices = indices.fillna(len(col_labels))
+        return col_labels.take(indices, nullify=True, check_bounds=has_nulls)
+
+    def get_day_names(self, locale: str | None = None) -> ColumnBase:
+        return self._get_field_names(
+            "weekday", list(calendar.day_name), locale=locale
+        )
+
+    def get_month_names(self, locale: str | None = None) -> ColumnBase:
+        return self._get_field_names(
+            "month", list(calendar.month_name), locale=locale
+        )
+
     def ceil(self, freq: str) -> ColumnBase:
         return libcudf.datetime.ceil_datetime(self, freq)
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 49bfb150f60..2a75b374a1e 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2120,6 +2120,45 @@ def quarter(self):
         res = extract_quarter(self._values)
         return Index(res, dtype="int8")
 
+    @_cudf_nvtx_annotate
+    def day_name(self, locale: str | None = None) -> Index:
+        """
+        Return the day names. Currently supports English locale only.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> datetime_index = cudf.date_range("2016-12-31", "2017-01-08", freq="D")
+        >>> datetime_index
+        DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
+                       '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07'],
+                      dtype='datetime64[ns]', freq='D')
+        >>> datetime_index.day_name()
+        Index(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
+               'Friday', 'Saturday'], dtype='object')
+        """
+        day_names = self._column.get_day_names(locale)
+        return Index._from_data({self.name: day_names})
+
+    @_cudf_nvtx_annotate
+    def month_name(self, locale: str | None = None) -> Index:
+        """
+        Return the month names. Currently supports English locale only.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> datetime_index = cudf.date_range("2017-12-30", periods=6, freq='W')
+        >>> datetime_index
+        DatetimeIndex(['2017-12-30', '2018-01-06', '2018-01-13', '2018-01-20',
+                       '2018-01-27', '2018-02-03'],
+                      dtype='datetime64[ns]', freq='7D')
+        >>> datetime_index.month_name()
+        Index(['December', 'January', 'January', 'January', 'January', 'February'], dtype='object')
+        """
+        month_names = self._column.get_month_names(locale)
+        return Index._from_data({self.name: month_names})
+
     @_cudf_nvtx_annotate
     def isocalendar(self):
         """
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 908347e389b..a5b204ef346 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -4201,6 +4201,77 @@ def quarter(self):
             name=self.series.name,
         )
 
+    @_cudf_nvtx_annotate
+    def day_name(self, locale: str | None = None) -> Series:
+        """
+        Return the day names. Currently supports English locale only.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> datetime_series = cudf.Series(cudf.date_range('2016-12-31',
+        ...     '2017-01-08', freq='D'))
+        >>> datetime_series
+        0   2016-12-31
+        1   2017-01-01
+        2   2017-01-02
+        3   2017-01-03
+        4   2017-01-04
+        5   2017-01-05
+        6   2017-01-06
+        7   2017-01-07
+        dtype: datetime64[ns]
+        >>> datetime_series.dt.day_name()
+        0     Saturday
+        1       Sunday
+        2       Monday
+        3      Tuesday
+        4    Wednesday
+        5     Thursday
+        6       Friday
+        7     Saturday
+        dtype: object
+        """
+        day_names = self.series._column.get_day_names(locale)
+        return Series._from_data(
+            ColumnAccessor({None: day_names}),
+            index=self.series.index,
+            name=self.series.name,
+        )
+
+    @_cudf_nvtx_annotate
+    def month_name(self, locale: str | None = None) -> Series:
+        """
+        Return the month names. Currently supports English locale only.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> datetime_series = cudf.Series(cudf.date_range("2017-12-30", periods=6, freq='W'))
+        >>> datetime_series
+        0   2017-12-30
+        1   2018-01-06
+        2   2018-01-13
+        3   2018-01-20
+        4   2018-01-27
+        5   2018-02-03
+        dtype: datetime64[ns]
+        >>> datetime_series.dt.month_name()
+        0    December
+        1     January
+        2     January
+        3     January
+        4     January
+        5    February
+        dtype: object
+        """
+        month_names = self.series._column.get_month_names(locale)
+        return Series._from_data(
+            ColumnAccessor({None: month_names}),
+            index=self.series.index,
+            name=self.series.name,
+        )
+
     @_cudf_nvtx_annotate
     def isocalendar(self):
         """
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 46a0dcd315d..4186fff038a 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -2419,3 +2419,42 @@ def test_date_range_tz():
     result = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
     expected = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
     assert_eq(result, expected)
+
+
+@pytest.mark.parametrize("meth", ["day_name", "month_name"])
+@pytest.mark.parametrize("klass", [pd.Series, pd.DatetimeIndex])
+def test_day_month_name(meth, klass):
+    data = [
+        "2020-05-31 08:00:00",
+        None,
+        "1999-12-31 18:40:00",
+        "2000-12-31 04:00:00",
+        None,
+        "1900-02-28 07:00:00",
+        "1800-03-14 07:30:00",
+        "2100-03-14 07:30:00",
+        "1970-01-01 00:00:00",
+        "1969-12-31 12:59:00",
+    ]
+
+    p_obj = klass(data, dtype="datetime64[s]")
+    g_obj = cudf.from_pandas(p_obj)
+
+    if klass is pd.Series:
+        p_obj = p_obj.dt
+        g_obj = g_obj.dt
+
+    expect = getattr(p_obj, meth)()
+    got = getattr(g_obj, meth)()
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.parametrize("meth", ["day_name", "month_name"])
+@pytest.mark.parametrize("klass", [cudf.Series, cudf.DatetimeIndex])
+def test_day_month_name_locale_not_implemented(meth, klass):
+    obj = klass(cudf.date_range("2020-01-01", periods=7))
+    if klass is cudf.Series:
+        obj = obj.dt
+    with pytest.raises(NotImplementedError):
+        getattr(obj, meth)(locale="pt_BR.utf8")