Skip to content

Commit

Permalink
Implement day_name and month_name to match pandas (#15479)
Browse files Browse the repository at this point in the history
This PR implements the `month_name` and `day_name` datetime methods, matching the equivalent [month_name](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.month_name.html) and [day_name](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.day_name.html) methods from pandas.

Currently this is implemented only for the English locale, though it could be expanded to include additional languages in the future.

Closes #12407

Authors:
  - Brian Tepera (https://github.com/btepera)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15479
  • Loading branch information
btepera authored Jun 3, 2024
1 parent e66f4f5 commit ba1299d
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 0 deletions.
29 changes: 29 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import calendar
import datetime
import functools
import locale
Expand Down Expand Up @@ -339,6 +340,34 @@ def element_indexing(self, index: int):
def get_dt_field(self, field: str) -> ColumnBase:
    """Extract the datetime component named ``field`` from this column.

    Delegates to ``libcudf.datetime.extract_datetime_component``.
    """
    component = libcudf.datetime.extract_datetime_component(self, field)
    return component

def _get_field_names(
self,
field: Literal["month", "weekday"],
labels: list[str],
locale: str | None = None,
) -> ColumnBase:
if locale is not None:
raise NotImplementedError(
"Setting a locale is currently not supported. "
"Results will be returned in your current locale."
)
col_labels = as_column(labels)
indices = self.get_dt_field(field)
has_nulls = indices.has_nulls()
if has_nulls:
indices = indices.fillna(len(col_labels))
return col_labels.take(indices, nullify=True, check_bounds=has_nulls)

def get_day_names(self, locale: str | None = None) -> ColumnBase:
    """Return the weekday name for each element of this column.

    Labels come from ``calendar.day_name`` and are looked up through
    ``_get_field_names`` using the ``"weekday"`` field.
    """
    weekday_labels = list(calendar.day_name)
    return self._get_field_names("weekday", weekday_labels, locale=locale)

def get_month_names(self, locale: str | None = None) -> ColumnBase:
    """Return the month name for each element of this column.

    Labels come from ``calendar.month_name`` and are looked up through
    ``_get_field_names`` using the ``"month"`` field.
    """
    month_labels = list(calendar.month_name)
    return self._get_field_names("month", month_labels, locale=locale)

def ceil(self, freq: str) -> ColumnBase:
    """Return the result of ``libcudf.datetime.ceil_datetime`` for ``freq``."""
    rounded_up = libcudf.datetime.ceil_datetime(self, freq)
    return rounded_up

Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2120,6 +2120,45 @@ def quarter(self):
res = extract_quarter(self._values)
return Index(res, dtype="int8")

@_cudf_nvtx_annotate
def day_name(self, locale: str | None = None) -> Index:
    """
    Return the day names. Currently supports English locale only.

    Examples
    --------
    >>> import cudf
    >>> datetime_index = cudf.date_range("2016-12-31", "2017-01-08", freq="D")
    >>> datetime_index
    DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
                   '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07'],
                  dtype='datetime64[ns]', freq='D')
    >>> datetime_index.day_name()
    Index(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
           'Friday', 'Saturday'], dtype='object')
    """
    # Build the result under the same name as this index.
    return Index._from_data({self.name: self._column.get_day_names(locale)})

@_cudf_nvtx_annotate
def month_name(self, locale: str | None = None) -> Index:
    """
    Return the month names. Currently supports English locale only.

    Examples
    --------
    >>> import cudf
    >>> datetime_index = cudf.date_range("2017-12-30", periods=6, freq='W')
    >>> datetime_index
    DatetimeIndex(['2017-12-30', '2018-01-06', '2018-01-13', '2018-01-20',
                   '2018-01-27', '2018-02-03'],
                  dtype='datetime64[ns]', freq='7D')
    >>> datetime_index.month_name()
    Index(['December', 'January', 'January', 'January', 'January', 'February'], dtype='object')
    """
    # Build the result under the same name as this index.
    return Index._from_data({self.name: self._column.get_month_names(locale)})

@_cudf_nvtx_annotate
def isocalendar(self):
"""
Expand Down
72 changes: 72 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4201,6 +4201,78 @@ def quarter(self):
name=self.series.name,
)

@_cudf_nvtx_annotate
def day_name(self, locale: str | None = None) -> Series:
    """
    Return the day names. Currently supports English locale only.

    Parameters
    ----------
    locale : str, optional
        Not supported yet; passing anything other than ``None``
        raises ``NotImplementedError``.

    Examples
    --------
    >>> import cudf
    >>> datetime_series = cudf.Series(cudf.date_range('2016-12-31',
    ...     periods=8, freq='D'))
    >>> datetime_series
    0   2016-12-31
    1   2017-01-01
    2   2017-01-02
    3   2017-01-03
    4   2017-01-04
    5   2017-01-05
    6   2017-01-06
    7   2017-01-07
    dtype: datetime64[ns]
    >>> datetime_series.dt.day_name()
    0     Saturday
    1       Sunday
    2       Monday
    3      Tuesday
    4    Wednesday
    5     Thursday
    6       Friday
    7     Saturday
    dtype: object
    """
    day_names = self.series._column.get_day_names(locale)
    # The result keeps the index and name of the wrapped series.
    return Series._from_data(
        ColumnAccessor({None: day_names}),
        index=self.series.index,
        name=self.series.name,
    )

@_cudf_nvtx_annotate
def month_name(self, locale: str | None = None) -> Series:
    """
    Return the month names. Currently supports English locale only.

    Examples
    --------
    >>> import cudf
    >>> datetime_series = cudf.Series(cudf.date_range("2017-12-30", periods=6, freq='W'))
    >>> datetime_series
    0   2017-12-30
    1   2018-01-06
    2   2018-01-13
    3   2018-01-20
    4   2018-01-27
    5   2018-02-03
    dtype: datetime64[ns]
    >>> datetime_series.dt.month_name()
    0    December
    1     January
    2     January
    3     January
    4     January
    5    February
    dtype: object
    """
    source = self.series
    names_by_label = ColumnAccessor(
        {None: source._column.get_month_names(locale)}
    )
    # The result keeps the index and name of the wrapped series.
    return Series._from_data(
        names_by_label,
        index=source.index,
        name=source.name,
    )

@_cudf_nvtx_annotate
def isocalendar(self):
"""
Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2419,3 +2419,42 @@ def test_date_range_tz():
result = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
expected = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
assert_eq(result, expected)


@pytest.mark.parametrize("meth", ["day_name", "month_name"])
@pytest.mark.parametrize("klass", [pd.Series, pd.DatetimeIndex])
def test_day_month_name(meth, klass):
    """Check that cudf's ``meth`` agrees with pandas, including null entries."""
    timestamps = [
        "2020-05-31 08:00:00",
        None,
        "1999-12-31 18:40:00",
        "2000-12-31 04:00:00",
        None,
        "1900-02-28 07:00:00",
        "1800-03-14 07:30:00",
        "2100-03-14 07:30:00",
        "1970-01-01 00:00:00",
        "1969-12-31 12:59:00",
    ]

    pandas_obj = klass(timestamps, dtype="datetime64[s]")
    cudf_obj = cudf.from_pandas(pandas_obj)

    # Series expose the method through the ``.dt`` accessor.
    if klass is pd.Series:
        pandas_obj, cudf_obj = pandas_obj.dt, cudf_obj.dt

    assert_eq(getattr(pandas_obj, meth)(), getattr(cudf_obj, meth)())


@pytest.mark.parametrize("meth", ["day_name", "month_name"])
@pytest.mark.parametrize("klass", [cudf.Series, cudf.DatetimeIndex])
def test_day_month_name_locale_not_implemented(meth, klass):
    """Check that passing ``locale`` raises ``NotImplementedError``."""
    dates = klass(cudf.date_range("2020-01-01", periods=7))
    # Series expose the method through the ``.dt`` accessor.
    target = dates.dt if klass is cudf.Series else dates
    with pytest.raises(NotImplementedError):
        getattr(target, meth)(locale="pt_BR.utf8")

0 comments on commit ba1299d

Please sign in to comment.