Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement day_name and month_name to match pandas #15479

Merged
merged 10 commits into from
Jun 3, 2024
29 changes: 29 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import calendar
import datetime
import functools
import locale
Expand Down Expand Up @@ -339,6 +340,34 @@ def element_indexing(self, index: int):
def get_dt_field(self, field: str) -> ColumnBase:
    """
    Extract a single datetime component from this column.

    Parameters
    ----------
    field : str
        Name of the component, passed unchanged to
        ``libcudf.datetime.extract_datetime_component``.

    Returns
    -------
    ColumnBase
        Whatever ``extract_datetime_component`` produces for ``field``.
    """
    component = libcudf.datetime.extract_datetime_component(self, field)
    return component

def _get_field_names(
    self,
    field: Literal["month", "weekday"],
    labels: list[str],
    locale: str | None = None,
) -> ColumnBase:
    """
    Translate an extracted datetime field into textual labels.

    Parameters
    ----------
    field : {"month", "weekday"}
        Datetime component extracted with ``get_dt_field``; its values
        are used as positions into ``labels``.
    labels : list of str
        Labels looked up by the extracted field value.
    locale : str, optional
        Must be ``None``; choosing a locale is not implemented.

    Returns
    -------
    ColumnBase
        The result of gathering ``labels`` at the extracted positions.

    Raises
    ------
    NotImplementedError
        If ``locale`` is not ``None``.
    """
    if locale is not None:
        # Nothing is returned on this path, so the message must not
        # promise results; point the caller at the supported call instead.
        raise NotImplementedError(
            "Setting a locale is currently not supported. "
            "Leave locale=None to get results in your current locale."
        )
    col_labels = as_column(labels)
    indices = self.get_dt_field(field)
    has_nulls = indices.has_nulls()
    if has_nulls:
        # Null positions are redirected to ``len(col_labels)``, one past
        # the last valid label.
        # NOTE(review): presumably ``take(..., nullify=True)`` turns that
        # out-of-range position back into a null -- confirm.
        indices = indices.fillna(len(col_labels))
    return col_labels.take(indices, nullify=True, check_bounds=has_nulls)

def get_day_names(self, locale: str | None = None) -> ColumnBase:
    """
    Return the weekday name for each element of this column.

    The labels are taken from ``calendar.day_name`` and looked up by the
    ``"weekday"`` field through ``_get_field_names``.

    Parameters
    ----------
    locale : str, optional
        Forwarded unchanged to ``_get_field_names``.

    Returns
    -------
    ColumnBase
        The value returned by ``_get_field_names``.
    """
    # NOTE(review): ``calendar.day_name`` follows the process locale, while
    # the public docstrings advertise English only -- confirm the intent.
    weekday_labels = list(calendar.day_name)
    return self._get_field_names("weekday", weekday_labels, locale=locale)

def get_month_names(self, locale: str | None = None) -> ColumnBase:
    """
    Return the month name for each element of this column.

    The labels are taken from ``calendar.month_name`` and looked up by the
    ``"month"`` field through ``_get_field_names``.

    Parameters
    ----------
    locale : str, optional
        Forwarded unchanged to ``_get_field_names``.

    Returns
    -------
    ColumnBase
        The value returned by ``_get_field_names``.
    """
    # ``calendar.month_name`` has 13 entries with an empty string at
    # position 0, so the label for January sits at position 1.
    month_labels = list(calendar.month_name)
    return self._get_field_names("month", month_labels, locale=locale)

def ceil(self, freq: str) -> ColumnBase:
    """
    Apply ``libcudf.datetime.ceil_datetime`` to this column.

    Parameters
    ----------
    freq : str
        Frequency specifier, passed unchanged to ``ceil_datetime``.

    Returns
    -------
    ColumnBase
        Whatever ``ceil_datetime`` produces for ``freq``.
    """
    ceiled = libcudf.datetime.ceil_datetime(self, freq)
    return ceiled

Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2120,6 +2120,45 @@ def quarter(self):
res = extract_quarter(self._values)
return Index(res, dtype="int8")

@_cudf_nvtx_annotate
def day_name(self, locale: str | None = None) -> Index:
    """
    Return the day names. Currently supports English locale only.

    Parameters
    ----------
    locale : str, optional
        Forwarded unchanged to the underlying column's ``get_day_names``.

    Returns
    -------
    Index
        Index of day names carrying the same name as this index.

    Examples
    --------
    >>> import cudf
    >>> datetime_index = cudf.date_range("2016-12-31", "2017-01-08", freq="D")
    >>> datetime_index
    DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
                   '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07'],
                  dtype='datetime64[ns]', freq='D')
    >>> datetime_index.day_name()
    Index(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
           'Friday', 'Saturday'], dtype='object')
    """
    names_column = self._column.get_day_names(locale)
    labelled = {self.name: names_column}
    return Index._from_data(labelled)

@_cudf_nvtx_annotate
def month_name(self, locale: str | None = None) -> Index:
    """
    Return the month names. Currently supports English locale only.

    Parameters
    ----------
    locale : str, optional
        Forwarded unchanged to the underlying column's ``get_month_names``.

    Returns
    -------
    Index
        Index of month names carrying the same name as this index.

    Examples
    --------
    >>> import cudf
    >>> datetime_index = cudf.date_range("2017-12-30", periods=6, freq='W')
    >>> datetime_index
    DatetimeIndex(['2017-12-30', '2018-01-06', '2018-01-13', '2018-01-20',
                   '2018-01-27', '2018-02-03'],
                  dtype='datetime64[ns]', freq='7D')
    >>> datetime_index.month_name()
    Index(['December', 'January', 'January', 'January', 'January', 'February'], dtype='object')
    """
    names_column = self._column.get_month_names(locale)
    labelled = {self.name: names_column}
    return Index._from_data(labelled)

@_cudf_nvtx_annotate
def isocalendar(self):
"""
Expand Down
72 changes: 72 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4199,6 +4199,78 @@ def quarter(self):
name=self.series.name,
)

@_cudf_nvtx_annotate
def day_name(self, locale: str | None = None) -> Series:
    """
    Return the day names. Currently supports English locale only.

    Parameters
    ----------
    locale : str, optional
        Forwarded unchanged to the underlying column's ``get_day_names``.

    Returns
    -------
    Series
        Day names, built with the index and name of the source Series.

    Examples
    --------
    >>> import cudf
    >>> datetime_series = cudf.Series(cudf.date_range('2016-12-31',
    ...     '2017-01-08', freq='D'))
    >>> datetime_series
    0   2016-12-31
    1   2017-01-01
    2   2017-01-02
    3   2017-01-03
    4   2017-01-04
    5   2017-01-05
    6   2017-01-06
    7   2017-01-07
    dtype: datetime64[ns]
    >>> datetime_series.dt.day_name()
    0     Saturday
    1       Sunday
    2       Monday
    3      Tuesday
    4    Wednesday
    5     Thursday
    6       Friday
    7     Saturday
    dtype: object
    """
    day_names = self.series._column.get_day_names(locale)
    return Series._from_data(
        ColumnAccessor({None: day_names}),
        index=self.series.index,
        name=self.series.name,
    )

@_cudf_nvtx_annotate
def month_name(self, locale: str | None = None) -> Series:
    """
    Return the month names. Currently supports English locale only.

    Parameters
    ----------
    locale : str, optional
        Forwarded unchanged to the underlying column's ``get_month_names``.

    Returns
    -------
    Series
        Month names, built with the index and name of the source Series.

    Examples
    --------
    >>> import cudf
    >>> datetime_series = cudf.Series(cudf.date_range("2017-12-30", periods=6, freq='W'))
    >>> datetime_series
    0   2017-12-30
    1   2018-01-06
    2   2018-01-13
    3   2018-01-20
    4   2018-01-27
    5   2018-02-03
    dtype: datetime64[ns]
    >>> datetime_series.dt.month_name()
    0    December
    1     January
    2     January
    3     January
    4     January
    5    February
    dtype: object
    """
    names_column = self.series._column.get_month_names(locale)
    wrapped = ColumnAccessor({None: names_column})
    return Series._from_data(
        wrapped,
        index=self.series.index,
        name=self.series.name,
    )

@_cudf_nvtx_annotate
def isocalendar(self):
"""
Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2419,3 +2419,42 @@ def test_date_range_tz():
result = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
expected = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC")
assert_eq(result, expected)


@pytest.mark.parametrize("meth", ["day_name", "month_name"])
@pytest.mark.parametrize("klass", [pd.Series, pd.DatetimeIndex])
def test_day_month_name(meth, klass):
    """Compare cudf ``day_name``/``month_name`` output with pandas.

    The input mixes nulls with timestamps on both sides of the 1970 epoch.
    """
    timestamps = [
        "2020-05-31 08:00:00",
        None,
        "1999-12-31 18:40:00",
        "2000-12-31 04:00:00",
        None,
        "1900-02-28 07:00:00",
        "1800-03-14 07:30:00",
        "2100-03-14 07:30:00",
        "1970-01-01 00:00:00",
        "1969-12-31 12:59:00",
    ]

    pandas_obj = klass(timestamps, dtype="datetime64[s]")
    cudf_obj = cudf.from_pandas(pandas_obj)

    # On a Series the name methods are reached through the ``.dt`` accessor.
    if klass is pd.Series:
        pandas_target, cudf_target = pandas_obj.dt, cudf_obj.dt
    else:
        pandas_target, cudf_target = pandas_obj, cudf_obj

    expect = getattr(pandas_target, meth)()
    got = getattr(cudf_target, meth)()

    assert_eq(expect, got)


@pytest.mark.parametrize("meth", ["day_name", "month_name"])
@pytest.mark.parametrize("klass", [cudf.Series, cudf.DatetimeIndex])
def test_day_month_name_locale_not_implemented(meth, klass):
    """Passing an explicit ``locale`` must raise ``NotImplementedError``."""
    one_week = cudf.date_range("2020-01-01", periods=7)
    target = klass(one_week)
    # On a Series the name methods are reached through the ``.dt`` accessor.
    if klass is cudf.Series:
        target = target.dt
    with pytest.raises(NotImplementedError):
        getattr(target, meth)(locale="pt_BR.utf8")
Loading