Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enabled Holiday types in cudf.pandas #15664

Merged
merged 8 commits into from
May 9, 2024
135 changes: 134 additions & 1 deletion python/cudf/cudf/pandas/_wrappers/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@
import sys

import pandas as pd
from pandas.tseries.holiday import (
AbstractHolidayCalendar as pd_AbstractHolidayCalendar,
EasterMonday as pd_EasterMonday,
GoodFriday as pd_GoodFriday,
Holiday as pd_Holiday,
HolidayCalendarFactory as pd_HolidayCalendarFactory,
HolidayCalendarMetaClass as pd_HolidayCalendarMetaClass,
USColumbusDay as pd_USColumbusDay,
USFederalHolidayCalendar as pd_USFederalHolidayCalendar,
USLaborDay as pd_USLaborDay,
USMartinLutherKingJr as pd_USMartinLutherKingJr,
USMemorialDay as pd_USMemorialDay,
USPresidentsDay as pd_USPresidentsDay,
USThanksgivingDay as pd_USThanksgivingDay,
)

import cudf

Expand Down Expand Up @@ -37,7 +52,6 @@
XportReader as pd_XportReader,
)


# TODO(pandas2.1): Can import from pandas.api.typing
from pandas.core.resample import ( # isort: skip
Resampler as pd_Resampler,
Expand Down Expand Up @@ -882,6 +896,125 @@ def _df_query_method(self, *args, local_dict=None, global_dict=None, **kwargs):
"_SAS7BDATReader", _Unusable, pd_SAS7BDATReader
)

# Proxy type wrapping pandas' USFederalHolidayCalendar.
# `_Unusable` is passed in the `fast_type` position and `_Unusable()` instances
# are given for both conversion hooks, i.e. no fast implementation or
# fast<->slow conversion is supplied for this type.
# NOTE(review): `__hash__` is exposed explicitly through `_FastSlowAttribute`,
# presumably so hashing is forwarded to the wrapped object -- confirm.
USFederalHolidayCalendar = make_final_proxy_type(
"USFederalHolidayCalendar",
_Unusable,
pd_USFederalHolidayCalendar,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy type wrapping pandas' HolidayCalendarMetaClass, declared with the same
# `_Unusable` fast type / conversion hooks and `__hash__` attribute as above.
HolidayCalendarMetaClass = make_final_proxy_type(
"HolidayCalendarMetaClass",
_Unusable,
pd_HolidayCalendarMetaClass,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)


@register_proxy_func(pd_HolidayCalendarFactory)
def holiday_calendar_factory_wrapper(*args, **kwargs):
    """Invoke pandas' ``HolidayCalendarFactory`` and unwrap the result.

    All positional and keyword arguments are forwarded unchanged to a
    function proxy built around ``pd_HolidayCalendarFactory`` (with
    ``_Unusable()`` in the fast slot).

    Returns
    -------
    The ``_fsproxy_slow`` attribute of the object produced by the proxied
    call, i.e. the slow object behind the proxy result.
    """
    # Function proxy around the original pandas factory; no fast counterpart.
    proxied_factory = _FunctionProxy(_Unusable(), pd_HolidayCalendarFactory)
    calendar_proxy = proxied_factory(*args, **kwargs)
    # Hand back the slow object rather than the proxy wrapper.
    return calendar_proxy._fsproxy_slow


# Proxy type wrapping pandas' AbstractHolidayCalendar.
# Unlike the other holiday proxies here, this one forwards
# `meta_class=pd_HolidayCalendarMetaClass`; make_final_proxy_type then builds
# the proxy class with a composite of `_FastSlowProxyMeta` and that metaclass.
# `_Unusable` fills the `fast_type` position and both conversion hooks.
AbstractHolidayCalendar = make_final_proxy_type(
"AbstractHolidayCalendar",
_Unusable,
pd_AbstractHolidayCalendar,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
meta_class=pd_HolidayCalendarMetaClass,
)

# Proxy type wrapping pandas' Holiday. Same pattern as every declaration that
# follows: `_Unusable` as the fast type, `_Unusable()` for both conversion
# hooks, and `__hash__` exposed through `_FastSlowAttribute`.
Holiday = make_final_proxy_type(
"Holiday",
_Unusable,
pd_Holiday,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)
# Proxies for the predefined US holiday objects exported by
# pandas.tseries.holiday (imported at the top of the file with a `pd_` prefix).
USThanksgivingDay = make_final_proxy_type(
"USThanksgivingDay",
_Unusable,
pd_USThanksgivingDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

USColumbusDay = make_final_proxy_type(
"USColumbusDay",
_Unusable,
pd_USColumbusDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

USLaborDay = make_final_proxy_type(
"USLaborDay",
_Unusable,
pd_USLaborDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

USMemorialDay = make_final_proxy_type(
"USMemorialDay",
_Unusable,
pd_USMemorialDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

USMartinLutherKingJr = make_final_proxy_type(
"USMartinLutherKingJr",
_Unusable,
pd_USMartinLutherKingJr,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

USPresidentsDay = make_final_proxy_type(
"USPresidentsDay",
_Unusable,
pd_USPresidentsDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)


# Proxies for the Easter-related holiday objects from pandas.tseries.holiday.
GoodFriday = make_final_proxy_type(
"GoodFriday",
_Unusable,
pd_GoodFriday,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

EasterMonday = make_final_proxy_type(
"EasterMonday",
_Unusable,
pd_EasterMonday,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

FY5253 = make_final_proxy_type(
"FY5253",
Expand Down
21 changes: 20 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ def __call__(self):
_DELETE = object()


def create_composite_metaclass(base_meta: type, additional_meta: type) -> type:
    """
    Dynamically create a metaclass inheriting from both given metaclasses.

    The returned metaclass lists ``base_meta`` before ``additional_meta`` in
    its bases, so attribute lookup (including ``__new__``/``__init__``)
    prefers ``base_meta`` and falls through to ``additional_meta``.

    Parameters
    ----------
    base_meta
        The primary metaclass; comes first in the MRO.
    additional_meta
        The metaclass whose behavior should be mixed in.

    Returns
    -------
    type
        A new metaclass named ``CompositeMeta`` deriving from both inputs.

    Raises
    ------
    TypeError
        If Python cannot build a consistent MRO from the two metaclasses
        (for example when ``additional_meta`` is a strict subclass of
        ``base_meta``).
    """
    # Listing the same class twice as a base raises "duplicate base class",
    # so collapse the degenerate case into a single base.
    if base_meta is additional_meta:
        meta_bases = (base_meta,)
    else:
        meta_bases = (base_meta, additional_meta)

    class CompositeMeta(*meta_bases):
        """Metaclass combining the behavior of all of its base metaclasses."""

        # Deliberately no ``__new__`` override: the inherited implementation
        # already dispatches cooperatively along the MRO, and it keeps class
        # keyword arguments (``class X(Base, flag=...)``) working, which a
        # fixed ``(cls, name, bases, namespace)`` signature would reject.

    return CompositeMeta


def make_final_proxy_type(
name: str,
fast_type: type,
Expand All @@ -114,6 +127,7 @@ def make_final_proxy_type(
additional_attributes: Mapping[str, Any] | None = None,
postprocess: Callable[[_FinalProxy, Any, Any], Any] | None = None,
bases: Tuple = (),
meta_class=None,
) -> Type[_FinalProxy]:
"""
Defines a fast-slow proxy type for a pair of "final" fast and slow
Expand Down Expand Up @@ -217,10 +231,15 @@ def _fsproxy_state(self) -> _State:
elif v is not _DELETE:
cls_dict[k] = v

if meta_class is None:
meta_class = _FastSlowProxyMeta
else:
meta_class = create_composite_metaclass(_FastSlowProxyMeta, meta_class)

cls = types.new_class(
name,
(*bases, _FinalProxy),
{"metaclass": _FastSlowProxyMeta},
{"metaclass": meta_class},
lambda ns: ns.update(cls_dict),
)
functools.update_wrapper(
Expand Down
74 changes: 74 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pyarrow as pa
import pytest
from numba import NumbaDeprecationWarning
from pytz import utc

from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable
Expand All @@ -25,6 +26,19 @@

import pandas as xpd
import pandas._testing as tm
from pandas.tseries.holiday import (
AbstractHolidayCalendar,
EasterMonday,
GoodFriday,
Holiday,
USColumbusDay,
USLaborDay,
USMartinLutherKingJr,
USMemorialDay,
USPresidentsDay,
USThanksgivingDay,
get_calendar,
)

# Accelerated pandas has the real pandas module as an attribute
pd = xpd._fsproxy_slow
Expand Down Expand Up @@ -1255,3 +1269,63 @@ def test_timedelta(data):
xtimedelta = xpd.Timedelta(data)
timedelta = pd.Timedelta(data)
tm.assert_equal(xtimedelta, timedelta)


def test_abstract_holiday_calendar():
    """Calendars built from different rules must report independent holidays.

    Two instances of the same ``AbstractHolidayCalendar`` subclass are created
    with different single-day rules; querying one must not alter the result
    of the other.
    """

    class TestCalendar(AbstractHolidayCalendar):
        def __init__(self, name=None, rules=None) -> None:
            super().__init__(name=name, rules=rules)

    # Build both calendars up front, then check them in order: getting the
    # holidays for Jan 1 should not alter the results for Jan 2.
    calendar_cases = (
        (
            TestCalendar(rules=[Holiday("jan1", year=2015, month=1, day=1)]),
            "01-Jan-2015",
        ),
        (
            TestCalendar(rules=[Holiday("jan2", year=2015, month=1, day=2)]),
            "02-Jan-2015",
        ),
    )

    for calendar, day_label in calendar_cases:
        expected_index = xpd.DatetimeIndex([day_label]).as_unit("ns")
        tm.assert_index_equal(calendar.holidays(), expected_index)


@pytest.mark.parametrize(
    "holiday,start,expected",
    [
        (USMemorialDay, datetime.datetime(2015, 7, 1), []),
        (USLaborDay, "2015-09-07", [xpd.Timestamp("2015-09-07")]),
        (USColumbusDay, "2015-10-12", [xpd.Timestamp("2015-10-12")]),
        (USThanksgivingDay, "2015-11-26", [xpd.Timestamp("2015-11-26")]),
        (USMartinLutherKingJr, "2015-01-19", [xpd.Timestamp("2015-01-19")]),
        (USPresidentsDay, datetime.datetime(2015, 7, 1), []),
        (GoodFriday, datetime.datetime(2015, 7, 1), []),
        (EasterMonday, "2015-04-06", [xpd.Timestamp("2015-04-06")]),
        ("New Year's Day", "2010-12-31", [xpd.Timestamp("2010-12-31")]),
        ("Independence Day", "2015-07-03", [xpd.Timestamp("2015-07-03")]),
        ("Veterans Day", "2012-11-11", []),
        ("Christmas Day", "2011-12-26", [xpd.Timestamp("2011-12-26")]),
        (
            "Juneteenth National Independence Day",
            "2021-06-18",
            [xpd.Timestamp("2021-06-18")],
        ),
        ("Juneteenth National Independence Day", "2022-06-19", []),
        (
            "Juneteenth National Independence Day",
            "2022-06-20",
            [xpd.Timestamp("2022-06-20")],
        ),
    ],
)
def test_holidays_within_dates(holiday, start, expected):
    """Check ``Holiday.dates`` over the one-day window ``[start, start]``.

    ``holiday`` is either a holiday object or the name of a rule, in which
    case the rule is looked up on the ``USFederalHolidayCalendar``.
    ``expected`` is the list of timestamps the window should yield.
    """
    # String entries name a rule on the US federal calendar.
    if isinstance(holiday, str):
        federal_calendar = get_calendar("USFederalHolidayCalendar")
        holiday = federal_calendar.rule_from_name(holiday)

    def _as_utc(value):
        # Convert to a Timestamp first so both str and datetime inputs work.
        return utc.localize(xpd.Timestamp(value))

    naive_dates = list(holiday.dates(start, start))
    assert naive_dates == expected

    # Verify that timezone info is preserved.
    aware_dates = list(holiday.dates(_as_utc(start), _as_utc(start)))
    assert aware_dates == [utc.localize(dt) for dt in expected]
Loading