From 07b8859808d1699b13f21362631e54ace6aacdc2 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 6 May 2024 16:41:17 +0000 Subject: [PATCH 1/4] Enable Holiday types --- python/cudf/cudf/pandas/_wrappers/pandas.py | 135 +++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 3c82d571939..2ac4f66532e 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -7,6 +7,21 @@ import sys import pandas as pd +from pandas.tseries.holiday import ( + AbstractHolidayCalendar as pd_AbstractHolidayCalendar, + EasterMonday as pd_EasterMonday, + GoodFriday as pd_GoodFriday, + Holiday as pd_Holiday, + HolidayCalendarFactory as pd_HolidayCalendarFactory, + HolidayCalendarMetaClass as pd_HolidayCalendarMetaClass, + USColumbusDay as pd_USColumbusDay, + USFederalHolidayCalendar as pd_USFederalHolidayCalendar, + USLaborDay as pd_USLaborDay, + USMartinLutherKingJr as pd_USMartinLutherKingJr, + USMemorialDay as pd_USMemorialDay, + USPresidentsDay as pd_USPresidentsDay, + USThanksgivingDay as pd_USThanksgivingDay, +) import cudf @@ -37,7 +52,6 @@ XportReader as pd_XportReader, ) - # TODO(pandas2.1): Can import from pandas.api.typing from pandas.core.resample import ( # isort: skip Resampler as pd_Resampler, @@ -827,6 +841,125 @@ def _df_query_method(self, *args, local_dict=None, global_dict=None, **kwargs): "_SAS7BDATReader", _Unusable, pd_SAS7BDATReader ) +USFederalHolidayCalendar = make_final_proxy_type( + "USFederalHolidayCalendar", + _Unusable, + pd_USFederalHolidayCalendar, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +HolidayCalendarMetaClass = make_final_proxy_type( + "HolidayCalendarMetaClass", + _Unusable, + pd_HolidayCalendarMetaClass, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + + +@register_proxy_func(pd_HolidayCalendarFactory) +def holiday_calendar_factory_wrapper(*args, **kwargs): + # Call the original HolidayCalendarFactory + result = _FunctionProxy(_Unusable(), pd_HolidayCalendarFactory)( + *args, **kwargs + ) + # Return the slow proxy of the result + return result._fsproxy_slow + + +AbstractHolidayCalendar = make_final_proxy_type( + "AbstractHolidayCalendar", + _Unusable, + pd_AbstractHolidayCalendar, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, + meta_class=pd_HolidayCalendarMetaClass, +) + +Holiday = make_final_proxy_type( + "Holiday", + _Unusable, + pd_Holiday, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) +USThanksgivingDay = make_final_proxy_type( + "USThanksgivingDay", + _Unusable, + pd_USThanksgivingDay, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +USColumbusDay = make_final_proxy_type( + "USColumbusDay", + _Unusable, + pd_USColumbusDay, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +USLaborDay = make_final_proxy_type( + "USLaborDay", + _Unusable, + pd_USLaborDay, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +USMemorialDay = make_final_proxy_type( + "USMemorialDay", + _Unusable, + pd_USMemorialDay, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +USMartinLutherKingJr = make_final_proxy_type( + "USMartinLutherKingJr", + _Unusable, + pd_USMartinLutherKingJr, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +USPresidentsDay = make_final_proxy_type( + "USPresidentsDay", + _Unusable, + pd_USPresidentsDay, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + + +GoodFriday = make_final_proxy_type( + "GoodFriday", + _Unusable, + pd_GoodFriday, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) + +EasterMonday = make_final_proxy_type( + "EasterMonday", + _Unusable, + pd_EasterMonday, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), + additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, +) FY5253 = make_final_proxy_type( "FY5253", From 331b9f66fb5ab455c2060b3b82ab57ff0a3eda2b Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 6 May 2024 16:46:46 +0000 Subject: [PATCH 2/4] Add composite metaclass --- python/cudf/cudf/pandas/fast_slow_proxy.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 9d8c174b297..ceb027f4460 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -103,6 +103,19 @@ def __call__(self): _DELETE = object() +def create_composite_metaclass(base_meta, additional_meta): + """ + Dynamically creates a composite metaclass that inherits from both provided metaclasses. + This ensures that the metaclass behaviors of both base_meta and additional_meta are preserved. + """ + + class CompositeMeta(base_meta, additional_meta): + def __new__(cls, name, bases, namespace): + return super().__new__(cls, name, bases, namespace) + + return CompositeMeta + + def make_final_proxy_type( name: str, fast_type: type, @@ -114,6 +127,7 @@ def make_final_proxy_type( additional_attributes: Mapping[str, Any] | None = None, postprocess: Callable[[_FinalProxy, Any, Any], Any] | None = None, bases: Tuple = (), + meta_class=None, ) -> Type[_FinalProxy]: """ Defines a fast-slow proxy type for a pair of "final" fast and slow @@ -217,10 +231,15 @@ def _fsproxy_state(self) -> _State: elif v is not _DELETE: cls_dict[k] = v + if meta_class is None: + meta_class = _FastSlowProxyMeta + else: + meta_class = create_composite_metaclass(_FastSlowProxyMeta, meta_class) + cls = types.new_class( name, (*bases, _FinalProxy), - {"metaclass": _FastSlowProxyMeta}, + {"metaclass": meta_class}, lambda ns: ns.update(cls_dict), ) functools.update_wrapper( From 53dc0d0cc7d12c39079fd902250c314c417201cd Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 8 May 2024 19:54:21 +0000 Subject: [PATCH 3/4] Add tests --- .../cudf_pandas_tests/test_cudf_pandas.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index aa937d3ed4f..04e7ba618d9 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -16,6 +16,7 @@ import pyarrow as pa import pytest from numba import NumbaDeprecationWarning +from pytz import utc from cudf.pandas import LOADED, Profiler from cudf.pandas.fast_slow_proxy import _Unusable @@ -25,6 +26,19 @@ import pandas as xpd import pandas._testing as tm +from pandas.tseries.holiday import ( + AbstractHolidayCalendar, + EasterMonday, + GoodFriday, + Holiday, + USColumbusDay, + USLaborDay, + USMartinLutherKingJr, + USMemorialDay, + USPresidentsDay, + USThanksgivingDay, + get_calendar, +) # Accelerated pandas has the real pandas module as an attribute pd = xpd._fsproxy_slow @@ -1255,3 +1269,68 @@ def test_timedelta(data): xtimedelta = xpd.Timedelta(data) timedelta = pd.Timedelta(data) tm.assert_equal(xtimedelta, timedelta) + + +def test_abstract_holiday_calendar(): + class TestCalendar(AbstractHolidayCalendar): + def __init__(self, name=None, rules=None) -> None: + super().__init__(name=name, rules=rules) + + jan1 = TestCalendar(rules=[Holiday("jan1", year=2015, month=1, day=1)]) + jan2 = TestCalendar(rules=[Holiday("jan2", year=2015, month=1, day=2)]) + + # Getting holidays for Jan 1 should not alter results for Jan 2. + expected = xpd.DatetimeIndex(["01-Jan-2015"]).as_unit("ns") + tm.assert_index_equal(jan1.holidays(), expected) + + expected2 = xpd.DatetimeIndex(["02-Jan-2015"]).as_unit("ns") + tm.assert_index_equal(jan2.holidays(), expected2) + + +@pytest.mark.parametrize( + "holiday,start,expected", + [ + (USMemorialDay, datetime.datetime(2015, 7, 1), []), + (USLaborDay, "2015-09-07", [xpd.Timestamp("2015-09-07")]), + (USColumbusDay, "2015-10-12", [xpd.Timestamp("2015-10-12")]), + (USThanksgivingDay, "2015-11-26", [xpd.Timestamp("2015-11-26")]), + (USMartinLutherKingJr, "2015-01-19", [xpd.Timestamp("2015-01-19")]), + (USPresidentsDay, datetime.datetime(2015, 7, 1), []), + (GoodFriday, datetime.datetime(2015, 7, 1), []), + (EasterMonday, "2015-04-06", [xpd.Timestamp("2015-04-06")]), + ("New Year's Day", "2010-12-31", [xpd.Timestamp("2010-12-31")]), + ("Independence Day", "2015-07-03", [xpd.Timestamp("2015-07-03")]), + ("Veterans Day", "2012-11-11", []), + ("Christmas Day", "2011-12-26", [xpd.Timestamp("2011-12-26")]), + ( + "Juneteenth National Independence Day", + "2021-06-18", + [xpd.Timestamp("2021-06-18")], + ), + ("Juneteenth National Independence Day", "2022-06-19", []), + ( + "Juneteenth National Independence Day", + "2022-06-20", + [xpd.Timestamp("2022-06-20")], + ), + ], +) +def test_holidays_within_dates(holiday, start, expected): + # see gh-11477 + # + # Fix holiday behavior where holiday.dates returned dates outside + # start/end date, or observed rules could not be applied because the + # holiday was not in the original date range (e.g., 7/4/2015 -> 7/3/2015). + if isinstance(holiday, str): + calendar = get_calendar("USFederalHolidayCalendar") + holiday = calendar.rule_from_name(holiday) + + assert list(holiday.dates(start, start)) == expected + + # Verify that timezone info is preserved. + assert list( + holiday.dates( + utc.localize(xpd.Timestamp(start)), + utc.localize(xpd.Timestamp(start)), + ) + ) == [utc.localize(dt) for dt in expected] From ff1ef0f179e56626a8e20612b39d55f2b0af87d2 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 8 May 2024 14:56:33 -0500 Subject: [PATCH 4/4] Update test_cudf_pandas.py --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 04e7ba618d9..c11488adf5a 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1316,11 +1316,6 @@ def __init__(self, name=None, rules=None) -> None: ], ) def test_holidays_within_dates(holiday, start, expected): - # see gh-11477 - # - # Fix holiday behavior where holiday.dates returned dates outside - # start/end date, or observed rules could not be applied because the - # holiday was not in the original date range (e.g., 7/4/2015 -> 7/3/2015). if isinstance(holiday, str): calendar = get_calendar("USFederalHolidayCalendar") holiday = calendar.rule_from_name(holiday)