Skip to content

Commit

Permalink
Enabled Holiday types in cudf.pandas (#15664)
Browse files Browse the repository at this point in the history
Fixes: #15663 

This PR enables `Holiday` types in `cudf.pandas`. To support them, it also adds a utility that creates a composite metaclass.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #15664
  • Loading branch information
galipremsagar authored May 9, 2024
1 parent c0c38eb commit c576e97
Show file tree
Hide file tree
Showing 3 changed files with 228 additions and 2 deletions.
135 changes: 134 additions & 1 deletion python/cudf/cudf/pandas/_wrappers/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@
import sys

import pandas as pd
from pandas.tseries.holiday import (
AbstractHolidayCalendar as pd_AbstractHolidayCalendar,
EasterMonday as pd_EasterMonday,
GoodFriday as pd_GoodFriday,
Holiday as pd_Holiday,
HolidayCalendarFactory as pd_HolidayCalendarFactory,
HolidayCalendarMetaClass as pd_HolidayCalendarMetaClass,
USColumbusDay as pd_USColumbusDay,
USFederalHolidayCalendar as pd_USFederalHolidayCalendar,
USLaborDay as pd_USLaborDay,
USMartinLutherKingJr as pd_USMartinLutherKingJr,
USMemorialDay as pd_USMemorialDay,
USPresidentsDay as pd_USPresidentsDay,
USThanksgivingDay as pd_USThanksgivingDay,
)

import cudf

Expand Down Expand Up @@ -37,7 +52,6 @@
XportReader as pd_XportReader,
)


# TODO(pandas2.1): Can import from pandas.api.typing
from pandas.core.resample import ( # isort: skip
Resampler as pd_Resampler,
Expand Down Expand Up @@ -882,6 +896,125 @@ def _df_query_method(self, *args, local_dict=None, global_dict=None, **kwargs):
"_SAS7BDATReader", _Unusable, pd_SAS7BDATReader
)

# Proxy type wrapping pandas' USFederalHolidayCalendar. `_Unusable` is passed
# in the fast-type position and as both conversion callbacks.
# NOTE(review): presumably this means there is no cudf counterpart and the
# proxy always delegates to pandas -- confirm against `_Unusable`.
USFederalHolidayCalendar = make_final_proxy_type(
"USFederalHolidayCalendar",
_Unusable,
pd_USFederalHolidayCalendar,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
# Explicitly provide `__hash__` on the proxy as a fast/slow attribute.
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy type wrapping pandas' HolidayCalendarMetaClass, configured the same
# way as USFederalHolidayCalendar above (no usable fast type or conversions).
HolidayCalendarMetaClass = make_final_proxy_type(
"HolidayCalendarMetaClass",
_Unusable,
pd_HolidayCalendarMetaClass,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
# Explicitly provide `__hash__` on the proxy as a fast/slow attribute.
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)


@register_proxy_func(pd_HolidayCalendarFactory)
def holiday_calendar_factory_wrapper(*args, **kwargs):
    """Replacement registered for ``pandas`` ``HolidayCalendarFactory``.

    All positional and keyword arguments are forwarded unchanged to the
    pandas factory, which is invoked through a ``_FunctionProxy`` that has
    no usable fast implementation. The ``_fsproxy_slow`` attribute of the
    proxied result is returned instead of the proxy itself.
    """
    # Wrap the pandas factory so the call goes through the proxy machinery.
    proxied_factory = _FunctionProxy(_Unusable(), pd_HolidayCalendarFactory)
    calendar = proxied_factory(*args, **kwargs)
    # NOTE(review): presumably callers need the real pandas calendar class
    # here and not a proxy around it -- confirm.
    return calendar._fsproxy_slow


# Proxy type wrapping pandas' AbstractHolidayCalendar; `_Unusable` is passed
# in the fast-type position and as both conversion callbacks.
AbstractHolidayCalendar = make_final_proxy_type(
"AbstractHolidayCalendar",
_Unusable,
pd_AbstractHolidayCalendar,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
# Explicitly provide `__hash__` on the proxy as a fast/slow attribute.
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
# A non-None `meta_class` makes make_final_proxy_type build the proxy with a
# composite metaclass deriving from `_FastSlowProxyMeta` and this pandas
# metaclass, instead of `_FastSlowProxyMeta` alone.
meta_class=pd_HolidayCalendarMetaClass,
)

# Proxy types for the pandas `Holiday` class and the predefined holiday rules
# imported from `pandas.tseries.holiday`. Every call below uses the same
# configuration: `_Unusable` in the fast-type position and as both conversion
# callbacks, plus an explicit `__hash__` fast/slow attribute.
# NOTE(review): presumably these therefore always run on the pandas side --
# confirm against `_Unusable`.
Holiday = make_final_proxy_type(
"Holiday",
_Unusable,
pd_Holiday,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)
# Proxy for pandas' USThanksgivingDay.
USThanksgivingDay = make_final_proxy_type(
"USThanksgivingDay",
_Unusable,
pd_USThanksgivingDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy for pandas' USColumbusDay.
USColumbusDay = make_final_proxy_type(
"USColumbusDay",
_Unusable,
pd_USColumbusDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy for pandas' USLaborDay.
USLaborDay = make_final_proxy_type(
"USLaborDay",
_Unusable,
pd_USLaborDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy for pandas' USMemorialDay.
USMemorialDay = make_final_proxy_type(
"USMemorialDay",
_Unusable,
pd_USMemorialDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy for pandas' USMartinLutherKingJr.
USMartinLutherKingJr = make_final_proxy_type(
"USMartinLutherKingJr",
_Unusable,
pd_USMartinLutherKingJr,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy for pandas' USPresidentsDay.
USPresidentsDay = make_final_proxy_type(
"USPresidentsDay",
_Unusable,
pd_USPresidentsDay,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)


# Proxy for pandas' GoodFriday.
GoodFriday = make_final_proxy_type(
"GoodFriday",
_Unusable,
pd_GoodFriday,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

# Proxy for pandas' EasterMonday.
EasterMonday = make_final_proxy_type(
"EasterMonday",
_Unusable,
pd_EasterMonday,
fast_to_slow=_Unusable(),
slow_to_fast=_Unusable(),
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")},
)

FY5253 = make_final_proxy_type(
"FY5253",
Expand Down
21 changes: 20 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ def __call__(self):
_DELETE = object()


def create_composite_metaclass(base_meta, additional_meta):
    """
    Dynamically create a metaclass deriving from both given metaclasses.

    The returned metaclass lists ``base_meta`` first and ``additional_meta``
    second in its bases, so the behaviors of both are preserved and
    ``base_meta`` takes precedence in method resolution.

    Parameters
    ----------
    base_meta : type
        Metaclass placed first in the bases of the composite.
    additional_meta : type
        Metaclass placed second in the bases of the composite.

    Returns
    -------
    type
        A new metaclass named ``CompositeMeta``.

    Raises
    ------
    TypeError
        If Python cannot derive a class from the two metaclasses, e.g.
        because no consistent method resolution order exists.
    """

    class CompositeMeta(base_meta, additional_meta):
        def __new__(cls, name, bases, namespace, **kwargs):
            # Forward class keyword arguments (``class X(Base, key=value)``)
            # so the parent metaclasses and ``__init_subclass__`` receive
            # them; rejecting them here would raise ``TypeError`` for any
            # class definition that uses such keywords.
            return super().__new__(cls, name, bases, namespace, **kwargs)

    return CompositeMeta


def make_final_proxy_type(
name: str,
fast_type: type,
Expand All @@ -114,6 +127,7 @@ def make_final_proxy_type(
additional_attributes: Mapping[str, Any] | None = None,
postprocess: Callable[[_FinalProxy, Any, Any], Any] | None = None,
bases: Tuple = (),
meta_class=None,
) -> Type[_FinalProxy]:
"""
Defines a fast-slow proxy type for a pair of "final" fast and slow
Expand Down Expand Up @@ -217,10 +231,15 @@ def _fsproxy_state(self) -> _State:
elif v is not _DELETE:
cls_dict[k] = v

if meta_class is None:
meta_class = _FastSlowProxyMeta
else:
meta_class = create_composite_metaclass(_FastSlowProxyMeta, meta_class)

cls = types.new_class(
name,
(*bases, _FinalProxy),
{"metaclass": _FastSlowProxyMeta},
{"metaclass": meta_class},
lambda ns: ns.update(cls_dict),
)
functools.update_wrapper(
Expand Down
74 changes: 74 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pyarrow as pa
import pytest
from numba import NumbaDeprecationWarning
from pytz import utc

from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable
Expand All @@ -25,6 +26,19 @@

import pandas as xpd
import pandas._testing as tm
from pandas.tseries.holiday import (
AbstractHolidayCalendar,
EasterMonday,
GoodFriday,
Holiday,
USColumbusDay,
USLaborDay,
USMartinLutherKingJr,
USMemorialDay,
USPresidentsDay,
USThanksgivingDay,
get_calendar,
)

# Accelerated pandas has the real pandas module as an attribute
pd = xpd._fsproxy_slow
Expand Down Expand Up @@ -1311,3 +1325,63 @@ def test_timedelta(data):
xtimedelta = xpd.Timedelta(data)
timedelta = pd.Timedelta(data)
tm.assert_equal(xtimedelta, timedelta)


def test_abstract_holiday_calendar():
    """Two calendars with different rules each report only their own date."""

    class TestCalendar(AbstractHolidayCalendar):
        def __init__(self, name=None, rules=None) -> None:
            super().__init__(name=name, rules=rules)

    first_rule = Holiday("jan1", year=2015, month=1, day=1)
    first_calendar = TestCalendar(rules=[first_rule])
    second_rule = Holiday("jan2", year=2015, month=1, day=2)
    second_calendar = TestCalendar(rules=[second_rule])

    # Getting holidays for Jan 1 should not alter results for Jan 2.
    first_expected = xpd.DatetimeIndex(["01-Jan-2015"]).as_unit("ns")
    first_observed = first_calendar.holidays()
    tm.assert_index_equal(first_observed, first_expected)

    second_expected = xpd.DatetimeIndex(["02-Jan-2015"]).as_unit("ns")
    second_observed = second_calendar.holidays()
    tm.assert_index_equal(second_observed, second_expected)


@pytest.mark.parametrize(
    "holiday,start,expected",
    [
        (USMemorialDay, datetime.datetime(2015, 7, 1), []),
        (USLaborDay, "2015-09-07", [xpd.Timestamp("2015-09-07")]),
        (USColumbusDay, "2015-10-12", [xpd.Timestamp("2015-10-12")]),
        (USThanksgivingDay, "2015-11-26", [xpd.Timestamp("2015-11-26")]),
        (USMartinLutherKingJr, "2015-01-19", [xpd.Timestamp("2015-01-19")]),
        (USPresidentsDay, datetime.datetime(2015, 7, 1), []),
        (GoodFriday, datetime.datetime(2015, 7, 1), []),
        (EasterMonday, "2015-04-06", [xpd.Timestamp("2015-04-06")]),
        ("New Year's Day", "2010-12-31", [xpd.Timestamp("2010-12-31")]),
        ("Independence Day", "2015-07-03", [xpd.Timestamp("2015-07-03")]),
        ("Veterans Day", "2012-11-11", []),
        ("Christmas Day", "2011-12-26", [xpd.Timestamp("2011-12-26")]),
        (
            "Juneteenth National Independence Day",
            "2021-06-18",
            [xpd.Timestamp("2021-06-18")],
        ),
        ("Juneteenth National Independence Day", "2022-06-19", []),
        (
            "Juneteenth National Independence Day",
            "2022-06-20",
            [xpd.Timestamp("2022-06-20")],
        ),
    ],
)
def test_holidays_within_dates(holiday, start, expected):
    """Check ``holiday.dates(start, start)`` for naive and UTC-aware input.

    ``holiday`` is either a holiday rule object or the name of a rule that
    is looked up in the ``USFederalHolidayCalendar`` calendar.
    """
    # String parameters are resolved to the named rule of the US federal
    # calendar before use.
    if isinstance(holiday, str):
        federal_calendar = get_calendar("USFederalHolidayCalendar")
        holiday = federal_calendar.rule_from_name(holiday)

    naive_dates = list(holiday.dates(start, start))
    assert naive_dates == expected

    # Verify that timezone info is preserved.
    aware_start = utc.localize(xpd.Timestamp(start))
    aware_end = utc.localize(xpd.Timestamp(start))
    aware_dates = list(holiday.dates(aware_start, aware_end))
    assert aware_dates == [utc.localize(day) for day in expected]

0 comments on commit c576e97

Please sign in to comment.