From b084d747075baa9b36e2cdad7efb1fa277eb4e5d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 28 Nov 2024 06:45:01 -0800
Subject: [PATCH] Remove cudf._lib.datetime in favor of inlining pylibcudf (#17372)

Contributes to https://github.com/rapidsai/cudf/issues/17317

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/17372
---
 python/cudf/cudf/_lib/CMakeLists.txt     |   1 -
 python/cudf/cudf/_lib/__init__.py        |   1 -
 python/cudf/cudf/_lib/datetime.pyx       | 183 -----------------------
 python/cudf/cudf/core/column/datetime.py | 156 ++++++++++++++-----
 python/cudf/cudf/core/index.py           |  39 ++---
 python/cudf/cudf/core/series.py          |  60 +++-----
 python/cudf/cudf/core/tools/datetimes.py |  23 ++-
 7 files changed, 176 insertions(+), 287 deletions(-)
 delete mode 100644 python/cudf/cudf/_lib/datetime.pyx

diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index 9806ae11339..8ed5d5b896c 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -18,7 +18,6 @@ set(cython_sources
   column.pyx
   copying.pyx
   csv.pyx
-  datetime.pyx
   filling.pyx
   groupby.pyx
   interop.pyx
diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py
index 9af1dc976a6..b71c5ea73d6 100644
--- a/python/cudf/cudf/_lib/__init__.py
+++ b/python/cudf/cudf/_lib/__init__.py
@@ -5,7 +5,6 @@
     binaryop,
     copying,
     csv,
-    datetime,
     filling,
     groupby,
     interop,
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
deleted file mode 100644
index 7e8f29dac93..00000000000
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ /dev/null
@@ -1,183 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
- -import warnings - -from cudf.core.buffer import acquire_spill_lock - -cimport pylibcudf.libcudf.datetime as libcudf_datetime -from pylibcudf.libcudf.types cimport size_type -from pylibcudf.datetime import DatetimeComponent, RoundingFrequency - -from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar -import pylibcudf as plc - - -@acquire_spill_lock() -def add_months(Column col, Column months): - # months must be int16 dtype - return Column.from_pylibcudf( - plc.datetime.add_calendrical_months( - col.to_pylibcudf(mode="read"), - months.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def extract_datetime_component(Column col, object field): - component_names = { - "year": DatetimeComponent.YEAR, - "month": DatetimeComponent.MONTH, - "day": DatetimeComponent.DAY, - "weekday": DatetimeComponent.WEEKDAY, - "hour": DatetimeComponent.HOUR, - "minute": DatetimeComponent.MINUTE, - "second": DatetimeComponent.SECOND, - "millisecond": DatetimeComponent.MILLISECOND, - "microsecond": DatetimeComponent.MICROSECOND, - "nanosecond": DatetimeComponent.NANOSECOND, - } - if field == "day_of_year": - result = Column.from_pylibcudf( - plc.datetime.day_of_year( - col.to_pylibcudf(mode="read") - ) - ) - elif field in component_names: - result = Column.from_pylibcudf( - plc.datetime.extract_datetime_component( - col.to_pylibcudf(mode="read"), - component_names[field], - ) - ) - if field == "weekday": - # Pandas counts Monday-Sunday as 0-6 - # while libcudf counts Monday-Sunday as 1-7 - result = result - result.dtype.type(1) - else: - raise ValueError(f"Invalid field: '{field}'") - - return result - - -cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq): - # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html - old_to_new_freq_map = { - "H": "h", - "N": "ns", - "T": "min", - "L": "ms", - "U": "us", - "S": "s", - } - if freq in old_to_new_freq_map: - warnings.warn( - f"FutureWarning: {freq} is deprecated and will be " - "removed in a future version, please use " - f"{old_to_new_freq_map[freq]} instead.", - FutureWarning - ) - freq = old_to_new_freq_map.get(freq) - rounding_fequency_map = { - "D": RoundingFrequency.DAY, - "h": RoundingFrequency.HOUR, - "min": RoundingFrequency.MINUTE, - "s": RoundingFrequency.SECOND, - "ms": RoundingFrequency.MILLISECOND, - "us": RoundingFrequency.MICROSECOND, - "ns": RoundingFrequency.NANOSECOND, - } - if freq in rounding_fequency_map: - return rounding_fequency_map[freq] - else: - raise ValueError(f"Invalid resolution: '{freq}'") - - -@acquire_spill_lock() -def ceil_datetime(Column col, object freq): - return Column.from_pylibcudf( - plc.datetime.ceil_datetimes( - col.to_pylibcudf(mode="read"), - _get_rounding_frequency(freq), - ) - ) - - -@acquire_spill_lock() -def floor_datetime(Column col, object freq): - return Column.from_pylibcudf( - plc.datetime.floor_datetimes( - col.to_pylibcudf(mode="read"), - _get_rounding_frequency(freq), - ) - ) - - -@acquire_spill_lock() -def round_datetime(Column col, object freq): - return Column.from_pylibcudf( - plc.datetime.round_datetimes( - col.to_pylibcudf(mode="read"), - _get_rounding_frequency(freq), - ) - ) - - -@acquire_spill_lock() -def is_leap_year(Column col): - """Returns a boolean indicator whether the year of the date is a leap year - """ - return Column.from_pylibcudf( - plc.datetime.is_leap_year( - col.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def date_range(DeviceScalar start, size_type n, 
offset): - cdef size_type months = ( - offset.kwds.get("years", 0) * 12 - + offset.kwds.get("months", 0) - ) - return Column.from_pylibcudf( - plc.filling.calendrical_month_sequence( - n, - start.c_value, - months, - ) - ) - - -@acquire_spill_lock() -def extract_quarter(Column col): - """ - Returns a column which contains the corresponding quarter of the year - for every timestamp inside the input column. - """ - return Column.from_pylibcudf( - plc.datetime.extract_quarter( - col.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def days_in_month(Column col): - """Extracts the number of days in the month of the date - """ - return Column.from_pylibcudf( - plc.datetime.days_in_month( - col.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def last_day_of_month(Column col): - return Column.from_pylibcudf( - plc.datetime.last_day_of_month( - col.to_pylibcudf(mode="read") - ) - ) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b40ea4eedd3..24b55fe1bc2 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -7,6 +7,7 @@ import functools import locale import re +import warnings from locale import nl_langinfo from typing import TYPE_CHECKING, Literal, cast @@ -36,7 +37,7 @@ ) if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Callable, Sequence from cudf._typing import ( ColumnBinaryOperand, @@ -267,43 +268,60 @@ def time_unit(self) -> str: @property def quarter(self) -> ColumnBase: - return libcudf.datetime.extract_quarter(self) + with acquire_spill_lock(): + return type(self).from_pylibcudf( + plc.datetime.extract_quarter(self.to_pylibcudf(mode="read")) + ) @property def year(self) -> ColumnBase: - return self.get_dt_field("year") + return self._get_dt_field(plc.datetime.DatetimeComponent.YEAR) @property def month(self) -> ColumnBase: - return self.get_dt_field("month") + return self._get_dt_field(plc.datetime.DatetimeComponent.MONTH) @property def day(self) -> ColumnBase: - return self.get_dt_field("day") + return self._get_dt_field(plc.datetime.DatetimeComponent.DAY) @property def hour(self) -> ColumnBase: - return self.get_dt_field("hour") + return self._get_dt_field(plc.datetime.DatetimeComponent.HOUR) @property def minute(self) -> ColumnBase: - return self.get_dt_field("minute") + return self._get_dt_field(plc.datetime.DatetimeComponent.MINUTE) @property def second(self) -> ColumnBase: - return self.get_dt_field("second") + return self._get_dt_field(plc.datetime.DatetimeComponent.SECOND) @property - def weekday(self) -> ColumnBase: - return self.get_dt_field("weekday") + def millisecond(self) -> ColumnBase: + return self._get_dt_field(plc.datetime.DatetimeComponent.MILLISECOND) + + @property + def microsecond(self) -> ColumnBase: + return self._get_dt_field(plc.datetime.DatetimeComponent.MICROSECOND) @property - def dayofyear(self) -> ColumnBase: - return self.get_dt_field("day_of_year") + def nanosecond(self) -> ColumnBase: + return self._get_dt_field(plc.datetime.DatetimeComponent.NANOSECOND) + + @property + def weekday(self) -> ColumnBase: + # pandas counts Monday-Sunday as 0-6 + # while libcudf counts Monday-Sunday as 1-7 + result = self._get_dt_field(plc.datetime.DatetimeComponent.WEEKDAY) + return result - result.dtype.type(1) @property def day_of_year(self) -> ColumnBase: - return self.get_dt_field("day_of_year") + with acquire_spill_lock(): + return type(self).from_pylibcudf( + 
plc.datetime.day_of_year(self.to_pylibcudf(mode="read")) + ) @property def is_month_start(self) -> ColumnBase: @@ -311,8 +329,11 @@ def is_month_start(self) -> ColumnBase: @property def is_month_end(self) -> ColumnBase: - last_day_col = libcudf.datetime.last_day_of_month(self) - return (self.day == last_day_col.day).fillna(False) + with acquire_spill_lock(): + last_day_col = type(self).from_pylibcudf( + plc.datetime.last_day_of_month(self.to_pylibcudf(mode="read")) + ) + return (self.day == last_day_col.day).fillna(False) # type: ignore[attr-defined] @property def is_quarter_end(self) -> ColumnBase: @@ -337,7 +358,10 @@ def is_year_end(self) -> ColumnBase: @property def is_leap_year(self) -> ColumnBase: - return libcudf.datetime.is_leap_year(self) + with acquire_spill_lock(): + return type(self).from_pylibcudf( + plc.datetime.is_leap_year(self.to_pylibcudf(mode="read")) + ) @property def is_year_start(self) -> ColumnBase: @@ -345,7 +369,10 @@ def is_year_start(self) -> ColumnBase: @property def days_in_month(self) -> ColumnBase: - return libcudf.datetime.days_in_month(self) + with acquire_spill_lock(): + return type(self).from_pylibcudf( + plc.datetime.days_in_month(self.to_pylibcudf(mode="read")) + ) @property def day_of_week(self) -> ColumnBase: @@ -380,8 +407,16 @@ def element_indexing(self, index: int): return pd.Timestamp(result) return result - def get_dt_field(self, field: str) -> ColumnBase: - return libcudf.datetime.extract_datetime_component(self, field) + def _get_dt_field( + self, field: plc.datetime.DatetimeComponent + ) -> ColumnBase: + with acquire_spill_lock(): + return type(self).from_pylibcudf( + plc.datetime.extract_datetime_component( + self.to_pylibcudf(mode="read"), + field, + ) + ) def _get_field_names( self, @@ -395,7 +430,7 @@ def _get_field_names( "Results will be returned in your current locale." 
) col_labels = as_column(labels) - indices = self.get_dt_field(field) + indices = getattr(self, field) has_nulls = indices.has_nulls() if has_nulls: indices = indices.fillna(len(col_labels)) @@ -411,14 +446,58 @@ def get_month_names(self, locale: str | None = None) -> ColumnBase: "month", list(calendar.month_name), locale=locale ) + def _round_dt( + self, + round_func: Callable[ + [plc.Column, plc.datetime.RoundingFrequency], plc.Column + ], + freq: str, + ) -> ColumnBase: + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html + old_to_new_freq_map = { + "H": "h", + "N": "ns", + "T": "min", + "L": "ms", + "U": "us", + "S": "s", + } + if freq in old_to_new_freq_map: + warnings.warn( + f"{freq} is deprecated and will be " + "removed in a future version, please use " + f"{old_to_new_freq_map[freq]} instead.", + FutureWarning, + ) + freq = old_to_new_freq_map[freq] + rounding_fequency_map = { + "D": plc.datetime.RoundingFrequency.DAY, + "h": plc.datetime.RoundingFrequency.HOUR, + "min": plc.datetime.RoundingFrequency.MINUTE, + "s": plc.datetime.RoundingFrequency.SECOND, + "ms": plc.datetime.RoundingFrequency.MILLISECOND, + "us": plc.datetime.RoundingFrequency.MICROSECOND, + "ns": plc.datetime.RoundingFrequency.NANOSECOND, + } + if (plc_freq := rounding_fequency_map.get(freq)) is None: + raise ValueError(f"Invalid resolution: '{freq}'") + + with acquire_spill_lock(): + return type(self).from_pylibcudf( + round_func( + self.to_pylibcudf(mode="read"), + plc_freq, + ) + ) + def ceil(self, freq: str) -> ColumnBase: - return libcudf.datetime.ceil_datetime(self, freq) + return self._round_dt(plc.datetime.ceil_datetimes, freq) def floor(self, freq: str) -> ColumnBase: - return libcudf.datetime.floor_datetime(self, freq) + return self._round_dt(plc.datetime.floor_datetimes, freq) def round(self, freq: str) -> ColumnBase: - return libcudf.datetime.round_datetime(self, freq) + return self._round_dt(plc.datetime.round_datetimes, freq) def isocalendar(self) -> dict[str, ColumnBase]: return { @@ -537,21 +616,16 @@ def as_string_column(self) -> cudf.core.column.StringColumn: else: sub_second_res_len = 0 - has_nanos = ( - self.time_unit in {"ns"} - and self.get_dt_field("nanosecond").any() - ) + has_nanos = self.time_unit == "ns" and self.nanosecond.any() has_micros = ( - self.time_unit in {"ns", "us"} - and self.get_dt_field("microsecond").any() + self.time_unit in {"ns", "us"} and self.microsecond.any() ) has_millis = ( - self.time_unit in {"ns", "us", "ms"} - and self.get_dt_field("millisecond").any() + self.time_unit in {"ns", "us", "ms"} and self.millisecond.any() ) - has_seconds = self.get_dt_field("second").any() - has_minutes = self.get_dt_field("minute").any() - has_hours = self.get_dt_field("hour").any() + has_seconds = self.second.any() + has_minutes = self.minute.any() + has_hours = self.hour.any() if sub_second_res_len: if has_nanos: # format should be intact and rest of the @@ -984,10 +1058,16 @@ def as_datetime_column(self, dtype: Dtype) -> DatetimeColumn: return casted.tz_convert(str(dtype.tz)) return super().as_datetime_column(dtype) - def get_dt_field(self, field: str) -> ColumnBase: - return libcudf.datetime.extract_datetime_component( - self._local_time, field - ) + def _get_dt_field( + self, field: plc.datetime.DatetimeComponent + ) -> ColumnBase: + with acquire_spill_lock(): + return type(self).from_pylibcudf( + plc.datetime.extract_datetime_component( + self._local_time.to_pylibcudf(mode="read"), + field, + ) + ) def __repr__(self): # 
Arrow prints the UTC timestamps, but we want to print the diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 80e037c36fd..ff9cd310aef 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2219,7 +2219,7 @@ def year(self) -> Index: >>> datetime_index.year Index([2000, 2001, 2002], dtype='int16') """ # noqa: E501 - return self._get_dt_field("year") + return Index._from_column(self._column.year, name=self.name) @property # type: ignore @_performance_tracking @@ -2238,7 +2238,7 @@ def month(self) -> Index: >>> datetime_index.month Index([1, 2, 3], dtype='int16') """ # noqa: E501 - return self._get_dt_field("month") + return Index._from_column(self._column.month, name=self.name) @property # type: ignore @_performance_tracking @@ -2257,7 +2257,7 @@ def day(self) -> Index: >>> datetime_index.day Index([1, 2, 3], dtype='int16') """ # noqa: E501 - return self._get_dt_field("day") + return Index._from_column(self._column.day, name=self.name) @property # type: ignore @_performance_tracking @@ -2278,7 +2278,7 @@ def hour(self) -> Index: >>> datetime_index.hour Index([0, 1, 2], dtype='int16') """ - return self._get_dt_field("hour") + return Index._from_column(self._column.hour, name=self.name) @property # type: ignore @_performance_tracking @@ -2299,7 +2299,7 @@ def minute(self) -> Index: >>> datetime_index.minute Index([0, 1, 2], dtype='int16') """ - return self._get_dt_field("minute") + return Index._from_column(self._column.minute, name=self.name) @property # type: ignore @_performance_tracking @@ -2320,7 +2320,7 @@ def second(self) -> Index: >>> datetime_index.second Index([0, 1, 2], dtype='int16') """ - return self._get_dt_field("second") + return Index._from_column(self._column.second, name=self.name) @property # type: ignore @_performance_tracking @@ -2346,10 +2346,10 @@ def microsecond(self) -> Index: # Need to manually promote column to int32 because # pandas-matching binop behaviour requires that this # __mul__ returns an int16 column. 
- self._column.get_dt_field("millisecond").astype("int32") + self._column.millisecond.astype("int32") * cudf.Scalar(1000, dtype="int32") ) - + self._column.get_dt_field("microsecond"), + + self._column.microsecond, name=self.name, ) @@ -2373,7 +2373,7 @@ def nanosecond(self) -> Index: >>> datetime_index.nanosecond Index([0, 1, 2], dtype='int16') """ - return self._get_dt_field("nanosecond") + return Index._from_column(self._column.nanosecond, name=self.name) @property # type: ignore @_performance_tracking @@ -2395,7 +2395,7 @@ def weekday(self) -> Index: >>> datetime_index.weekday Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int16') """ - return self._get_dt_field("weekday") + return Index._from_column(self._column.weekday, name=self.name) @property # type: ignore @_performance_tracking @@ -2417,7 +2417,7 @@ def dayofweek(self) -> Index: >>> datetime_index.dayofweek Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int16') """ - return self._get_dt_field("weekday") + return Index._from_column(self._column.weekday, name=self.name) @property # type: ignore @_performance_tracking @@ -2440,7 +2440,7 @@ def dayofyear(self) -> Index: >>> datetime_index.dayofyear Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') """ - return self._get_dt_field("day_of_year") + return Index._from_column(self._column.day_of_year, name=self.name) @property # type: ignore @_performance_tracking @@ -2463,7 +2463,7 @@ def day_of_year(self) -> Index: >>> datetime_index.day_of_year Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') """ - return self._get_dt_field("day_of_year") + return Index._from_column(self._column.day_of_year, name=self.name) @property # type: ignore @_performance_tracking @@ -2584,19 +2584,6 @@ def to_pandas( result.freq = self._freq._maybe_as_fast_pandas_offset() return result - @_performance_tracking - def _get_dt_field(self, field: str) -> Index: - """Return an Index of a numerical component of the DatetimeIndex.""" - out_column = self._column.get_dt_field(field) - out_column = NumericalColumn( - data=out_column.base_data, - size=out_column.size, - dtype=out_column.dtype, - mask=out_column.base_mask, - offset=out_column.offset, - ) - return Index._from_column(out_column, name=self.name) - def _is_boolean(self) -> bool: return False diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 38a789b00f6..95ea22b5ad5 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -3963,7 +3963,7 @@ def year(self) -> Series: 2 2002 dtype: int16 """ - return self._get_dt_field("year") + return self._return_result_like_self(self.series._column.year) @property # type: ignore @_performance_tracking @@ -3988,7 +3988,7 @@ def month(self) -> Series: 2 3 dtype: int16 """ - return self._get_dt_field("month") + return self._return_result_like_self(self.series._column.month) @property # type: ignore @_performance_tracking @@ -4013,7 +4013,7 @@ def day(self) -> Series: 2 3 dtype: int16 """ - return self._get_dt_field("day") + return self._return_result_like_self(self.series._column.day) @property # type: ignore @_performance_tracking @@ -4038,7 +4038,7 @@ def hour(self) -> Series: 2 2 dtype: int16 """ - return self._get_dt_field("hour") + return self._return_result_like_self(self.series._column.hour) @property # type: ignore @_performance_tracking @@ -4063,7 +4063,7 @@ def minute(self) -> Series: 2 2 dtype: int16 """ - return self._get_dt_field("minute") + return self._return_result_like_self(self.series._column.minute) @property # type: ignore @_performance_tracking @@ 
-4088,7 +4088,7 @@ def second(self) -> Series: 2 2 dtype: int16 """ - return self._get_dt_field("second") + return self._return_result_like_self(self.series._column.second) @property # type: ignore @_performance_tracking @@ -4113,13 +4113,13 @@ def microsecond(self) -> Series: 2 2 dtype: int32 """ - micro = self.series._column.get_dt_field("microsecond") + micro = self.series._column.microsecond # Need to manually promote column to int32 because # pandas-matching binop behaviour requires that this # __mul__ returns an int16 column. - extra = self.series._column.get_dt_field("millisecond").astype( - "int32" - ) * cudf.Scalar(1000, dtype="int32") + extra = self.series._column.millisecond.astype("int32") * cudf.Scalar( + 1000, dtype="int32" + ) return self._return_result_like_self(micro + extra) @property # type: ignore @@ -4145,7 +4145,7 @@ def nanosecond(self) -> Series: 2 2 dtype: int16 """ - return self._get_dt_field("nanosecond") + return self._return_result_like_self(self.series._column.nanosecond) @property # type: ignore @_performance_tracking @@ -4182,7 +4182,7 @@ def weekday(self) -> Series: 8 6 dtype: int16 """ - return self._get_dt_field("weekday") + return self._return_result_like_self(self.series._column.weekday) @property # type: ignore @_performance_tracking @@ -4219,7 +4219,7 @@ def dayofweek(self) -> Series: 8 6 dtype: int16 """ - return self._get_dt_field("weekday") + return self._return_result_like_self(self.series._column.weekday) @property # type: ignore @_performance_tracking @@ -4257,7 +4257,7 @@ def dayofyear(self) -> Series: 8 8 dtype: int16 """ - return self._get_dt_field("day_of_year") + return self._return_result_like_self(self.series._column.day_of_year) @property # type: ignore @_performance_tracking @@ -4295,7 +4295,7 @@ def day_of_year(self) -> Series: 8 8 dtype: int16 """ - return self._get_dt_field("day_of_year") + return self._return_result_like_self(self.series._column.day_of_year) @property # type: ignore @_performance_tracking @@ -4349,8 +4349,9 @@ def is_leap_year(self) -> Series: 12 True dtype: bool """ - res = libcudf.datetime.is_leap_year(self.series._column).fillna(False) - return self._return_result_like_self(res) + return self._return_result_like_self( + self.series._column.is_leap_year.fillna(False) + ) @property # type: ignore @_performance_tracking @@ -4377,10 +4378,9 @@ def quarter(self) -> Series: 1 4 dtype: int8 """ - res = libcudf.datetime.extract_quarter(self.series._column).astype( - np.int8 + return self._return_result_like_self( + self.series._column.quarter.astype(np.int8) ) - return self._return_result_like_self(res) @_performance_tracking def day_name(self, locale: str | None = None) -> Series: @@ -4731,12 +4731,6 @@ def is_year_end(self) -> Series: """ return self._return_result_like_self(self.series._column.is_year_end) - @_performance_tracking - def _get_dt_field(self, field: str) -> Series: - return self._return_result_like_self( - self.series._column.get_dt_field(field) - ) - @_performance_tracking def ceil(self, freq: str) -> Series: """ @@ -5043,7 +5037,7 @@ def days(self) -> Series: 4 37 dtype: int64 """ - return self._get_td_field("days") + return self._return_result_like_self(self.series._column.days) @property # type: ignore @_performance_tracking @@ -5082,7 +5076,7 @@ def seconds(self) -> Series: 4 234000 dtype: int64 """ - return self._get_td_field("seconds") + return self._return_result_like_self(self.series._column.seconds) @property # type: ignore @_performance_tracking @@ -5114,7 +5108,7 @@ def microseconds(self) -> 
Series: 4 234000 dtype: int64 """ - return self._get_td_field("microseconds") + return self._return_result_like_self(self.series._column.microseconds) @property # type: ignore @_performance_tracking @@ -5146,7 +5140,7 @@ def nanoseconds(self) -> Series: 4 234 dtype: int64 """ - return self._get_td_field("nanoseconds") + return self._return_result_like_self(self.series._column.nanoseconds) @property # type: ignore @_performance_tracking @@ -5181,12 +5175,6 @@ def components(self) -> cudf.DataFrame: ca, index=self.series.index ) - @_performance_tracking - def _get_td_field(self, field: str) -> Series: - return self._return_result_like_self( - getattr(self.series._column, field) - ) - @_performance_tracking def _align_indices(series_list, how="outer", allow_non_unique=False): diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 885e7b16644..80ee078917a 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -11,6 +11,8 @@ import pandas.tseries.offsets as pd_offset from typing_extensions import Self +import pylibcudf as plc + import cudf from cudf import _lib as libcudf from cudf._lib.strings.convert.convert_integers import ( @@ -18,6 +20,7 @@ ) from cudf.api.types import is_integer, is_scalar from cudf.core import column +from cudf.core.buffer import acquire_spill_lock from cudf.core.index import ensure_index if TYPE_CHECKING: @@ -649,7 +652,13 @@ def _datetime_binop( if not self._is_no_op: if "months" in self._scalars: rhs = self._generate_months_column(len(datetime_col), op) - datetime_col = libcudf.datetime.add_months(datetime_col, rhs) + with acquire_spill_lock(): + datetime_col = type(datetime_col).from_pylibcudf( + plc.datetime.add_calendrical_months( + datetime_col.to_pylibcudf(mode="read"), + rhs.to_pylibcudf(mode="read"), + ) + ) for unit, value in self._scalars.items(): if unit != "months": @@ -985,7 +994,17 @@ def date_range( if "months" in offset.kwds or "years" in offset.kwds: # If `offset` is non-fixed frequency, resort to libcudf. - res = libcudf.datetime.date_range(start.device_value, periods, offset) + months = offset.kwds.get("years", 0) * 12 + offset.kwds.get( + "months", 0 + ) + with acquire_spill_lock(): + res = libcudf.column.Column.from_pylibcudf( + plc.filling.calendrical_month_sequence( + periods, + start.device_value.c_value, + months, + ) + ) if _periods_not_specified: # As mentioned in [1], this is a post processing step to trim extra # elements when `periods` is an estimated value. Only offset
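
The pattern this patch applies at every call site is the same: hold the spill lock, hand the column's read-only pylibcudf view to the `plc.datetime` API, and wrap the result back into a cudf column, which is what lets the thin Cython wrappers in `cudf._lib.datetime` be deleted without changing behavior. Below is a minimal standalone sketch of that pattern; the helper name `extract_year` and the example data are illustrative only and are not part of the patch, while the imports and pylibcudf calls mirror those used in the diff above.

```python
import pylibcudf as plc

import cudf
from cudf.core.buffer import acquire_spill_lock


def extract_year(col):
    # Hold the spill lock while the column's device memory is exposed to
    # pylibcudf as a read-only view, then rebuild a cudf column from the
    # pylibcudf result -- the same shape as DatetimeColumn._get_dt_field
    # / DatetimeColumn.year in this patch.
    with acquire_spill_lock():
        return type(col).from_pylibcudf(
            plc.datetime.extract_datetime_component(
                col.to_pylibcudf(mode="read"),
                plc.datetime.DatetimeComponent.YEAR,
            )
        )


# Usage sketch (`_column` is cudf-internal, as used throughout the patch):
ser = cudf.Series(cudf.to_datetime(["2000-01-01", "2001-06-15", "2002-12-31"]))
print(extract_year(ser._column).values_host)  # [2000 2001 2002], int16
```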