From 7f2cc4c21fc621cfc3f02a78fc16a24441d61652 Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Wed, 30 Jun 2021 09:28:08 -0400 Subject: [PATCH] Add dayofyear and day_of_year to Series, DatetimeColumn, and DatetimeIndex (#8626) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR: - [x] Adds `[Series/DatetimeColumn/DatetimeIndex].dt.dayofyear` and `day_of_year` - [x] Updates the existing pytests to include dayofyear/day_of_year - [x] Includes docstrings in new methods ```python import cudf import pandas as pd s = pd.Series(["2021-01-08", "2021-06-28", "2020-03-09", "2021-06-30"], dtype="datetime64[ms]") s = s.repeat(25000) # 100K elements gs = cudf.from_pandas(s) %timeit gs.dt.dayofyear %timeit s.dt.dayofyear 39 µs ± 169 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each) 6.49 ms ± 39.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) ``` This closes #8625 Authors: - Nick Becker (https://github.com/beckernick) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/8626 --- python/cudf/cudf/_lib/cpp/datetime.pxd | 1 + python/cudf/cudf/_lib/datetime.pyx | 2 + python/cudf/cudf/core/column/datetime.py | 8 +++ python/cudf/cudf/core/index.py | 44 ++++++++++++++ python/cudf/cudf/core/series.py | 74 ++++++++++++++++++++++++ python/cudf/cudf/tests/test_datetime.py | 2 + 6 files changed, 131 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 20fdd2e842a..f662bfb93f2 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -16,3 +16,4 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: const column_view& timestamps, const column_view& months ) except + + cdef unique_ptr[column] day_of_year(const column_view& column) except + diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3e40cb62f9c..09be55abe9d 
100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -46,6 +46,8 @@ def extract_datetime_component(Column col, object field): c_result = move(libcudf_datetime.extract_minute(col_view)) elif field == "second": c_result = move(libcudf_datetime.extract_second(col_view)) + elif field == "day_of_year": + c_result = move(libcudf_datetime.day_of_year(col_view)) else: raise ValueError(f"Invalid datetime field: '{field}'") diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 150ce2c48ec..f3d1880b290 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -133,6 +133,14 @@ def second(self) -> ColumnBase: def weekday(self) -> ColumnBase: return self.get_dt_field("weekday") + @property + def dayofyear(self) -> ColumnBase: + return self.get_dt_field("day_of_year") + + @property + def day_of_year(self) -> ColumnBase: + return self.get_dt_field("day_of_year") + def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs ) -> "cudf.Series": diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c89718e8f07..13ea1755803 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2299,6 +2299,50 @@ def dayofweek(self): """ return self._get_dt_field("weekday") + @property + def dayofyear(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2016-12-31", + ... 
"2017-01-08", freq="D")) + >>> datetime_index + DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], + dtype='datetime64[ns]') + >>> datetime_index.dayofyear + Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') + """ + return self._get_dt_field("day_of_year") + + @property + def day_of_year(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2016-12-31", + ... "2017-01-08", freq="D")) + >>> datetime_index + DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], + dtype='datetime64[ns]') + >>> datetime_index.day_of_year + Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') + """ + return self._get_dt_field("day_of_year") + def to_pandas(self): nanos = self._values.astype("datetime64[ns]") return pd.DatetimeIndex(nanos.to_pandas(), name=self.name) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 2dada48be4d..77640db6a1d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6292,6 +6292,80 @@ def dayofweek(self): """ return self._get_dt_field("weekday") + @property + def dayofyear(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range('2016-12-31', + ... 
'2017-01-08', freq='D')) + >>> datetime_series + 0 2016-12-31 + 1 2017-01-01 + 2 2017-01-02 + 3 2017-01-03 + 4 2017-01-04 + 5 2017-01-05 + 6 2017-01-06 + 7 2017-01-07 + 8 2017-01-08 + dtype: datetime64[ns] + >>> datetime_series.dt.dayofyear + 0 366 + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + dtype: int16 + """ + return self._get_dt_field("day_of_year") + + @property + def day_of_year(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range('2016-12-31', + ... '2017-01-08', freq='D')) + >>> datetime_series + 0 2016-12-31 + 1 2017-01-01 + 2 2017-01-02 + 3 2017-01-03 + 4 2017-01-04 + 5 2017-01-05 + 6 2017-01-06 + 7 2017-01-07 + 8 2017-01-08 + dtype: datetime64[ns] + >>> datetime_series.dt.day_of_year + 0 366 + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + dtype: int16 + """ + return self._get_dt_field("day_of_year") + def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) return Series( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 653ee8389fa..12e169e699d 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -82,6 +82,8 @@ def numerical_data(): "second", "weekday", "dayofweek", + "dayofyear", + "day_of_year", ]