From e01e7c88b643b6fc964fd1db65d445f13627b4fa Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Tue, 29 Jun 2021 08:18:31 -0700 Subject: [PATCH 1/3] day_of_year bindings and additions to pytests --- python/cudf/cudf/_lib/cpp/datetime.pxd | 1 + python/cudf/cudf/_lib/datetime.pyx | 2 ++ python/cudf/cudf/core/column/datetime.py | 8 ++++++++ python/cudf/cudf/core/index.py | 12 ++++++++++++ python/cudf/cudf/core/series.py | 8 ++++++++ python/cudf/cudf/tests/test_datetime.py | 2 ++ 6 files changed, 33 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 20fdd2e842a..f662bfb93f2 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -16,3 +16,4 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: const column_view& timestamps, const column_view& months ) except + + cdef unique_ptr[column] day_of_year(const column_view& column) except + diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3e40cb62f9c..09be55abe9d 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -46,6 +46,8 @@ def extract_datetime_component(Column col, object field): c_result = move(libcudf_datetime.extract_minute(col_view)) elif field == "second": c_result = move(libcudf_datetime.extract_second(col_view)) + elif field == "day_of_year": + c_result = move(libcudf_datetime.day_of_year(col_view)) else: raise ValueError(f"Invalid datetime field: '{field}'") diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b96a49c2514..8686432a0b4 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -133,6 +133,14 @@ def second(self) -> ColumnBase: def weekday(self) -> ColumnBase: return self.get_dt_field("weekday") + @property + def dayofyear(self): + return self.get_dt_field("day_of_year") + + @property + def 
day_of_year(self): + return self.get_dt_field("day_of_year") + def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs ) -> "cudf.Series": diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c89718e8f07..3e4782bfe31 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2299,6 +2299,18 @@ def dayofweek(self): """ return self._get_dt_field("weekday") + @property + def dayofyear(self): + """ + """ + return self._get_dt_field("day_of_year") + + @property + def day_of_year(self): + """ + """ + return self._get_dt_field("day_of_year") + def to_pandas(self): nanos = self._values.astype("datetime64[ns]") return pd.DatetimeIndex(nanos.to_pandas(), name=self.name) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 31ebf90b3c2..3d9864f20ee 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6292,6 +6292,14 @@ def dayofweek(self): """ return self._get_dt_field("weekday") + @property + def dayofyear(self): + return self._get_dt_field("day_of_year") + + @property + def day_of_year(self): + return self._get_dt_field("day_of_year") + def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) return Series( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 647ff5250ba..eee3795ec30 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -82,6 +82,8 @@ def numerical_data(): "second", "weekday", "dayofweek", + "dayofyear", + "day_of_year", ] From 542731d4991a31b74715676540100fb0108da451 Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Tue, 29 Jun 2021 08:21:55 -0700 Subject: [PATCH 2/3] return type hints --- python/cudf/cudf/core/column/datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 
8686432a0b4..98343397818 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -134,11 +134,11 @@ def weekday(self) -> ColumnBase: return self.get_dt_field("weekday") @property - def dayofyear(self): + def dayofyear(self) -> ColumnBase: return self.get_dt_field("day_of_year") @property - def day_of_year(self): + def day_of_year(self) -> ColumnBase: return self.get_dt_field("day_of_year") def to_pandas( From 4246a01ec9811c6a87bb8da3871cf75462f871fe Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Tue, 29 Jun 2021 08:40:49 -0700 Subject: [PATCH 3/3] docstrings for series and index --- python/cudf/cudf/core/index.py | 32 ++++++++++++++++ python/cudf/cudf/core/series.py | 66 +++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 3e4782bfe31..13ea1755803 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2302,12 +2302,44 @@ def dayofweek(self): @property def dayofyear(self): """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2016-12-31", + ... "2017-01-08", freq="D")) + >>> datetime_index + DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], + dtype='datetime64[ns]') + >>> datetime_index.dayofyear + Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') """ return self._get_dt_field("day_of_year") @property def day_of_year(self): """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2016-12-31", + ... 
"2017-01-08", freq="D")) + >>> datetime_index + DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], + dtype='datetime64[ns]') + >>> datetime_index.day_of_year + Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') """ return self._get_dt_field("day_of_year") diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 3d9864f20ee..329e1620ad0 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6294,10 +6294,76 @@ def dayofweek(self): @property def dayofyear(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range('2016-12-31', + ... '2017-01-08', freq='D')) + >>> datetime_series + 0 2016-12-31 + 1 2017-01-01 + 2 2017-01-02 + 3 2017-01-03 + 4 2017-01-04 + 5 2017-01-05 + 6 2017-01-06 + 7 2017-01-07 + 8 2017-01-08 + dtype: datetime64[ns] + >>> datetime_series.dt.dayofyear + 0 366 + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + dtype: int16 + """ return self._get_dt_field("day_of_year") @property def day_of_year(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range('2016-12-31', + ... '2017-01-08', freq='D')) + >>> datetime_series + 0 2016-12-31 + 1 2017-01-01 + 2 2017-01-02 + 3 2017-01-03 + 4 2017-01-04 + 5 2017-01-05 + 6 2017-01-06 + 7 2017-01-07 + 8 2017-01-08 + dtype: datetime64[ns] + >>> datetime_series.dt.day_of_year + 0 366 + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + dtype: int16 + """ return self._get_dt_field("day_of_year") def _get_dt_field(self, field):