Skip to content

Commit

Permalink
Add dayofyear and day_of_year to Series, DatetimeColumn, and DatetimeIndex (#8626)
Browse files Browse the repository at this point in the history

This PR:
- [x] Adds `dayofyear` and `day_of_year` to `Series.dt`, `DatetimeColumn`, and `DatetimeIndex`
- [x] Updates the existing pytests to include dayofyear/day_of_year
- [x] Includes docstrings in new methods

```python
import cudf
import pandas as pd

s = pd.Series(["2021-01-08", "2021-06-28", "2020-03-09", "2021-06-30"], dtype="datetime64[ms]")
s = s.repeat(25000) # 100K elements
gs = cudf.from_pandas(s)

%timeit gs.dt.dayofyear
%timeit s.dt.dayofyear
39 µs ± 169 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
6.49 ms ± 39.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
```

This closes #8625

Authors:
  - Nick Becker (https://github.com/beckernick)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #8626
  • Loading branch information
beckernick authored Jun 30, 2021
1 parent 0e2a448 commit 7f2cc4c
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 0 deletions.
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/cpp/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
const column_view& timestamps,
const column_view& months
) except +
# Binding for `day_of_year` from "cudf/datetime.hpp" (namespace
# `cudf::datetime`): takes a view of a column and returns a newly owned
# column. `except +` lets Cython turn C++ exceptions into Python exceptions.
cdef unique_ptr[column] day_of_year(const column_view& column) except +
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def extract_datetime_component(Column col, object field):
c_result = move(libcudf_datetime.extract_minute(col_view))
elif field == "second":
c_result = move(libcudf_datetime.extract_second(col_view))
elif field == "day_of_year":
c_result = move(libcudf_datetime.day_of_year(col_view))
else:
raise ValueError(f"Invalid datetime field: '{field}'")

Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,14 @@ def second(self) -> ColumnBase:
def weekday(self) -> ColumnBase:
return self.get_dt_field("weekday")

@property
def dayofyear(self) -> ColumnBase:
    """
    The day-of-year component of each value in this datetime column.

    Delegates to ``get_dt_field("day_of_year")``; the ``day_of_year``
    property requests the same field, so both names yield the same result.
    """
    return self.get_dt_field("day_of_year")

@property
def day_of_year(self) -> ColumnBase:
    """
    The day-of-year component of each value in this datetime column.

    Delegates to ``get_dt_field("day_of_year")``; the ``dayofyear``
    property requests the same field, so both names yield the same result.
    """
    return self.get_dt_field("day_of_year")

def to_pandas(
self, index: pd.Index = None, nullable: bool = False, **kwargs
) -> "cudf.Series":
Expand Down
44 changes: 44 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2299,6 +2299,50 @@ def dayofweek(self):
"""
return self._get_dt_field("weekday")

@property
def dayofyear(self):
    """
    The day of the year, from 1-365 in non-leap years and
    from 1-366 in leap years.

    ``day_of_year`` is an alias that reads the same datetime field.

    Examples
    --------
    >>> import pandas as pd
    >>> import cudf
    >>> datetime_index = cudf.Index(pd.date_range("2016-12-31",
    ...     "2017-01-08", freq="D"))
    >>> datetime_index
    DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
                   '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07',
                   '2017-01-08'],
                  dtype='datetime64[ns]')
    >>> datetime_index.dayofyear
    Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16')
    """
    return self._get_dt_field("day_of_year")

@property
def day_of_year(self):
    """
    The day of the year, from 1-365 in non-leap years and
    from 1-366 in leap years.

    ``dayofyear`` is an alias that reads the same datetime field.

    Examples
    --------
    >>> import pandas as pd
    >>> import cudf
    >>> datetime_index = cudf.Index(pd.date_range("2016-12-31",
    ...     "2017-01-08", freq="D"))
    >>> datetime_index
    DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
                   '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07',
                   '2017-01-08'],
                  dtype='datetime64[ns]')
    >>> datetime_index.day_of_year
    Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16')
    """
    return self._get_dt_field("day_of_year")

def to_pandas(self):
nanos = self._values.astype("datetime64[ns]")
return pd.DatetimeIndex(nanos.to_pandas(), name=self.name)
Expand Down
74 changes: 74 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6292,6 +6292,80 @@ def dayofweek(self):
"""
return self._get_dt_field("weekday")

@property
def dayofyear(self):
    """
    The day of the year, from 1-365 in non-leap years and
    from 1-366 in leap years.

    ``day_of_year`` is an alias that reads the same datetime field.

    Examples
    --------
    >>> import pandas as pd
    >>> import cudf
    >>> datetime_series = cudf.Series(pd.date_range('2016-12-31',
    ...     '2017-01-08', freq='D'))
    >>> datetime_series
    0   2016-12-31
    1   2017-01-01
    2   2017-01-02
    3   2017-01-03
    4   2017-01-04
    5   2017-01-05
    6   2017-01-06
    7   2017-01-07
    8   2017-01-08
    dtype: datetime64[ns]
    >>> datetime_series.dt.dayofyear
    0    366
    1      1
    2      2
    3      3
    4      4
    5      5
    6      6
    7      7
    8      8
    dtype: int16
    """
    return self._get_dt_field("day_of_year")

@property
def day_of_year(self):
    """
    The day of the year, from 1-365 in non-leap years and
    from 1-366 in leap years.

    ``dayofyear`` is an alias that reads the same datetime field.

    Examples
    --------
    >>> import pandas as pd
    >>> import cudf
    >>> datetime_series = cudf.Series(pd.date_range('2016-12-31',
    ...     '2017-01-08', freq='D'))
    >>> datetime_series
    0   2016-12-31
    1   2017-01-01
    2   2017-01-02
    3   2017-01-03
    4   2017-01-04
    5   2017-01-05
    6   2017-01-06
    7   2017-01-07
    8   2017-01-08
    dtype: datetime64[ns]
    >>> datetime_series.dt.day_of_year
    0    366
    1      1
    2      2
    3      3
    4      4
    5      5
    6      6
    7      7
    8      8
    dtype: int16
    """
    return self._get_dt_field("day_of_year")

def _get_dt_field(self, field):
out_column = self.series._column.get_dt_field(field)
return Series(
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def numerical_data():
"second",
"weekday",
"dayofweek",
"dayofyear",
"day_of_year",
]


Expand Down

0 comments on commit 7f2cc4c

Please sign in to comment.