From 57d5df34e7991a3adfc51857bbaf2317029015dd Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Fri, 27 Aug 2021 09:50:44 +0000 Subject: [PATCH 1/7] added cython bindings --- python/cudf/cudf/_lib/cpp/datetime.pxd | 11 +++++++++++ python/cudf/cudf/_lib/datetime.pyx | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index ef97be3cf9e..c27eb324008 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -12,6 +12,17 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_hour(const column_view& column) except + cdef unique_ptr[column] extract_minute(const column_view& column) except + cdef unique_ptr[column] extract_second(const column_view& column) except + + cdef unique_ptr[column] ceil_day(const column_view& column) except + + cdef unique_ptr[column] ceil_hour(const column_view& column) except + + cdef unique_ptr[column] ceil_minute(const column_view& column) except + + cdef unique_ptr[column] ceil_second(const column_view& column) except + + cdef unique_ptr[column] ceil_millisecond( + const column_view& column + ) except + + cdef unique_ptr[column] ceil_microsecond( + const column_view& column + ) except + + cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 1b152f1a3b7..384b145e6bf 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -59,6 +59,32 @@ def extract_datetime_component(Column col, object field): return result +def ceil_datetime(Column col, object field): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + if field == "D": + c_result = move(libcudf_datetime.ceil_day(col_view)) + elif field == "H": + c_result = move(libcudf_datetime.ceil_hour(col_view)) + elif field == "M": + c_result = move(libcudf_datetime.ceil_minute(col_view)) + elif field == "S": + c_result = move(libcudf_datetime.ceil_second(col_view)) + elif field == "L": + c_result = move(libcudf_datetime.ceil_millisecond(col_view)) + elif field == "U": + c_result = move(libcudf_datetime.ceil_microsecond(col_view)) + elif field == "ns": + c_result = move(libcudf_datetime.ceil_nanosecond(col_view)) + else: + raise ValueError(f"Invalid resolution: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) + return result + + def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year """ From 30ad3e3afad357bfb4ed49b23f78cb5c12a4d4ba Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Sat, 28 Aug 2021 00:21:00 +0000 Subject: [PATCH 2/7] added python bindings --- python/cudf/cudf/core/column/datetime.py | 3 +++ python/cudf/cudf/core/series.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 46ff1990ac2..e686e0a950a 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -170,6 +170,9 @@ def values(self): def get_dt_field(self, field: str) -> ColumnBase: return libcudf.datetime.extract_datetime_component(self, field) + def ceil_datetime(self, field: str) -> ColumnBase: + return libcudf.datetime.ceil_datetime(self, field) + def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): return other diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ff3b9fc68ef..1c6f37ca261 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5923,6 +5923,13 @@ def _get_dt_field(self, field): data=out_column, index=self.series._index, name=self.series.name ) + def _ceil_datetime(self, field): + out_column = self.series._column.ceil_datetime(field) + + return Series( + data=out_column, index=self.series._index, name=self.series.name + ) + def strftime(self, date_format, *args, **kwargs): """ Convert to Series using specified ``date_format``. From e3190cfbe50b2b4465c0942ef9e51d4a2cd09a16 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 31 Aug 2021 02:27:51 +0000 Subject: [PATCH 3/7] added test skeleton --- python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 1c6f37ca261..e166110163c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5923,7 +5923,7 @@ def _get_dt_field(self, field): data=out_column, index=self.series._index, name=self.series.name ) - def _ceil_datetime(self, field): + def ceil(self, field): out_column = self.series._column.ceil_datetime(field) return Series( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 65e87e88f55..9f09e910b60 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1551,3 +1551,27 @@ def test_error_values(): match="DateTime Arrays is not yet implemented in cudf", ): s.values + + +@pytest.mark.parametrize( + "data", + [ + ( + [ + "2020-05-31 08:00:00", + None, + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + None, + "1900-02-28 07:00:00", + "1800-03-14 07:30:00", + "2100-03-14 07:30:00", + "1970-01-01 00:00:00", + "1969-12-31 12:59:00", + ] + ) + ], +) +def test_ceil(data): + # ps = pd.Series(data, dtype="datetime64[ns]") + return None From a8ad4ff930afb9972e2c65c68223cf5bb3aad33d Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Fri, 3 Sep 2021 03:34:05 +0000 Subject: [PATCH 4/7] finished tests and updated resolution --- python/cudf/cudf/_lib/datetime.pyx | 5 +++-- python/cudf/cudf/tests/test_datetime.py | 29 +++++++++++++++---------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 384b145e6bf..4921d1b4ace 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -64,11 +64,12 @@ def ceil_datetime(Column col, object field): cdef column_view col_view = col.view() with nogil: + # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html if field == "D": c_result = move(libcudf_datetime.ceil_day(col_view)) elif field == "H": c_result = move(libcudf_datetime.ceil_hour(col_view)) - elif field == "M": + elif field == "T": c_result = move(libcudf_datetime.ceil_minute(col_view)) elif field == "S": c_result = move(libcudf_datetime.ceil_second(col_view)) @@ -76,7 +77,7 @@ def ceil_datetime(Column col, object field): c_result = move(libcudf_datetime.ceil_millisecond(col_view)) elif field == "U": c_result = move(libcudf_datetime.ceil_microsecond(col_view)) - elif field == "ns": + elif field == "N": c_result = move(libcudf_datetime.ceil_nanosecond(col_view)) else: raise ValueError(f"Invalid resolution: '{field}'") diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 9f09e910b60..25a353bbf76 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1559,19 +1559,26 @@ def test_error_values(): ( [ "2020-05-31 08:00:00", - None, - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - None, - "1900-02-28 07:00:00", - "1800-03-14 07:30:00", - "2100-03-14 07:30:00", - "1970-01-01 00:00:00", - "1969-12-31 12:59:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", ] ) ], ) def test_ceil(data): - # ps = pd.Series(data, dtype="datetime64[ns]") - return None + resolutions = ["D", "H", "T", "S", "L", "U", "N"] + + for time_type in DATETIME_TYPES: + ps = pd.Series(data, dtype=time_type) + gs = cudf.from_pandas(ps) + + for resolution in resolutions: + expect = ps.dt.ceil(resolution) + got = gs.dt.ceil(resolution) + + assert_eq(expect, got) From d288e40b8da4a8c03ad6fc7f4707a7eaa8a912c5 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Fri, 3 Sep 2021 16:11:21 +0000 Subject: [PATCH 5/7] parameterized resolutions --- python/cudf/cudf/tests/test_datetime.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 25a353bbf76..6b0af8fb35a 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1570,15 +1570,12 @@ def test_error_values(): ) ], ) -def test_ceil(data): - resolutions = ["D", "H", "T", "S", "L", "U", "N"] - +@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) +def test_ceil(data, resolution): for time_type in DATETIME_TYPES: ps = pd.Series(data, dtype=time_type) gs = cudf.from_pandas(ps) - for resolution in resolutions: - expect = ps.dt.ceil(resolution) - got = gs.dt.ceil(resolution) - - assert_eq(expect, got) + expect = ps.dt.ceil(resolution) + got = gs.dt.ceil(resolution) + assert_eq(expect, got) From 06bc924bf94e20b61c0eb8d95e6d51515fbdef2a Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Fri, 3 Sep 2021 17:31:55 +0000 Subject: [PATCH 6/7] renamed functions --- python/cudf/cudf/core/column/datetime.py | 2 +- python/cudf/cudf/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index e686e0a950a..ad1a2d852f1 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -170,7 +170,7 @@ def values(self): def get_dt_field(self, field: str) -> ColumnBase: return libcudf.datetime.extract_datetime_component(self, field) - def ceil_datetime(self, field: str) -> ColumnBase: + def ceil(self, field: str) -> ColumnBase: return libcudf.datetime.ceil_datetime(self, field) def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e166110163c..e44bb18b629 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5924,7 +5924,7 @@ def _get_dt_field(self, field): ) def ceil(self, field): - out_column = self.series._column.ceil_datetime(field) + out_column = self.series._column.ceil(field) return Series( data=out_column, index=self.series._index, name=self.series.name From ecc7568abf4329a3c1205a3b206a9823e08c112e Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Fri, 3 Sep 2021 20:29:57 +0000 Subject: [PATCH 7/7] refactored tests --- python/cudf/cudf/tests/test_datetime.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 6b0af8fb35a..6e5b3c39dc4 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1570,11 +1570,12 @@ def test_error_values(): ) ], ) +@pytest.mark.parametrize("time_type", DATETIME_TYPES) @pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) -def test_ceil(data, resolution): - for time_type in DATETIME_TYPES: - ps = pd.Series(data, dtype=time_type) - gs = cudf.from_pandas(ps) +def test_ceil(data, time_type, resolution): + + ps = pd.Series(data, dtype=time_type) + gs = cudf.from_pandas(ps) expect = ps.dt.ceil(resolution) got = gs.dt.ceil(resolution)