From 44d24e21bcbc0e98198c8877f8d328c1cc540834 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Fri, 6 Aug 2021 18:44:57 +0000 Subject: [PATCH 01/12] Added support and tests for Series.dt.is_month_end --- python/cudf/cudf/_lib/cpp/datetime.pxd | 3 +++ python/cudf/cudf/_lib/datetime.pyx | 9 ++++++++ python/cudf/cudf/core/series.py | 18 ++++++++++++++++ python/cudf/cudf/tests/test_datetime.py | 28 +++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 56ebc3a77fc..33cc6ead2c5 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -18,3 +18,6 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: ) except + cdef unique_ptr[column] day_of_year(const column_view& column) except + cdef unique_ptr[column] is_leap_year(const column_view& column) except + + cdef unique_ptr[column] last_day_of_month( + const column_view& column + ) except + diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3b13cedcfd7..3f5c7eb9ceb 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -67,3 +67,12 @@ def is_leap_year(Column col): c_result = move(libcudf_datetime.is_leap_year(col_view)) return Column.from_unique_ptr(move(c_result)) + +def last_day_of_month(Column col): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + c_result = move(libcudf_datetime.last_day_of_month(col_view)) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f786853b3f4..497672244a8 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6453,6 +6453,24 @@ def is_month_start(self): """ return (self.day == 1).fillna(False) + @property + def is_month_end(self): + """ + Boolean indicator if the date is the last day of the month. + + Returns + ------- + Series + Booleans indicating if dates are the last day of the month. + """ + last_day = libcudf.datetime.last_day_of_month(self.series._column) + last_day = Series._from_data( + ColumnAccessor({None: last_day}), + index=self.series._index, + name=self.series.name, + ) + return (self.day == last_day.dt.day).fillna(False) + def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) return Series( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 5f5a0a78414..ce79f04a58b 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1326,3 +1326,31 @@ def test_is_month_start(data, dtype): got = gs.dt.is_month_start assert_eq(expect, got) + +@pytest.mark.parametrize( + "data", + [ + [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + ] + ], +) +@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) +def test_is_month_end(data, dtype): + # Series + ps = pd.Series(data, dtype=dtype) + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_month_end + got = gs.dt.is_month_end + + assert_eq(expect, got) From 08c8d975a944967fdfee0753e772309bf55f7526 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Fri, 6 Aug 2021 18:48:47 +0000 Subject: [PATCH 02/12] flake8 --- python/cudf/cudf/_lib/datetime.pyx | 1 + python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3f5c7eb9ceb..edaa6c75aa3 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -68,6 +68,7 @@ def is_leap_year(Column col): return Column.from_unique_ptr(move(c_result)) + def last_day_of_month(Column col): cdef unique_ptr[column] c_result cdef column_view col_view = col.view() diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 497672244a8..c76220a9406 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6457,7 +6457,7 @@ def is_month_start(self): def is_month_end(self): """ Boolean indicator if the date is the last day of the month. - + Returns ------- Series diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index ce79f04a58b..b838c161660 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1327,6 +1327,7 @@ def test_is_month_start(data, dtype): assert_eq(expect, got) + @pytest.mark.parametrize( "data", [ From 1634f45c8e6a350b2067990dbd33db85375931e2 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Mon, 9 Aug 2021 16:08:46 +0000 Subject: [PATCH 03/12] Added leap day to tests --- python/cudf/cudf/tests/test_datetime.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index b838c161660..2f8ed0502dc 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1333,6 +1333,7 @@ def test_is_month_start(data, dtype): [ [ "2020-05-31", + "2020-02-29", None, "1999-12-01", "2000-12-21", From 736b0f5d180ab250dd71fb12f9515a119b702d83 Mon Sep 17 00:00:00 2001 From: TravisHester <34654270+TravisHester@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:20:30 -0500 Subject: [PATCH 04/12] Update datetime.pyx --- python/cudf/cudf/_lib/datetime.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index dfcad54e94b..6a977fc2046 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -91,5 +91,3 @@ def last_day_of_month(Column col): c_result = move(libcudf_datetime.last_day_of_month(col_view)) return Column.from_unique_ptr(move(c_result)) - - From 0d4362418d14e3b0e63654f6142647bda9fcb31d Mon Sep 17 00:00:00 2001 From: sft-managed Date: Tue, 10 Aug 2021 17:18:50 +0000 Subject: [PATCH 05/12] Added Series.dt.is_month_end example in docstring --- python/cudf/cudf/core/series.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e86169e8466..818757ac5bc 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6015,6 +6015,34 @@ def is_month_end(self): ------- Series Booleans indicating if dates are the last day of the month. + + Example + ------- + >>> import pandas as pd, cudf + >>> s = cudf.Series( + ... pd.date_range(start='2000-08-026', end='2000-09-03', freq='1D')) + >>> s + 0 2000-08-26 + 1 2000-08-27 + 2 2000-08-28 + 3 2000-08-29 + 4 2000-08-30 + 5 2000-08-31 + 6 2000-09-01 + 7 2000-09-02 + 8 2000-09-03 + dtype: datetime64[ns] + >>> s.dt.is_month_end + 0 False + 1 False + 2 False + 3 False + 4 False + 5 True + 6 False + 7 False + 8 False + dtype: bool """ last_day = libcudf.datetime.last_day_of_month(self.series._column) last_day = Series._from_data( From 0b2d2c0c1e8193d7fa5883378d67d56b708a1949 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Tue, 10 Aug 2021 17:54:10 +0000 Subject: [PATCH 06/12] flake8 --- python/cudf/cudf/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 818757ac5bc..155a834f2d7 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6015,7 +6015,7 @@ def is_month_end(self): ------- Series Booleans indicating if dates are the last day of the month. - + Example ------- >>> import pandas as pd, cudf From 9ff604ae1f1252d16177671d1874d2725c8dc45a Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 10 Aug 2021 16:56:30 -0500 Subject: [PATCH 07/12] Update python/cudf/cudf/core/series.py --- python/cudf/cudf/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 155a834f2d7..86bd7fac619 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6043,7 +6043,7 @@ def is_month_end(self): 7 False 8 False dtype: bool - """ + """ # noqa: E501 last_day = libcudf.datetime.last_day_of_month(self.series._column) last_day = Series._from_data( ColumnAccessor({None: last_day}), From b15bdeffc748ccd12ab6d2de8f38658d396c136d Mon Sep 17 00:00:00 2001 From: sft-managed Date: Mon, 16 Aug 2021 19:32:36 +0000 Subject: [PATCH 08/12] Added Series.dt.is_quarter_start and Series.dt.is_quarter_end --- python/cudf/cudf/core/series.py | 78 ++++++++++++++++++++++++- python/cudf/cudf/tests/test_datetime.py | 64 ++++++++++++++++++++ 2 files changed, 141 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 155a834f2d7..fcb33396ba6 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6020,7 +6020,7 @@ def is_month_end(self): ------- >>> import pandas as pd, cudf >>> s = cudf.Series( - ... pd.date_range(start='2000-08-026', end='2000-09-03', freq='1D')) + ... pd.date_range(start='2000-08-26', end='2000-09-03', freq='1D')) >>> s 0 2000-08-26 1 2000-08-27 @@ -6051,6 +6051,82 @@ def is_month_end(self): name=self.series.name, ) return (self.day == last_day.dt.day).fillna(False) + + @property + def is_quarter_start(self): + """ + Boolean indicator if the date is the first day of a quarter. + + Returns + ------- + Series + Booleans indicating if dates are the begining of a quarter + + Example + ------- + >>> import pandas as pd, cudf + >>> s = cudf.Series( + ... pd.date_range(start='2000-09-26', end='2000-10-03', freq='1D')) + >>> s + 0 2000-09-26 + 1 2000-09-27 + 2 2000-09-28 + 3 2000-09-29 + 4 2000-09-30 + 5 2000-10-01 + 6 2000-10-02 + 7 2000-10-03 + dtype: datetime64[ns] + >>> s.dt.is_quarter_start + 0 False + 1 False + 2 False + 3 False + 4 False + 5 True + 6 False + 7 False + dtype: bool + """ + return ((self.day == 1) & self.month.isin([1, 4, 7, 10])).fillna(False) + + @property + def is_quarter_end(self): + """ + Boolean indicator if the date is the last day of a quarter. + + Returns + ------- + Series + Booleans indicating if dates are the end of a quarter + + Example + ------- + >>> import pandas as pd, cudf + >>> s = cudf.Series( + ... pd.date_range(start='2000-09-26', end='2000-10-03', freq='1D')) + >>> s + 0 2000-09-26 + 1 2000-09-27 + 2 2000-09-28 + 3 2000-09-29 + 4 2000-09-30 + 5 2000-10-01 + 6 2000-10-02 + 7 2000-10-03 + dtype: datetime64[ns] + >>> s.dt.is_quarter_end + 0 False + 1 False + 2 False + 3 False + 4 True + 5 False + 6 False + 7 False + dtype: bool + """ + return (self.is_month_end & self.month.isin([3, 6, 9, 12])).fillna(False) def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index cf7fb336d80..f3e312a5a67 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1376,3 +1376,67 @@ def test_is_month_end(data, dtype): got = gs.dt.is_month_end assert_eq(expect, got) + +@pytest.mark.parametrize( + "data", + [ + [ + "2020-05-01", + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-04-1", + "1970-01-01", + "1969-12-11", + "2020-12-31" + ] + ], +) +@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) +def test_is_quarter_start(data, dtype): + # Series + ps = pd.Series(data, dtype=dtype) + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_quarter_start + got = gs.dt.is_quarter_start + + assert_eq(expect, got) + +@pytest.mark.parametrize( + "data", + [ + [ + "2020-05-01", + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-04-1", + "1970-01-01", + "1969-12-11", + "2020-12-31" + ] + ], +) +@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) +def test_is_quarter_end(data, dtype): + # Series + ps = pd.Series(data, dtype=dtype) + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_quarter_end + got = gs.dt.is_quarter_end + + assert_eq(expect, got) From c19c4f8663ab97a2a462e64ba6a6dc0565828407 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Mon, 16 Aug 2021 19:35:50 +0000 Subject: [PATCH 09/12] flake8 --- python/cudf/cudf/core/series.py | 8 +++++--- python/cudf/cudf/tests/test_datetime.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4e23b9dc421..d5f8113dca1 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6078,7 +6078,7 @@ def is_month_end(self): name=self.series.name, ) return (self.day == last_day.dt.day).fillna(False) - + @property def is_quarter_start(self): """ @@ -6116,7 +6116,7 @@ def is_quarter_start(self): dtype: bool """ return ((self.day == 1) & self.month.isin([1, 4, 7, 10])).fillna(False) - + @property def is_quarter_end(self): """ @@ -6153,7 +6153,9 @@ def is_quarter_end(self): 7 False dtype: bool """ - return (self.is_month_end & self.month.isin([3, 6, 9, 12])).fillna(False) + return (self.is_month_end & self.month.isin([3, 6, 9, 12])).fillna( + False + ) def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 81007a3fa05..c104770387a 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1410,6 +1410,7 @@ def test_is_month_end(data, dtype): assert_eq(expect, got) + @pytest.mark.parametrize( "data", [ @@ -1427,7 +1428,7 @@ def test_is_month_end(data, dtype): "1970-04-1", "1970-01-01", "1969-12-11", - "2020-12-31" + "2020-12-31", ] ], ) @@ -1441,7 +1442,8 @@ def test_is_quarter_start(data, dtype): got = gs.dt.is_quarter_start assert_eq(expect, got) - + + @pytest.mark.parametrize( "data", [ @@ -1459,7 +1461,7 @@ def test_is_quarter_start(data, dtype): "1970-04-1", "1970-01-01", "1969-12-11", - "2020-12-31" + "2020-12-31", ] ], ) From b62a312378d7d07a120eed5b30423be19635933b Mon Sep 17 00:00:00 2001 From: sft-managed Date: Mon, 16 Aug 2021 21:19:24 +0000 Subject: [PATCH 10/12] optimized is_quarter_start/end --- python/cudf/cudf/core/series.py | 34 ++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 12a8c7d7e79..b1404401819 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6086,12 +6086,12 @@ def is_month_end(self): def is_quarter_start(self): """ Boolean indicator if the date is the first day of a quarter. - + Returns ------- Series Booleans indicating if dates are the begining of a quarter - + Example ------- >>> import pandas as pd, cudf @@ -6118,18 +6118,28 @@ def is_quarter_start(self): 7 False dtype: bool """ - return ((self.day == 1) & self.month.isin([1, 4, 7, 10])).fillna(False) + day = self.series._column.get_dt_field("day") + first_month = self.series._column.get_dt_field("month").isin( + [1, 4, 7, 10] + ) + + result = ((day == cudf.Scalar(1)) & first_month).fillna(False) + return Series._from_data( + ColumnAccessor({None: result}), + index=self.series._index, + name=self.series.name, + ) @property def is_quarter_end(self): """ Boolean indicator if the date is the last day of a quarter. - + Returns ------- Series Booleans indicating if dates are the end of a quarter - + Example ------- >>> import pandas as pd, cudf @@ -6156,8 +6166,18 @@ def is_quarter_end(self): 7 False dtype: bool """ - return (self.is_month_end & self.month.isin([3, 6, 9, 12])).fillna( - False + day = self.series._column.get_dt_field("day") + last_day = libcudf.datetime.last_day_of_month(self.series._column) + last_day = last_day.get_dt_field("day") + last_month = self.series._column.get_dt_field("month").isin( + [3, 6, 9, 12] + ) + + result = ((day == last_day) & last_month).fillna(False) + return Series._from_data( + ColumnAccessor({None: result}), + index=self.series._index, + name=self.series.name, ) @property From 6f207280541ae78dfc738a0bab0008e8ff00a5a1 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Mon, 16 Aug 2021 21:27:12 +0000 Subject: [PATCH 11/12] minor fix --- python/cudf/cudf/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index b1404401819..e2154bfbd44 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6125,7 +6125,7 @@ def is_quarter_start(self): result = ((day == cudf.Scalar(1)) & first_month).fillna(False) return Series._from_data( - ColumnAccessor({None: result}), + {None: result}, index=self.series._index, name=self.series.name, ) @@ -6175,7 +6175,7 @@ def is_quarter_end(self): result = ((day == last_day) & last_month).fillna(False) return Series._from_data( - ColumnAccessor({None: result}), + {None: result}, index=self.series._index, name=self.series.name, ) From 94df215cdda984558ef27845c0e21d1d42d4f09f Mon Sep 17 00:00:00 2001 From: sft-managed Date: Mon, 16 Aug 2021 21:36:54 +0000 Subject: [PATCH 12/12] black --- python/cudf/cudf/core/series.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e2154bfbd44..b15a7b01db6 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6125,9 +6125,7 @@ def is_quarter_start(self): result = ((day == cudf.Scalar(1)) & first_month).fillna(False) return Series._from_data( - {None: result}, - index=self.series._index, - name=self.series.name, + {None: result}, index=self.series._index, name=self.series.name, ) @property @@ -6175,9 +6173,7 @@ def is_quarter_end(self): result = ((day == last_day) & last_month).fillna(False) return Series._from_data( - {None: result}, - index=self.series._index, - name=self.series.name, + {None: result}, index=self.series._index, name=self.series.name, ) @property