From e08ae9cb15fe260015cf70a22181fa67123e779f Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Tue, 16 Nov 2021 18:03:14 -0800 Subject: [PATCH] Implement Series.datetime.floor (#9571) Fixes: #7102 Replaces: [#9488](https://github.com/rapidsai/cudf/pull/9488/files) Authors: - Sheilah Kirui (https://github.com/skirui-source) - Mayank Anand (https://github.com/mayankanand007) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Michael Wang (https://github.com/isVoid) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/9571 --- cpp/include/cudf/datetime.hpp | 93 ++++++++++- cpp/src/datetime/datetime_ops.cu | 199 ++++++++++++++++++----- cpp/tests/datetime/datetime_ops_test.cpp | 92 ++++++++++- docs/cudf/source/api_docs/series.rst | 2 + python/cudf/cudf/_lib/cpp/datetime.pxd | 17 +- python/cudf/cudf/_lib/datetime.pyx | 33 +++- python/cudf/cudf/core/column/datetime.py | 7 +- python/cudf/cudf/core/series.py | 73 ++++++++- python/cudf/cudf/tests/test_datetime.py | 39 ++++- 9 files changed, 502 insertions(+), 53 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index d67984daa7c..71e5968bf07 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -378,5 +378,96 @@ std::unique_ptr ceil_nanosecond( column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Round down to the nearest day + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_day( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest hour + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_hour( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest minute + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_minute( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest second + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_second( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest millisecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_millisecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest microsecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_microsecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest nanosecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_nanosecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index ccfad56b4ea..717bd7ac0a8 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -54,6 +54,8 @@ enum class datetime_component { NANOSECOND }; +enum class rounding_kind { CEIL, FLOOR }; + template struct extract_component_operator { template @@ -88,32 +90,59 @@ struct extract_component_operator { } }; -template -struct ceil_timestamp { +// This functor takes the rounding type as runtime info and dispatches to the ceil/floor/round +// function. +template +struct RoundFunctor { + template + CUDA_DEVICE_CALLABLE auto operator()(rounding_kind round_kind, Timestamp dt) + { + switch (round_kind) { + case rounding_kind::CEIL: return cuda::std::chrono::ceil(dt); + case rounding_kind::FLOOR: return cuda::std::chrono::floor(dt); + default: cudf_assert(false && "Unsupported rounding kind."); + } + __builtin_unreachable(); + } +}; + +struct RoundingDispatcher { + rounding_kind round_kind; + datetime_component component; + + RoundingDispatcher(rounding_kind round_kind, datetime_component component) + : round_kind(round_kind), component(component) + { + } + template CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const { - using namespace cuda::std::chrono; - // want to use this with D, H, T (minute), S, L (millisecond), U - switch (COMPONENT) { + switch (component) { case datetime_component::DAY: - return time_point_cast(ceil(ts)); + return time_point_cast( + RoundFunctor{}(round_kind, ts)); case datetime_component::HOUR: - return time_point_cast(ceil(ts)); + return time_point_cast( + 
RoundFunctor{}(round_kind, ts)); case datetime_component::MINUTE: - return time_point_cast(ceil(ts)); + return time_point_cast( + RoundFunctor{}(round_kind, ts)); case datetime_component::SECOND: - return time_point_cast(ceil(ts)); + return time_point_cast( + RoundFunctor{}(round_kind, ts)); case datetime_component::MILLISECOND: - return time_point_cast(ceil(ts)); + return time_point_cast( + RoundFunctor{}(round_kind, ts)); case datetime_component::MICROSECOND: - return time_point_cast(ceil(ts)); + return time_point_cast( + RoundFunctor{}(round_kind, ts)); case datetime_component::NANOSECOND: - return time_point_cast(ceil(ts)); - default: cudf_assert(false && "Unexpected resolution"); + return time_point_cast( + RoundFunctor{}(round_kind, ts)); + default: cudf_assert(false && "Unsupported datetime rounding resolution."); } - - return {}; + __builtin_unreachable(); } }; @@ -196,10 +225,11 @@ struct is_leap_year_op { }; // Specific function for applying ceil/floor date ops -template -struct dispatch_ceil { +struct dispatch_round { template std::enable_if_t(), std::unique_ptr> operator()( + rounding_kind round_kind, + datetime_component component, cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -221,7 +251,7 @@ struct dispatch_ceil { column.begin(), column.end(), output->mutable_view().begin(), - TransformFunctor{}); + RoundingDispatcher{round_kind, component}); return output; } @@ -384,13 +414,14 @@ std::unique_ptr add_calendrical_months(column_view const& timestamp_colu } } -template -std::unique_ptr ceil_general(column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr round_general(rounding_kind round_kind, + datetime_component component, + column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( - column.type(), dispatch_ceil>{}, column, stream, mr); + column.type(), 
dispatch_round{}, round_kind, component, column, stream, mr); } std::unique_ptr extract_year(column_view const& column, @@ -498,53 +529,147 @@ std::unique_ptr extract_quarter(column_view const& column, std::unique_ptr ceil_day(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::DAY, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr ceil_hour(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::HOUR, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr ceil_minute(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::MINUTE, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr ceil_second(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::SECOND, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr ceil_millisecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::MILLISECOND, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr ceil_microsecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return 
detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::MICROSECOND, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr ceil_nanosecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ceil_general( - column, rmm::cuda_stream_default, mr); + return detail::round_general(detail::rounding_kind::CEIL, + detail::datetime_component::NANOSECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_day(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::DAY, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::HOUR, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::MINUTE, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_second(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::SECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_millisecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::MILLISECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_microsecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + 
CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::MICROSECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr floor_nanosecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::FLOOR, + detail::datetime_component::NANOSECOND, + column, + rmm::cuda_stream_default, + mr); } std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index c0d2d1cc447..b70ac29fd5d 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -357,9 +357,9 @@ TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime) using namespace cuda::std::chrono; auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT - auto stop_ = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT + auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT - auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop_)); + auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); auto host_val = to_host(input); thrust::host_vector timestamps = host_val.first; @@ -403,6 +403,22 @@ TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime) auto expected_millisecond = fixed_width_column_wrapper( ceiled_millisecond.begin(), ceiled_millisecond.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_millisecond(input), expected_millisecond); + + std::vector ceiled_microsecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_microsecond.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_microsecond = fixed_width_column_wrapper( + ceiled_microsecond.begin(), ceiled_microsecond.end()); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_microsecond(input), expected_microsecond); + + std::vector ceiled_nanosecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_nanosecond.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_nanosecond = fixed_width_column_wrapper( + ceiled_nanosecond.begin(), ceiled_nanosecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_nanosecond(input), expected_nanosecond); } TEST_F(BasicDatetimeOpsTest, TestDayOfYearWithDate) @@ -827,4 +843,76 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } +TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) +{ + using T = TypeParam; + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT + auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT + + auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); + + auto host_val = to_host(input); + thrust::host_vector timestamps = host_val.first; + + std::vector floored_day(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_day = fixed_width_column_wrapper(floored_day.begin(), + floored_day.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); + + std::vector floored_hour(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_hour = fixed_width_column_wrapper( + floored_hour.begin(), floored_hour.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); + + std::vector floored_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { 
+ return time_point_cast(floor(i)); + }); + auto expected_minute = fixed_width_column_wrapper( + floored_minute.begin(), floored_minute.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); + + std::vector floored_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_second = fixed_width_column_wrapper( + floored_second.begin(), floored_second.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); + + std::vector floored_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_millisecond = fixed_width_column_wrapper( + floored_millisecond.begin(), floored_millisecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); + + std::vector floored_microsecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_microsecond.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_microsecond = fixed_width_column_wrapper( + floored_microsecond.begin(), floored_microsecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_microsecond(input), expected_microsecond); + + std::vector floored_nanosecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_nanosecond.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_nanosecond = fixed_width_column_wrapper( + floored_nanosecond.begin(), floored_nanosecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_nanosecond(input), expected_nanosecond); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 46a31a0dcf6..b90ee628332 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ 
-300,6 +300,8 @@ Datetime methods strftime isocalendar + ceil + floor Timedelta properties diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 2af4dd648c5..38ed9fbd769 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -23,7 +23,22 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_microsecond( const column_view& column ) except + - cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except + + cdef unique_ptr[column] ceil_nanosecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_day(const column_view& column) except + + cdef unique_ptr[column] floor_hour(const column_view& column) except + + cdef unique_ptr[column] floor_minute(const column_view& column) except + + cdef unique_ptr[column] floor_second(const column_view& column) except + + cdef unique_ptr[column] floor_millisecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_microsecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_nanosecond( + const column_view& column + ) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 5cda06362b6..3215088c438 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -72,13 +72,13 @@ def ceil_datetime(Column col, object field): c_result = move(libcudf_datetime.ceil_day(col_view)) elif field == "H": c_result = move(libcudf_datetime.ceil_hour(col_view)) - elif field == "T": + elif field == "T" or field == "min": c_result = move(libcudf_datetime.ceil_minute(col_view)) elif field == "S": c_result = move(libcudf_datetime.ceil_second(col_view)) - elif field == "L": + elif field == "L" or field == "ms": c_result = 
move(libcudf_datetime.ceil_millisecond(col_view)) - elif field == "U": + elif field == "U" or field == "us": c_result = move(libcudf_datetime.ceil_microsecond(col_view)) elif field == "N": c_result = move(libcudf_datetime.ceil_nanosecond(col_view)) @@ -89,6 +89,33 @@ def ceil_datetime(Column col, object field): return result +def floor_datetime(Column col, object field): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html + if field == "D": + c_result = move(libcudf_datetime.floor_day(col_view)) + elif field == "H": + c_result = move(libcudf_datetime.floor_hour(col_view)) + elif field == "T" or field == "min": + c_result = move(libcudf_datetime.floor_minute(col_view)) + elif field == "S": + c_result = move(libcudf_datetime.floor_second(col_view)) + elif field == "L" or field == "ms": + c_result = move(libcudf_datetime.floor_millisecond(col_view)) + elif field == "U" or field == "us": + c_result = move(libcudf_datetime.floor_microsecond(col_view)) + elif field == "N": + c_result = move(libcudf_datetime.floor_nanosecond(col_view)) + else: + raise ValueError(f"Invalid resolution: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) + return result + + def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year """ diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 68379002e6b..756e48edccb 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -222,8 +222,11 @@ def values(self): def get_dt_field(self, field: str) -> ColumnBase: return libcudf.datetime.extract_datetime_component(self, field) - def ceil(self, field: str) -> ColumnBase: - return libcudf.datetime.ceil_datetime(self, field) + def ceil(self, freq: str) -> ColumnBase: + return libcudf.datetime.ceil_datetime(self, freq) + + def 
floor(self, freq: str) -> ColumnBase: + return libcudf.datetime.floor_datetime(self, freq) def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 00a8ebabe34..c804f2bca2c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4592,11 +4592,76 @@ def _get_dt_field(self, field): data=out_column, index=self.series._index, name=self.series.name ) - def ceil(self, field): - out_column = self.series._column.ceil(field) + def ceil(self, freq): + """ + Perform ceil operation on the data to the specified freq. - return Series( - data=out_column, index=self.series._index, name=self.series.name + Parameters + ---------- + freq : str + One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. + Must be a fixed frequency like 'S' (second) not 'ME' (month end). + See `frequency aliases `__ + for more details on these aliases. + + Returns + ------- + Series + Series with all timestamps rounded up to the specified frequency. + The index is preserved. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.ceil("T") + 0 2001-01-01 00:05:00 + 1 2001-01-01 00:05:00 + 2 2001-01-01 00:06:00 + dtype: datetime64[ns] + """ + out_column = self.series._column.ceil(freq) + + return Series._from_data( + data={self.series.name: out_column}, index=self.series._index + ) + + def floor(self, freq): + """ + Perform floor operation on the data to the specified freq. + + Parameters + ---------- + freq : str + One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. + Must be a fixed frequency like 'S' (second) not 'ME' (month end). + See `frequency aliases `__ + for more details on these aliases. + + Returns + ------- + Series + Series with all timestamps rounded down to the specified frequency. 
+ The index is preserved. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.floor("T") + 0 2001-01-01 00:04:00 + 1 2001-01-01 00:04:00 + 2 2001-01-01 00:05:00 + dtype: datetime64[ns] + """ + out_column = self.series._column.floor(freq) + + return Series._from_data( + data={self.series.name: out_column}, index=self.series._index ) def strftime(self, date_format, *args, **kwargs): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index d666dfc0ec1..bf75badc06f 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1777,12 +1777,45 @@ def test_error_values(): ], ) @pytest.mark.parametrize("time_type", DATETIME_TYPES) -@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) +@pytest.mark.parametrize( + "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] +) def test_ceil(data, time_type, resolution): - ps = pd.Series(data, dtype=time_type) - gs = cudf.from_pandas(ps) + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() expect = ps.dt.ceil(resolution) got = gs.dt.ceil(resolution) assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ( + [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + ) + ], +) +@pytest.mark.parametrize("time_type", DATETIME_TYPES) +@pytest.mark.parametrize( + "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] +) +def test_floor(data, time_type, resolution): + + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() + + expect = ps.dt.floor(resolution) + got = gs.dt.floor(resolution) + assert_eq(expect, got)