From 897a9eaebd8396728a1a91093554ba99ea3e85ba Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Tue, 4 Jan 2022 11:02:58 -0500 Subject: [PATCH] Refactoring ceil/round/floor code for datetime64 types (#9926) This PR is a follow-up to #9820 where @bdice and @vyasr raised the point of having a design that avoids writing a bunch of boilerplate code, which is common in the implementations of ceil/round/floor. The aim is to reduce the total number of functions, as well as have a cleaner design. Authors: - Mayank Anand (https://github.com/mayankanand007) Approvers: - Ashwin Srinath (https://github.com/shwina) - Karthikeyan (https://github.com/karthikeyann) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/9926 --- cpp/include/cudf/datetime.hpp | 278 +++-------------------- cpp/src/datetime/datetime_ops.cu | 242 ++------------------ cpp/tests/datetime/datetime_ops_test.cpp | 200 ++++++++-------- python/cudf/cudf/_lib/cpp/datetime.pxd | 53 ++--- python/cudf/cudf/_lib/datetime.pyx | 89 +++----- 5 files changed, 215 insertions(+), 647 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 17bea935dfd..117119cd40f 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -285,280 +285,66 @@ std::unique_ptr extract_quarter( cudf::column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** @} */ // end of group - -/** - * @brief Round up to the nearest day - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
- * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_day( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round up to the nearest hour - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_hour( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round up to the nearest minute - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_minute( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round up to the nearest second - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
- * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_second( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round up to the nearest millisecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_millisecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round up to the nearest microsecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_microsecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round up to the nearest nanosecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
- * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr ceil_nanosecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** - * @brief Round down to the nearest day - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @brief Fixed frequencies supported by datetime rounding functions ceil, floor, round. * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column */ -std::unique_ptr floor_day( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +enum class rounding_frequency : int32_t { + DAY, + HOUR, + MINUTE, + SECOND, + MILLISECOND, + MICROSECOND, + NANOSECOND +}; /** - * @brief Round down to the nearest hour + * @brief Round datetimes up to the nearest multiple of the given frequency. * - * @param column cudf::column_view of the input datetime values + * @param column cudf::column_view of the input datetime values. + * @param freq rounding_frequency indicating the frequency to round up to. * @param mr Device memory resource used to allocate device memory of the returned column. * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column + * @throw cudf::logic_error if input column datatype is not TIMESTAMP. + * @return cudf::column of the same datetime resolution as the input column. 
*/ -std::unique_ptr floor_hour( +std::unique_ptr ceil_datetimes( cudf::column_view const& column, + rounding_frequency freq, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest minute + * @brief Round datetimes down to the nearest multiple of the given frequency. * - * @param column cudf::column_view of the input datetime values + * @param column cudf::column_view of the input datetime values. + * @param freq rounding_frequency indicating the frequency to round down to. * @param mr Device memory resource used to allocate device memory of the returned column. * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column + * @throw cudf::logic_error if input column datatype is not TIMESTAMP. + * @return cudf::column of the same datetime resolution as the input column. */ -std::unique_ptr floor_minute( +std::unique_ptr floor_datetimes( cudf::column_view const& column, + rounding_frequency freq, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest second + * @brief Round datetimes to the nearest multiple of the given frequency. * - * @param column cudf::column_view of the input datetime values + * @param column cudf::column_view of the input datetime values. + * @param freq rounding_frequency indicating the frequency to round to. * @param mr Device memory resource used to allocate device memory of the returned column. * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column + * @throw cudf::logic_error if input column datatype is not TIMESTAMP. + * @return cudf::column of the same datetime resolution as the input column. 
*/ -std::unique_ptr floor_second( +std::unique_ptr round_datetimes( cudf::column_view const& column, + rounding_frequency freq, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Round down to the nearest millisecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_millisecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest microsecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_microsecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest nanosecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_nanosecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest day - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
- * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_day( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest hour - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_hour( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest minute - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_minute( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest second - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
- * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_second( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest millisecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_millisecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest microsecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_microsecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round to the nearest nanosecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
- * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr round_nanosecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** @} */ // end of group } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 85653b4f0be..1e9a39560b8 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -113,9 +113,9 @@ struct RoundFunctor { struct RoundingDispatcher { rounding_function round_kind; - datetime_component component; + rounding_frequency component; - RoundingDispatcher(rounding_function round_kind, datetime_component component) + RoundingDispatcher(rounding_function round_kind, rounding_frequency component) : round_kind(round_kind), component(component) { } @@ -124,25 +124,25 @@ struct RoundingDispatcher { CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const { switch (component) { - case datetime_component::DAY: + case rounding_frequency::DAY: return time_point_cast( RoundFunctor{}(round_kind, ts)); - case datetime_component::HOUR: + case rounding_frequency::HOUR: return time_point_cast( RoundFunctor{}(round_kind, ts)); - case datetime_component::MINUTE: + case rounding_frequency::MINUTE: return time_point_cast( RoundFunctor{}(round_kind, ts)); - case datetime_component::SECOND: + case rounding_frequency::SECOND: return time_point_cast( RoundFunctor{}(round_kind, ts)); - case datetime_component::MILLISECOND: + case rounding_frequency::MILLISECOND: return time_point_cast( RoundFunctor{}(round_kind, ts)); - case datetime_component::MICROSECOND: + case rounding_frequency::MICROSECOND: return time_point_cast( RoundFunctor{}(round_kind, ts)); - case datetime_component::NANOSECOND: + case rounding_frequency::NANOSECOND: return time_point_cast( RoundFunctor{}(round_kind, ts)); 
default: cudf_assert(false && "Unsupported datetime rounding resolution."); @@ -234,7 +234,7 @@ struct dispatch_round { template std::enable_if_t(), std::unique_ptr> operator()( rounding_function round_kind, - datetime_component component, + rounding_frequency component, cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -420,7 +420,7 @@ std::unique_ptr add_calendrical_months(column_view const& timestamp_colu } std::unique_ptr round_general(rounding_function round_kind, - datetime_component component, + rounding_frequency component, column_view const& column, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -531,223 +531,31 @@ std::unique_ptr extract_quarter(column_view const& column, } // namespace detail -std::unique_ptr ceil_day(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::DAY, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr ceil_hour(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::HOUR, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr ceil_minute(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::MINUTE, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr ceil_second(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::SECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr ceil_millisecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return 
detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::MILLISECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr ceil_microsecond(column_view const& column, - rmm::mr::device_memory_resource* mr) +std::unique_ptr ceil_datetimes(column_view const& column, + rounding_frequency freq, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::MICROSECOND, - column, - rmm::cuda_stream_default, - mr); + return detail::round_general( + detail::rounding_function::CEIL, freq, column, rmm::cuda_stream_default, mr); } -std::unique_ptr ceil_nanosecond(column_view const& column, +std::unique_ptr floor_datetimes(column_view const& column, + rounding_frequency freq, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::CEIL, - detail::datetime_component::NANOSECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr floor_day(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::DAY, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::HOUR, - column, - rmm::cuda_stream_default, - mr); + return detail::round_general( + detail::rounding_function::FLOOR, freq, column, rmm::cuda_stream_default, mr); } -std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::MINUTE, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr floor_second(column_view const& column, 
rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::SECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr floor_millisecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::MILLISECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr floor_microsecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::MICROSECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr floor_nanosecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::FLOOR, - detail::datetime_component::NANOSECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_day(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::DAY, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_hour(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::HOUR, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_minute(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::MINUTE, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_second(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return 
detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::SECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_millisecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::MILLISECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_microsecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::MICROSECOND, - column, - rmm::cuda_stream_default, - mr); -} - -std::unique_ptr round_nanosecond(column_view const& column, - rmm::mr::device_memory_resource* mr) +std::unique_ptr round_datetimes(column_view const& column, + rounding_frequency freq, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_function::ROUND, - detail::datetime_component::NANOSECOND, - column, - rmm::cuda_stream_default, - mr); + return detail::round_general( + detail::rounding_function::ROUND, freq, column, rmm::cuda_stream_default, mr); } std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 62b8425704f..655fbf5679b 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -347,78 +347,6 @@ TEST_F(BasicDatetimeOpsTest, TestLastDayOfMonthWithDate) verbosity); } -TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime) -{ - using T = TypeParam; - using namespace cudf::test; - using namespace cudf::datetime; - using namespace cuda::std::chrono; - - auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT - auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT - - auto input = 
generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); - - auto host_val = to_host(input); - thrust::host_vector timestamps = host_val.first; - - thrust::host_vector ceiled_day(timestamps.size()); - thrust::transform(timestamps.begin(), timestamps.end(), ceiled_day.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_day = - fixed_width_column_wrapper(ceiled_day.begin(), ceiled_day.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_day(input), expected_day); - - thrust::host_vector ceiled_hour(timestamps.size()); - thrust::transform(timestamps.begin(), timestamps.end(), ceiled_hour.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_hour = fixed_width_column_wrapper(ceiled_hour.begin(), - ceiled_hour.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_hour(input), expected_hour); - - std::vector ceiled_minute(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), ceiled_minute.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_minute = fixed_width_column_wrapper( - ceiled_minute.begin(), ceiled_minute.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_minute(input), expected_minute); - - std::vector ceiled_second(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), ceiled_second.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_second = fixed_width_column_wrapper( - ceiled_second.begin(), ceiled_second.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_second(input), expected_second); - - std::vector ceiled_millisecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), ceiled_millisecond.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_millisecond = fixed_width_column_wrapper( - ceiled_millisecond.begin(), ceiled_millisecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_millisecond(input), expected_millisecond); - - std::vector 
ceiled_microsecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), ceiled_microsecond.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_microsecond = fixed_width_column_wrapper( - ceiled_microsecond.begin(), ceiled_microsecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_microsecond(input), expected_microsecond); - - std::vector ceiled_nanosecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), ceiled_nanosecond.begin(), [](auto i) { - return time_point_cast(ceil(i)); - }); - auto expected_nanosecond = fixed_width_column_wrapper( - ceiled_nanosecond.begin(), ceiled_nanosecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_nanosecond(input), expected_nanosecond); -} - TEST_F(BasicDatetimeOpsTest, TestDayOfYearWithDate) { using namespace cudf::test; @@ -841,7 +769,7 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } -TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) +TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime) { using T = TypeParam; using namespace cudf::test; @@ -851,10 +779,85 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT - auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); + auto const input = + generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); + auto const timestamps = to_host(input).first; + + std::vector ceiled_day(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), ceiled_day.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_day = + fixed_width_column_wrapper(ceiled_day.begin(), ceiled_day.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::DAY), expected_day); + + std::vector 
ceiled_hour(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), ceiled_hour.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_hour = fixed_width_column_wrapper(ceiled_hour.begin(), + ceiled_hour.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::HOUR), expected_hour); + + std::vector ceiled_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_minute.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_minute = fixed_width_column_wrapper( + ceiled_minute.begin(), ceiled_minute.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::MINUTE), + expected_minute); + + std::vector ceiled_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_second.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_second = fixed_width_column_wrapper( + ceiled_second.begin(), ceiled_second.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::SECOND), + expected_second); + + std::vector ceiled_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_millisecond.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_millisecond = fixed_width_column_wrapper( + ceiled_millisecond.begin(), ceiled_millisecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::MILLISECOND), + expected_millisecond); + + std::vector ceiled_microsecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_microsecond.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_microsecond = fixed_width_column_wrapper( + ceiled_microsecond.begin(), ceiled_microsecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::MICROSECOND), + expected_microsecond); + + std::vector 
ceiled_nanosecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), ceiled_nanosecond.begin(), [](auto i) { + return time_point_cast(ceil(i)); + }); + auto expected_nanosecond = fixed_width_column_wrapper( + ceiled_nanosecond.begin(), ceiled_nanosecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::NANOSECOND), + expected_nanosecond); +} + +TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) +{ + using T = TypeParam; + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; - auto host_val = to_host(input); - thrust::host_vector timestamps = host_val.first; + auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT + auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT + + auto const input = + generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); + auto const timestamps = to_host(input).first; std::vector floored_day(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { @@ -862,7 +865,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_day = fixed_width_column_wrapper(floored_day.begin(), floored_day.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::DAY), expected_day); std::vector floored_hour(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { @@ -870,7 +873,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_hour = fixed_width_column_wrapper( floored_hour.begin(), floored_hour.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::HOUR), expected_hour); std::vector floored_minute(timestamps.size()); std::transform(timestamps.begin(), 
timestamps.end(), floored_minute.begin(), [](auto i) { @@ -878,7 +881,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_minute = fixed_width_column_wrapper( floored_minute.begin(), floored_minute.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::MINUTE), + expected_minute); std::vector floored_second(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { @@ -886,7 +890,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_second = fixed_width_column_wrapper( floored_second.begin(), floored_second.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::SECOND), + expected_second); std::vector floored_millisecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { @@ -894,7 +899,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_millisecond = fixed_width_column_wrapper( floored_millisecond.begin(), floored_millisecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::MILLISECOND), + expected_millisecond); std::vector floored_microsecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), floored_microsecond.begin(), [](auto i) { @@ -902,7 +908,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_microsecond = fixed_width_column_wrapper( floored_microsecond.begin(), floored_microsecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_microsecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::MICROSECOND), + expected_microsecond); std::vector 
floored_nanosecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), floored_nanosecond.begin(), [](auto i) { @@ -910,7 +917,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) }); auto expected_nanosecond = fixed_width_column_wrapper( floored_nanosecond.begin(), floored_nanosecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_nanosecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::NANOSECOND), + expected_nanosecond); } TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) @@ -923,10 +931,9 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT - auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); - - auto host_val = to_host(input); - auto timestamps = host_val.first; + auto const input = + generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); + auto const timestamps = to_host(input).first; std::vector rounded_day(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_day.begin(), [](auto i) { @@ -934,7 +941,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_day = fixed_width_column_wrapper(rounded_day.begin(), rounded_day.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_day(input), expected_day); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::DAY), expected_day); std::vector rounded_hour(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_hour.begin(), [](auto i) { @@ -942,7 +949,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_hour = fixed_width_column_wrapper( rounded_hour.begin(), rounded_hour.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_hour(input), expected_hour); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, 
rounding_frequency::HOUR), expected_hour); std::vector rounded_minute(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_minute.begin(), [](auto i) { @@ -950,7 +957,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_minute = fixed_width_column_wrapper( rounded_minute.begin(), rounded_minute.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_minute(input), expected_minute); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::MINUTE), + expected_minute); std::vector rounded_second(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_second.begin(), [](auto i) { @@ -958,7 +966,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_second = fixed_width_column_wrapper( rounded_second.begin(), rounded_second.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_second); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::SECOND), + expected_second); std::vector rounded_millisecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_millisecond.begin(), [](auto i) { @@ -966,7 +975,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_millisecond = fixed_width_column_wrapper( rounded_millisecond.begin(), rounded_millisecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_millisecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::MILLISECOND), + expected_millisecond); std::vector rounded_microsecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_microsecond.begin(), [](auto i) { @@ -974,7 +984,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_microsecond = fixed_width_column_wrapper( rounded_microsecond.begin(), rounded_microsecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_microsecond(input), expected_microsecond); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::MICROSECOND), + expected_microsecond); std::vector rounded_nanosecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_nanosecond.begin(), [](auto i) { @@ -982,7 +993,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_nanosecond = fixed_width_column_wrapper( rounded_nanosecond.begin(), rounded_nanosecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_nanosecond(input), expected_nanosecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::NANOSECOND), + expected_nanosecond); } CUDF_TEST_PROGRAM_MAIN() diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index f75b39ce6ee..498fc313cf9 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -13,45 +13,26 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_hour(const column_view& column) except + cdef unique_ptr[column] extract_minute(const column_view& column) except + cdef unique_ptr[column] extract_second(const column_view& column) except + - cdef unique_ptr[column] ceil_day(const column_view& column) except + - cdef unique_ptr[column] ceil_hour(const column_view& column) except + - cdef unique_ptr[column] ceil_minute(const column_view& column) except + - cdef unique_ptr[column] ceil_second(const column_view& column) except + - cdef unique_ptr[column] ceil_millisecond( - const column_view& column - ) except + - cdef unique_ptr[column] ceil_microsecond( - const column_view& column - ) except + - cdef unique_ptr[column] ceil_nanosecond( - const column_view& column - ) except + - cdef unique_ptr[column] floor_day(const column_view& column) except + - cdef unique_ptr[column] floor_hour(const column_view& column) except + - cdef unique_ptr[column] floor_minute(const column_view& column) except + - cdef unique_ptr[column] 
floor_second(const column_view& column) except + - cdef unique_ptr[column] floor_millisecond( - const column_view& column - ) except + - cdef unique_ptr[column] floor_microsecond( - const column_view& column - ) except + - cdef unique_ptr[column] floor_nanosecond( - const column_view& column - ) except + - cdef unique_ptr[column] round_day(const column_view& column) except + - cdef unique_ptr[column] round_hour(const column_view& column) except + - cdef unique_ptr[column] round_minute(const column_view& column) except + - cdef unique_ptr[column] round_second(const column_view& column) except + - cdef unique_ptr[column] round_millisecond( - const column_view& column + + ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": + DAY "cudf::datetime::rounding_frequency::DAY" + HOUR "cudf::datetime::rounding_frequency::HOUR" + MINUTE "cudf::datetime::rounding_frequency::MINUTE" + SECOND "cudf::datetime::rounding_frequency::SECOND" + MILLISECOND "cudf::datetime::rounding_frequency::MILLISECOND" + MICROSECOND "cudf::datetime::rounding_frequency::MICROSECOND" + NANOSECOND "cudf::datetime::rounding_frequency::NANOSECOND" + + cdef unique_ptr[column] ceil_datetimes( + const column_view& column, rounding_frequency freq ) except + - cdef unique_ptr[column] round_microsecond( - const column_view& column + cdef unique_ptr[column] floor_datetimes( + const column_view& column, rounding_frequency freq ) except + - cdef unique_ptr[column] round_nanosecond( - const column_view& column + cdef unique_ptr[column] round_datetimes( + const column_view& column, rounding_frequency freq ) except + + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3c05a17c268..e41016645cd 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -62,82 +62,63 @@ def extract_datetime_component(Column col, object 
field): return result -def ceil_datetime(Column col, object field): +cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq): + cdef libcudf_datetime.rounding_frequency freq_val + + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html + if freq == "D": + freq_val = libcudf_datetime.rounding_frequency.DAY + elif freq == "H": + freq_val = libcudf_datetime.rounding_frequency.HOUR + elif freq in ("T", "min"): + freq_val = libcudf_datetime.rounding_frequency.MINUTE + elif freq == "S": + freq_val = libcudf_datetime.rounding_frequency.SECOND + elif freq in ("L", "ms"): + freq_val = libcudf_datetime.rounding_frequency.MILLISECOND + elif freq in ("U", "us"): + freq_val = libcudf_datetime.rounding_frequency.MICROSECOND + elif freq == "N": + freq_val = libcudf_datetime.rounding_frequency.NANOSECOND + else: + raise ValueError(f"Invalid resolution: '{freq}'") + return freq_val + + +def ceil_datetime(Column col, object freq): cdef unique_ptr[column] c_result cdef column_view col_view = col.view() + cdef libcudf_datetime.rounding_frequency freq_val = \ + _get_rounding_frequency(freq) with nogil: - # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html - if field == "D": - c_result = move(libcudf_datetime.ceil_day(col_view)) - elif field == "H": - c_result = move(libcudf_datetime.ceil_hour(col_view)) - elif field == "T" or field == "min": - c_result = move(libcudf_datetime.ceil_minute(col_view)) - elif field == "S": - c_result = move(libcudf_datetime.ceil_second(col_view)) - elif field == "L" or field == "ms": - c_result = move(libcudf_datetime.ceil_millisecond(col_view)) - elif field == "U" or field == "us": - c_result = move(libcudf_datetime.ceil_microsecond(col_view)) - elif field == "N": - c_result = move(libcudf_datetime.ceil_nanosecond(col_view)) - else: - raise ValueError(f"Invalid resolution: '{field}'") + c_result = 
move(libcudf_datetime.ceil_datetimes(col_view, freq_val)) result = Column.from_unique_ptr(move(c_result)) return result -def floor_datetime(Column col, object field): +def floor_datetime(Column col, object freq): cdef unique_ptr[column] c_result cdef column_view col_view = col.view() + cdef libcudf_datetime.rounding_frequency freq_val = \ + _get_rounding_frequency(freq) with nogil: - # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html - if field == "D": - c_result = move(libcudf_datetime.floor_day(col_view)) - elif field == "H": - c_result = move(libcudf_datetime.floor_hour(col_view)) - elif field == "T" or field == "min": - c_result = move(libcudf_datetime.floor_minute(col_view)) - elif field == "S": - c_result = move(libcudf_datetime.floor_second(col_view)) - elif field == "L" or field == "ms": - c_result = move(libcudf_datetime.floor_millisecond(col_view)) - elif field == "U" or field == "us": - c_result = move(libcudf_datetime.floor_microsecond(col_view)) - elif field == "N": - c_result = move(libcudf_datetime.floor_nanosecond(col_view)) - else: - raise ValueError(f"Invalid resolution: '{field}'") + c_result = move(libcudf_datetime.floor_datetimes(col_view, freq_val)) result = Column.from_unique_ptr(move(c_result)) return result -def round_datetime(Column col, object field): +def round_datetime(Column col, object freq): cdef unique_ptr[column] c_result cdef column_view col_view = col.view() + cdef libcudf_datetime.rounding_frequency freq_val = \ + _get_rounding_frequency(freq) with nogil: - # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html - if field == "D": - c_result = move(libcudf_datetime.round_day(col_view)) - elif field == "H": - c_result = move(libcudf_datetime.round_hour(col_view)) - elif field == "T" or field == "min": - c_result = move(libcudf_datetime.round_minute(col_view)) - elif field == "S": - c_result = move(libcudf_datetime.round_second(col_view)) - elif field == "L" or 
field == "ms": - c_result = move(libcudf_datetime.round_millisecond(col_view)) - elif field == "U" or field == "us": - c_result = move(libcudf_datetime.round_microsecond(col_view)) - elif field == "N": - c_result = move(libcudf_datetime.round_nanosecond(col_view)) - else: - raise ValueError(f"Invalid resolution: '{field}'") + c_result = move(libcudf_datetime.round_datetimes(col_view, freq_val)) result = Column.from_unique_ptr(move(c_result)) return result