From edc058f0e250e4fe6a1cd3829683c13b6a394373 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 17 Oct 2022 14:48:21 -0500 Subject: [PATCH] Add `nanosecond` & `microsecond` to `DatetimeProperties` (#11911) This PR: - [x] Implemented `extract_millisecond_fraction`, `extract_microsecond_fraction` and `extract_nanosecond_fraction` in libcudf. - [x] Added `nanosecond` and `microsecond` in `DatetimeProperties` & `DatetimeIndex`. - [x] Updated docs - [x] Added & modified tests Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - David Wendt (https://github.com/davidwendt) - Matthew Roeschke (https://github.com/mroeschke) - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) - https://github.com/nvdbaranec - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11911 --- cpp/include/cudf/datetime.hpp | 71 ++++++++++++++++--- cpp/include/cudf/detail/datetime.hpp | 33 +++++++++ cpp/src/datetime/datetime_ops.cu | 76 +++++++++++++++++++-- cpp/tests/datetime/datetime_ops_test.cpp | 54 +++++++++++++++ docs/cudf/source/api_docs/index_objects.rst | 9 ++- docs/cudf/source/api_docs/series.rst | 28 ++++---- python/cudf/cudf/_lib/cpp/datetime.pxd | 9 +++ python/cudf/cudf/_lib/datetime.pyx | 12 ++++ python/cudf/cudf/core/index.py | 50 ++++++++++++++ python/cudf/cudf/core/series.py | 61 ++++++++++++++++- python/cudf/cudf/tests/test_datetime.py | 10 ++- 11 files changed, 377 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index a8955ffb17c..fb04336871f 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -36,7 +36,7 @@ namespace datetime { */ /** - * @brief Extracts year from any date time type and returns an int16_t + * @brief Extracts year from any datetime type and returns an int16_t * cudf::column.
* * @param column cudf::column_view of the input datetime values @@ -50,7 +50,7 @@ std::unique_ptr extract_year( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts month from any date time type and returns an int16_t + * @brief Extracts month from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -64,7 +64,7 @@ std::unique_ptr extract_month( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts day from any date time type and returns an int16_t + * @brief Extracts day from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -78,7 +78,7 @@ std::unique_ptr extract_day( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts day from any date time type and returns an int16_t + * @brief Extracts weekday from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -92,7 +92,7 @@ std::unique_ptr extract_weekday( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts hour from any date time type and returns an int16_t + * @brief Extracts hour from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -106,7 +106,7 @@ std::unique_ptr extract_hour( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts minute from any date time type and returns an int16_t + * @brief Extracts minute from any datetime type and returns an int16_t * cudf::column.
* * @param column cudf::column_view of the input datetime values @@ -120,7 +120,7 @@ std::unique_ptr extract_minute( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts second from any date time type and returns an int16_t + * @brief Extracts second from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -133,6 +133,57 @@ std::unique_ptr extract_second( cudf::column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Extracts millisecond fraction from any datetime type and returns an int16_t + * cudf::column. + * + * A millisecond fraction is only the 3 digits that make up the millisecond portion of a duration. + * For example, the millisecond fraction of 1.234567890 seconds is 234. + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t milliseconds + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_millisecond_fraction( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Extracts microsecond fraction from any datetime type and returns an int16_t + * cudf::column. + * + * A microsecond fraction is only the 3 digits that make up the microsecond portion of a duration. + * For example, the microsecond fraction of 1.234567890 seconds is 567. 
+ * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t microseconds + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_microsecond_fraction( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Extracts nanosecond fraction from any datetime type and returns an int16_t + * cudf::column. + * + * A nanosecond fraction is only the 3 digits that make up the nanosecond portion of a duration. + * For example, the nanosecond fraction of 1.234567890 seconds is 890. + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t nanoseconds + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_nanosecond_fraction( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group /** * @addtogroup datetime_compute @@ -141,7 +192,7 @@ std::unique_ptr extract_second( */ /** - * @brief Computes the last day of the month in date time type and returns a TIMESTAMP_DAYS + * @brief Computes the last day of the month in datetime type and returns a TIMESTAMP_DAYS * cudf::column. * * @param column cudf::column_view of the input datetime values @@ -169,7 +220,7 @@ std::unique_ptr day_of_year( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Adds or subtracts a number of months from the date time type and returns a + * @brief Adds or subtracts a number of months from the datetime type and returns a * timestamp column that is of the same type as the input `timestamps` column. 
* * For a given row, if the `timestamps` or the `months` column value is null, @@ -204,7 +255,7 @@ std::unique_ptr add_calendrical_months( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Adds or subtracts a number of months from the date time type and returns a + * @brief Adds or subtracts a number of months from the datetime type and returns a * timestamp column that is of the same type as the input `timestamps` column. * * For a given row, if the `timestamps` value is null, the output for that row is null. diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 7a2545fbdcf..d17e641533e 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -94,6 +94,39 @@ std::unique_ptr extract_second( rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr extract_millisecond_fraction( + cudf::column_view const& column, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +std::unique_ptr extract_microsecond_fraction( + cudf::column_view const& column, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr extract_nanosecond_fraction( + cudf::column_view const& column, + rmm::cuda_stream_view stream = cudf::default_stream_value, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::mr::device_memory_resource *) * diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index ee026d6c395..e89792525c9 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -76,9 +76,22 @@ struct extract_component_operator { if (time_since_midnight.count() < 0) { time_since_midnight += days(1); } - auto hrs_ = duration_cast(time_since_midnight); - auto mins_ = duration_cast(time_since_midnight - hrs_); - auto secs_ = duration_cast(time_since_midnight - hrs_ - mins_); + auto const hrs_ = [&] { return duration_cast(time_since_midnight); }; + auto const mins_ = [&] { return duration_cast(time_since_midnight) - hrs_(); }; + auto const secs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_(); + }; + auto const millisecs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_() - secs_(); + }; + auto const microsecs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_() - secs_() - + millisecs_(); + }; + auto const nanosecs_ = [&] { + return duration_cast(time_since_midnight) - hrs_() - mins_() - secs_() - + millisecs_() - microsecs_(); + }; switch (Component) { case datetime_component::YEAR: @@ -89,9 +102,12 @@ struct 
extract_component_operator { return static_cast(year_month_day(days_since_epoch).day()); case datetime_component::WEEKDAY: return year_month_weekday(days_since_epoch).weekday().iso_encoding(); - case datetime_component::HOUR: return hrs_.count(); - case datetime_component::MINUTE: return mins_.count(); - case datetime_component::SECOND: return secs_.count(); + case datetime_component::HOUR: return hrs_().count(); + case datetime_component::MINUTE: return mins_().count(); + case datetime_component::SECOND: return secs_().count(); + case datetime_component::MILLISECOND: return millisecs_().count(); + case datetime_component::MICROSECOND: return microsecs_().count(); + case datetime_component::NANOSECOND: return nanosecs_().count(); default: return 0; } } @@ -495,6 +511,33 @@ std::unique_ptr extract_second(column_view const& column, cudf::type_id::INT16>(column, stream, mr); } +std::unique_ptr extract_millisecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); +} + +std::unique_ptr extract_microsecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); +} + +std::unique_ptr extract_nanosecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); +} + std::unique_ptr last_day_of_month(column_view const& column, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -607,6 +650,27 @@ std::unique_ptr extract_second(column_view const& column, return detail::extract_second(column, cudf::default_stream_value, mr); } +std::unique_ptr 
extract_millisecond_fraction(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_millisecond_fraction(column, cudf::default_stream_value, mr); +} + +std::unique_ptr extract_microsecond_fraction(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_microsecond_fraction(column, cudf::default_stream_value, mr); +} + +std::unique_ptr extract_nanosecond_fraction(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_nanosecond_fraction(column, cudf::default_stream_value, mr); +} + std::unique_ptr last_day_of_month(column_view const& column, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 2898a649e36..c6d36b2aa6e 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -60,6 +60,9 @@ TYPED_TEST(NonTimestampTest, TestThrowsOnNonTimestamp) EXPECT_THROW(extract_hour(col), cudf::logic_error); EXPECT_THROW(extract_minute(col), cudf::logic_error); EXPECT_THROW(extract_second(col), cudf::logic_error); + EXPECT_THROW(extract_millisecond_fraction(col), cudf::logic_error); + EXPECT_THROW(extract_microsecond_fraction(col), cudf::logic_error); + EXPECT_THROW(extract_nanosecond_fraction(col), cudf::logic_error); EXPECT_THROW(last_day_of_month(col), cudf::logic_error); EXPECT_THROW(day_of_year(col), cudf::logic_error); EXPECT_THROW(add_calendrical_months( @@ -97,12 +100,21 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) 1674631932929 // 2023-01-25 07:32:12.929 GMT }; + auto timestamps_ns = + cudf::test::fixed_width_column_wrapper{ + -23324234, // 1969-12-31 23:59:59.976675766 GMT + 23432424, // 1970-01-01 00:00:00.023432424 GMT + 987234623 // 1970-01-01 00:00:00.987234623 GMT + }; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_D), 
fixed_width_column_wrapper{1965, 2018, 2023}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_s), fixed_width_column_wrapper{1965, 2018, 2023}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ms), fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ns), + fixed_width_column_wrapper{1969, 1970, 1970}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_D), fixed_width_column_wrapper{10, 7, 1}); @@ -110,6 +122,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{10, 7, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ms), fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ns), + fixed_width_column_wrapper{12, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_D), fixed_width_column_wrapper{26, 4, 25}); @@ -117,6 +131,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{26, 4, 25}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ms), fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ns), + fixed_width_column_wrapper{31, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_D), fixed_width_column_wrapper{2, 3, 3}); @@ -124,6 +140,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{2, 3, 3}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms), fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms), + fixed_width_column_wrapper{2, 3, 3}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_D), fixed_width_column_wrapper{0, 0, 0}); @@ -131,6 +149,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{14, 12, 7}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ms), fixed_width_column_wrapper{14, 12, 7}); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ns), + fixed_width_column_wrapper{23, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_D), fixed_width_column_wrapper{0, 0, 0}); @@ -138,6 +158,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{1, 0, 32}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ms), fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns), + fixed_width_column_wrapper{59, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_D), fixed_width_column_wrapper{0, 0, 0}); @@ -145,6 +167,35 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{12, 0, 12}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_ms), fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_D), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_s), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ms), + fixed_width_column_wrapper{762, 0, 929}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ns), + fixed_width_column_wrapper{976, 23, 987}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_D), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_s), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ms), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ns), + fixed_width_column_wrapper{675, 432, 234}); + + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_D), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_s), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ms), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns), + fixed_width_column_wrapper{766, 424, 623}); } template @@ -175,6 +226,9 @@ TYPED_TEST(TypedDatetimeOpsTest, TestEmptyColumns) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps), int16s); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps), int16s); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps), int16s); } TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedDatetimeComponents) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 8e0e3bbd411..6edd15e7176 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -262,12 +262,15 @@ Time/date components DatetimeIndex.hour DatetimeIndex.minute DatetimeIndex.second - DatetimeIndex.dayofweek - DatetimeIndex.dayofyear + DatetimeIndex.microsecond + DatetimeIndex.nanosecond DatetimeIndex.day_of_year + DatetimeIndex.dayofyear + DatetimeIndex.dayofweek DatetimeIndex.weekday - DatetimeIndex.is_leap_year DatetimeIndex.quarter + DatetimeIndex.is_leap_year + DatetimeIndex.isocalendar Time-specific operations diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 53042041f6d..842319338b3 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ 
-260,25 +260,27 @@ Datetime properties .. autosummary:: :toctree: api/ + year + month day - dayofweek - dayofyear - days_in_month - day_of_year hour minute - month second + microsecond + nanosecond + dayofweek weekday - year - is_leap_year + dayofyear + day_of_year + quarter is_month_start is_month_end is_quarter_start is_quarter_end is_year_start is_year_end - quarter + is_leap_year + days_in_month Datetime methods ^^^^^^^^^^^^^^^^ @@ -286,11 +288,11 @@ Datetime methods .. autosummary:: :toctree: api/ - strftime isocalendar - ceil - floor + strftime round + floor + ceil Timedelta properties @@ -300,11 +302,11 @@ Timedelta properties .. autosummary:: :toctree: api/ - components days + seconds microseconds nanoseconds - seconds + components .. _api.series.str: .. include:: string_handling.rst diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 74addb87357..d03587745e1 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -15,6 +15,15 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_hour(const column_view& column) except + cdef unique_ptr[column] extract_minute(const column_view& column) except + cdef unique_ptr[column] extract_second(const column_view& column) except + + cdef unique_ptr[column] extract_millisecond_fraction( + const column_view& column + ) except + + cdef unique_ptr[column] extract_microsecond_fraction( + const column_view& column + ) except + + cdef unique_ptr[column] extract_nanosecond_fraction( + const column_view& column + ) except + ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": DAY "cudf::datetime::rounding_frequency::DAY" diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index e218400a2db..cb0a245b915 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -49,6 +49,18 @@ def 
extract_datetime_component(Column col, object field): c_result = move(libcudf_datetime.extract_minute(col_view)) elif field == "second": c_result = move(libcudf_datetime.extract_second(col_view)) + elif field == "millisecond": + c_result = move( + libcudf_datetime.extract_millisecond_fraction(col_view) + ) + elif field == "microsecond": + c_result = move( + libcudf_datetime.extract_microsecond_fraction(col_view) + ) + elif field == "nanosecond": + c_result = move( + libcudf_datetime.extract_nanosecond_fraction(col_view) + ) elif field == "day_of_year": c_result = move(libcudf_datetime.day_of_year(col_view)) else: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5b101f74664..0628497fc29 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2051,6 +2051,56 @@ def second(self): """ return self._get_dt_field("second") + @property # type: ignore + @_cudf_nvtx_annotate + def microsecond(self): + """ + The microseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2000-01-01", + ... periods=3, freq="us")) + >>> datetime_index + DatetimeIndex([ '2000-01-01 00:00:00', '2000-01-01 00:00:00.000001', + '2000-01-01 00:00:00.000002'], + dtype='datetime64[ns]') + >>> datetime_index.microsecond + Int32Index([0, 1, 2], dtype='int32') + """ # noqa: E501 + return as_index( + ( + self._values.get_dt_field("millisecond") + * cudf.Scalar(1000, dtype="int32") + ) + + self._values.get_dt_field("microsecond"), + name=self.name, + ) + + @property # type: ignore + @_cudf_nvtx_annotate + def nanosecond(self): + """ + The nanoseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2000-01-01", + ... 
periods=3, freq="ns")) + >>> datetime_index + DatetimeIndex([ '2000-01-01 00:00:00', + '2000-01-01 00:00:00.000000001', + '2000-01-01 00:00:00.000000002'], + dtype='datetime64[ns]') + >>> datetime_index.nanosecond + Int16Index([0, 1, 2], dtype='int16') + """ + return self._get_dt_field("nanosecond") + @property # type: ignore @_cudf_nvtx_annotate def weekday(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f11052096e3..7493202a3d1 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1206,7 +1206,8 @@ def __repr__(self): and not is_decimal_dtype(preprocess.dtype) and not is_struct_dtype(preprocess.dtype) ) or isinstance( - preprocess._column, cudf.core.column.timedelta.TimeDeltaColumn + preprocess._column, + cudf.core.column.timedelta.TimeDeltaColumn, ): output = repr( preprocess.astype("O").fillna(cudf._NA_REP).to_pandas() @@ -3591,6 +3592,64 @@ def second(self): """ return self._get_dt_field("second") + @property # type: ignore + @_cudf_nvtx_annotate + def microsecond(self): + """ + The microseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range("2000-01-01", + ... periods=3, freq="us")) + >>> datetime_series + 0 2000-01-01 00:00:00.000000 + 1 2000-01-01 00:00:00.000001 + 2 2000-01-01 00:00:00.000002 + dtype: datetime64[ns] + >>> datetime_series.dt.microsecond + 0 0 + 1 1 + 2 2 + dtype: int32 + """ + return Series( + data=( + self.series._column.get_dt_field("millisecond") + * cudf.Scalar(1000, dtype="int32") + ) + + self.series._column.get_dt_field("microsecond"), + index=self.series._index, + name=self.series.name, + ) + + @property # type: ignore + @_cudf_nvtx_annotate + def nanosecond(self): + """ + The nanoseconds of the datetime. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range("2000-01-01", + ... 
periods=3, freq="ns")) + >>> datetime_series + 0 2000-01-01 00:00:00.000000000 + 1 2000-01-01 00:00:00.000000001 + 2 2000-01-01 00:00:00.000000002 + dtype: datetime64[ns] + >>> datetime_series.dt.nanosecond + 0 0 + 1 1 + 2 2 + dtype: int16 + """ + return self._get_dt_field("nanosecond") + @property # type: ignore @_cudf_nvtx_annotate def weekday(self): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 800a8aeeab5..bd3b3561701 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -28,7 +28,9 @@ def data1(): def data2(): - return pd.date_range("20010101", "20020215", freq="400h", name="times") + return pd.date_range( + "20010101", freq="243434324423423234N", name="times", periods=10 + ) def timeseries_us_data(): @@ -81,6 +83,8 @@ def numerical_data(): "hour", "minute", "second", + "microsecond", + "nanosecond", "weekday", "dayofweek", "dayofyear", @@ -172,7 +176,7 @@ def test_dt_ops(data): # libcudf doesn't respect timezones -@pytest.mark.parametrize("data", [data1()]) +@pytest.mark.parametrize("data", [data1(), data2()]) @pytest.mark.parametrize("field", fields) def test_dt_series(data, field): pd_data = pd.Series(data.copy()) @@ -182,7 +186,7 @@ def test_dt_series(data, field): assert_eq(base, test) -@pytest.mark.parametrize("data", [data1()]) +@pytest.mark.parametrize("data", [data1(), data2()]) @pytest.mark.parametrize("field", fields) def test_dt_index(data, field): pd_data = data.copy()