From 1511cc7f8f9b9d19c7ec073062c91deed3112873 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 29 Oct 2021 10:34:38 -0500 Subject: [PATCH] Revert "Implement Series.datetime.floor (#9488)" This reverts commit 201f750958cf6f36ba0e0983849107e7b18fc0bd. --- cpp/include/cudf/datetime.hpp | 93 +---------------------- cpp/src/datetime/datetime_ops.cu | 97 +----------------------- cpp/tests/datetime/datetime_ops_test.cpp | 56 -------------- docs/cudf/source/api_docs/series.rst | 2 - python/cudf/cudf/_lib/cpp/datetime.pxd | 17 +---- python/cudf/cudf/_lib/datetime.pyx | 27 ------- python/cudf/cudf/core/column/datetime.py | 3 - python/cudf/cudf/core/series.py | 61 --------------- python/cudf/cudf/tests/test_datetime.py | 33 +------- 9 files changed, 6 insertions(+), 383 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 71e5968bf07..d67984daa7c 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -378,96 +378,5 @@ std::unique_ptr ceil_nanosecond( column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Round down to the nearest day - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_day( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest hour - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_hour( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest minute - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_minute( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest second - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_second( - cudf::column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest millisecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_millisecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest microsecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_microsecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Round down to the nearest nanosecond - * - * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. - * - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - * @return cudf::column of the same datetime resolution as the input column - */ -std::unique_ptr floor_nanosecond( - column_view const& column, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 7cbacbd0a14..6e892b3e461 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -116,35 +116,6 @@ struct ceil_timestamp { } }; -template -struct floor_timestamp { - template - CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const - { - using namespace cuda::std::chrono; - // want to use this with D, H, T (minute), S, L (millisecond), U - switch (COMPONENT) { - case datetime_component::DAY: - return time_point_cast(floor(ts)); - case datetime_component::HOUR: - return time_point_cast(floor(ts)); - case datetime_component::MINUTE: - return time_point_cast(floor(ts)); - case datetime_component::SECOND: - return time_point_cast(floor(ts)); - case datetime_component::MILLISECOND: - return time_point_cast(floor(ts)); - case datetime_component::MICROSECOND: - return time_point_cast(floor(ts)); - case datetime_component::NANOSECOND: - return time_point_cast(floor(ts)); - default: cudf_assert(false && "Unexpected resolution"); - } - - return {}; - } -}; - // Number of days until month indexed by leap year and month (0-based index) static __device__ int16_t const days_until_month[2][13] = { {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years @@ -225,7 +196,7 @@ struct is_leap_year_op { // Specific function for applying ceil/floor date ops template -struct dispatch_ceil_or_floor { +struct dispatch_ceil { template std::enable_if_t(), std::unique_ptr> operator()( cudf::column_view const& column, @@ -432,19 +403,7 @@ std::unique_ptr ceil_general(column_view const& column, rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( - column.type(), dispatch_ceil_or_floor>{}, column, stream, mr); -} - -template -std::unique_ptr floor_general(column_view const& column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return cudf::type_dispatcher(column.type(), - dispatch_ceil_or_floor>{}, - column, - stream, - mr); + column.type(), dispatch_ceil>{}, column, stream, mr); } std::unique_ptr extract_year(column_view const& column, @@ -601,58 +560,6 @@ std::unique_ptr ceil_nanosecond(column_view const& column, column, rmm::cuda_stream_default, mr); } -std::unique_ptr floor_day(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - -std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - -std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - -std::unique_ptr floor_second(column_view const& column, rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - -std::unique_ptr floor_millisecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - -std::unique_ptr floor_microsecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - -std::unique_ptr floor_nanosecond(column_view const& column, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::floor_general( - column, rmm::cuda_stream_default, mr); -} - std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index d2703b8acd0..1d3e87279e5 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -827,60 +827,4 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } -TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) -{ - using T = TypeParam; - using namespace cudf::test; - using namespace cudf::datetime; - using namespace cuda::std::chrono; - - auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT - auto stop_ = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT - - auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop_)); - - auto host_val = to_host(input); - thrust::host_vector timestamps = host_val.first; - - thrust::host_vector floored_day(timestamps.size()); - thrust::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_day = fixed_width_column_wrapper(floored_day.begin(), - floored_day.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); - - thrust::host_vector floored_hour(timestamps.size()); - thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_hour = fixed_width_column_wrapper( - floored_hour.begin(), floored_hour.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); - - std::vector floored_minute(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_minute = fixed_width_column_wrapper( - floored_minute.begin(), floored_minute.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); - - std::vector floored_second(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_second = fixed_width_column_wrapper( - floored_second.begin(), floored_second.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); - - std::vector floored_millisecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_millisecond = fixed_width_column_wrapper( - floored_millisecond.begin(), floored_millisecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); -} - CUDF_TEST_PROGRAM_MAIN() diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index c32a4a22ef2..95cf58adf0e 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -297,8 +297,6 @@ Datetime methods strftime isocalendar - ceil - floor Timedelta properties diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 71064a555c9..c27eb324008 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -22,22 +22,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_microsecond( const column_view& column ) except + - cdef unique_ptr[column] ceil_nanosecond( - const column_view& column - ) except + - cdef unique_ptr[column] floor_day(const column_view& column) except + - cdef unique_ptr[column] floor_hour(const column_view& column) except + - cdef unique_ptr[column] floor_minute(const column_view& column) except + - cdef unique_ptr[column] floor_second(const column_view& column) except + - cdef unique_ptr[column] floor_millisecond( - const column_view& column - ) except + - cdef unique_ptr[column] floor_microsecond( - const column_view& column - ) except + - cdef unique_ptr[column] floor_nanosecond( - const column_view& column - ) except + + cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 580e55a4308..4921d1b4ace 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -86,33 +86,6 @@ def ceil_datetime(Column col, object field): return result -def floor_datetime(Column col, object field): - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - - with nogil: - # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html - if field == "D": - c_result = move(libcudf_datetime.floor_day(col_view)) - elif field == "H": - c_result = move(libcudf_datetime.floor_hour(col_view)) - elif field == "T": - c_result = move(libcudf_datetime.floor_minute(col_view)) - elif field == "S": - c_result = move(libcudf_datetime.floor_second(col_view)) - elif field == "L": - c_result = move(libcudf_datetime.floor_millisecond(col_view)) - elif field == "U": - c_result = move(libcudf_datetime.floor_microsecond(col_view)) - elif field == "N": - c_result = move(libcudf_datetime.floor_nanosecond(col_view)) - else: - raise ValueError(f"Invalid resolution: '{field}'") - - result = Column.from_unique_ptr(move(c_result)) - return result - - def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year """ diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 7492c127a67..68379002e6b 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -225,9 +225,6 @@ def get_dt_field(self, field: str) -> ColumnBase: def ceil(self, field: str) -> ColumnBase: return libcudf.datetime.ceil_datetime(self, field) - def floor(self, field: str) -> ColumnBase: - return libcudf.datetime.floor_datetime(self, field) - def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): return other diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 77c84f63af8..d918a005d40 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5008,73 +5008,12 @@ def _get_dt_field(self, field): ) def ceil(self, field): - """ - Perform ceil operation on the data to the specified freq. - - Parameters - ---------- - field : str - One of ["D", "H", "T", "S", "L", "U", "N"] - See `frequency aliases `_ - for more details on these aliases. - - Returns - ------- - Series - Series with the same index for a Series. - - Examples - -------- - >>> import cudf - >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", - ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") - >>> t.dt.ceil("T") - 0 2001-01-01 00:05:00 - 1 2001-01-01 00:05:00 - 2 2001-01-01 00:06:00 - dtype: datetime64[ns] - """ out_column = self.series._column.ceil(field) return Series( data=out_column, index=self.series._index, name=self.series.name ) - def floor(self, field): - """ - Perform floor operation on the data to the specified freq. - - Parameters - ---------- - field : str - One of ["D", "H", "T", "S", "L", "U", "N"] - See `frequency aliases `_ - for more details on these aliases. - - Returns - ------- - Series - Series with the same index for a Series. - - Examples - -------- - >>> import cudf - >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", - ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") - >>> t.dt.floor("T") - 0 2001-01-01 00:04:00 - 1 2001-01-01 00:04:00 - 2 2001-01-01 00:05:00 - dtype: datetime64[ns] - """ - out_column = self.series._column.floor(field) - - return Series( - data=out_column, index=self.series._index, name=self.series.name - ) - def strftime(self, date_format, *args, **kwargs): """ Convert to Series using specified ``date_format``. diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 85f1055da89..3bbac217283 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1625,38 +1625,9 @@ def test_error_values(): @pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) def test_ceil(data, time_type, resolution): - gs = cudf.Series(data, dtype=time_type) - ps = gs.to_pandas() + ps = pd.Series(data, dtype=time_type) + gs = cudf.from_pandas(ps) expect = ps.dt.ceil(resolution) got = gs.dt.ceil(resolution) assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ( - [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:10", - "2000-12-31 04:00:05", - "1900-02-28 07:00:06", - "1800-03-14 07:30:20", - "2100-03-14 07:30:20", - "1970-01-01 00:00:09", - "1969-12-31 12:59:10", - ] - ) - ], -) -@pytest.mark.parametrize("time_type", DATETIME_TYPES) -@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) -def test_floor(data, time_type, resolution): - - gs = cudf.Series(data, dtype=time_type) - ps = gs.to_pandas() - - expect = ps.dt.floor(resolution) - got = gs.dt.floor(resolution) - assert_eq(expect, got)