From f13178bbad2eac4454b0a264c2b724449de6450a Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Thu, 2 Dec 2021 19:45:16 +0000 Subject: [PATCH 01/17] added series.dt.floor --- cpp/include/cudf/datetime.hpp | 91 ++++++++++++++++++++++++ cpp/src/datetime/datetime_ops.cu | 78 +++++++++++++++++++- cpp/tests/datetime/datetime_ops_test.cpp | 72 +++++++++++++++++++ docs/cudf/source/api_docs/series.rst | 1 + python/cudf/cudf/_lib/cpp/datetime.pxd | 13 ++++ python/cudf/cudf/_lib/datetime.pyx | 27 +++++++ python/cudf/cudf/core/column/datetime.py | 3 + python/cudf/cudf/core/series.py | 35 +++++++++ python/cudf/cudf/tests/test_datetime.py | 31 ++++++++ 9 files changed, 349 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 71e5968bf07..489ffb25ffe 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -469,5 +469,96 @@ std::unique_ptr floor_nanosecond( column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Round down to the nearest day + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_day( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest hour + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_hour( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest minute + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_minute( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest second + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_second( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest millisecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_millisecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest microsecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_microsecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest nanosecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr round_nanosecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 717bd7ac0a8..382cc3653d3 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -54,7 +54,7 @@ enum class datetime_component { NANOSECOND }; -enum class rounding_kind { CEIL, FLOOR }; +enum class rounding_kind { CEIL, FLOOR, ROUND }; template struct extract_component_operator { @@ -100,6 +100,7 @@ struct RoundFunctor { switch (round_kind) { case rounding_kind::CEIL: return cuda::std::chrono::ceil(dt); case rounding_kind::FLOOR: return cuda::std::chrono::floor(dt); + case rounding_kind::ROUND: return cuda::std::chrono::round(dt); default: cudf_assert(false && "Unsupported rounding kind."); } __builtin_unreachable(); @@ -224,7 +225,7 @@ struct is_leap_year_op { } }; -// Specific function for applying ceil/floor date ops +// Specific function for applying ceil/floor/round date ops struct dispatch_round { template std::enable_if_t(), std::unique_ptr> operator()( @@ -672,6 +673,79 @@ std::unique_ptr floor_nanosecond(column_view const& column, mr); } +std::unique_ptr round_day(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::DAY, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr round_hour(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::HOUR, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr round_minute(column_view const& 
column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::MINUTE, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr round_second(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::SECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr round_millisecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::MILLISECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr round_microsecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::MICROSECOND, + column, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr round_nanosecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::round_general(detail::rounding_kind::ROUND, + detail::datetime_component::NANOSECOND, + column, + rmm::cuda_stream_default, + mr); +} + std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 4ac24317145..f4910681a38 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -914,4 +914,76 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_nanosecond); } +TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) +{ + using T = TypeParam; + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + 
+ auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT + auto stop = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT + + auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); + + auto host_val = to_host(input); + thrust::host_vector timestamps = host_val.first; + + std::vector round_day(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_day.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_day = + fixed_width_column_wrapper(round_day.begin(), round_day.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_day(input), expected_day); + + std::vector round_hour(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_hour.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_hour = + fixed_width_column_wrapper(round_hour.begin(), round_hour.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_hour(input), expected_hour); + + std::vector round_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_minute.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_minute = fixed_width_column_wrapper( + round_minute.begin(), round_minute.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_minute(input), expected_minute); + + std::vector round_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_second.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_second = fixed_width_column_wrapper( + round_second.begin(), round_second.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_second); + + std::vector round_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_millisecond.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_millisecond = fixed_width_column_wrapper( + round_millisecond.begin(), 
round_millisecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_millisecond); + + std::vector round_microsecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_microsecond.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_microsecond = fixed_width_column_wrapper( + round_microsecond.begin(), round_microsecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_microsecond); + + std::vector round_nanosecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), round_nanosecond.begin(), [](auto i) { + return time_point_cast(round(i)); + }); + auto expected_nanosecond = fixed_width_column_wrapper( + round_nanosecond.begin(), rounded_nanosecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_nanosecond); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index b90ee628332..dc1632f0a72 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -302,6 +302,7 @@ Datetime methods isocalendar ceil floor + round Timedelta properties diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 38ed9fbd769..f75b39ce6ee 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -39,6 +39,19 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] floor_nanosecond( const column_view& column ) except + + cdef unique_ptr[column] round_day(const column_view& column) except + + cdef unique_ptr[column] round_hour(const column_view& column) except + + cdef unique_ptr[column] round_minute(const column_view& column) except + + cdef unique_ptr[column] round_second(const column_view& column) except + + cdef unique_ptr[column] round_millisecond( + const column_view& column + ) except + + cdef 
unique_ptr[column] round_microsecond( + const column_view& column + ) except + + cdef unique_ptr[column] round_nanosecond( + const column_view& column + ) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3215088c438..3c05a17c268 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -116,6 +116,33 @@ def floor_datetime(Column col, object field): return result +def round_datetime(Column col, object field): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html + if field == "D": + c_result = move(libcudf_datetime.round_day(col_view)) + elif field == "H": + c_result = move(libcudf_datetime.round_hour(col_view)) + elif field == "T" or field == "min": + c_result = move(libcudf_datetime.round_minute(col_view)) + elif field == "S": + c_result = move(libcudf_datetime.round_second(col_view)) + elif field == "L" or field == "ms": + c_result = move(libcudf_datetime.round_millisecond(col_view)) + elif field == "U" or field == "us": + c_result = move(libcudf_datetime.round_microsecond(col_view)) + elif field == "N": + c_result = move(libcudf_datetime.round_nanosecond(col_view)) + else: + raise ValueError(f"Invalid resolution: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) + return result + + def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year """ diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 7c8837ef45f..08d72f1c6ee 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -228,6 +228,9 @@ def ceil(self, freq: str) -> ColumnBase: def floor(self, freq: str) -> ColumnBase: return 
libcudf.datetime.floor_datetime(self, freq) + def round(self, freq: str) -> ColumnBase: + return libcudf.datetime.round_datetime(self, freq) + def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): return other diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index cf035ef457d..2ce997ece21 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4664,6 +4664,41 @@ def floor(self, freq): data={self.series.name: out_column}, index=self.series._index ) + def round(self, freq): + """ + Perform round operation on the data to the specified freq. + + Parameters + ---------- + freq : str + One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. + Must be a fixed frequency like 'S' (second) not 'ME' (month end). + See `frequency aliases `__ + for more details on these aliases. + + Returns + ------- + Series + Series with all timestamps rounded up to the specified frequency. + The index is preserved. + + Examples + -------- + >>> import cudf, pandas + >>> rng = pandas.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> cudf.Series(rng).dt.round("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + """ + out_column = self.series._column.round(freq) + + return Series._from_data( + data={self.series.name: out_column}, index=self.series._index + ) + def strftime(self, date_format, *args, **kwargs): """ Convert to Series using specified ``date_format``. 
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index a95be4f7932..72601a3da2c 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1819,3 +1819,34 @@ def test_floor(data, time_type, resolution): expect = ps.dt.floor(resolution) got = gs.dt.floor(resolution) assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ( + [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + ) + ], +) +@pytest.mark.parametrize("time_type", DATETIME_TYPES) +@pytest.mark.parametrize( + "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] +) +def test_round(data, time_type, resolution): + + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() + + expect = ps.dt.round(resolution) + got = gs.dt.round(resolution) + assert_eq(expect, got) From 789ace3438f946dc2f89b99ef0701b7ad77f6950 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Thu, 2 Dec 2021 22:16:11 +0000 Subject: [PATCH 02/17] added datetimeindex.round --- docs/cudf/source/api_docs/index_objects.rst | 1 + python/cudf/cudf/core/index.py | 33 +++++++++++++++++++++ python/cudf/cudf/tests/test_index.py | 13 ++++++++ 3 files changed, 47 insertions(+) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 2a4dd5ff9c8..497f7a413b9 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -282,6 +282,7 @@ Time-specific operations DatetimeIndex.round DatetimeIndex.ceil DatetimeIndex.floor + DatetimeIndex.round Conversion ~~~~~~~~~~ diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 8f905ee6d49..88dd6b4b46b 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1933,6 +1933,39 @@ def floor(self, 
field): return self.__class__._from_data({self.name: out_column}) + def round(self, field): + """ + Perform round operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. + Must be a fixed frequency like 'S' (second) not 'ME' (month end). + See `frequency aliases `__ + for more details on these aliases. + + Returns + ------- + DatetimeIndex + Index of the same type for a DatetimeIndex + + Examples + -------- + >>> import cudf + >>> import pandas as pd + >>> rng = cudf.Index(pd.date_range('1/1/2018 11:59:00', + ... periods=3, freq='min')) + >>> rng.round('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + """ + out_column = self._values.round(field) + + return self.__class__._from_data({self.name: out_column}) + class TimedeltaIndex(GenericIndex): """ diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index ab211616a02..c7fca2075f5 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2496,3 +2496,16 @@ def test_index_datetime_floor(resolution): cuidx_floor = cuidx.floor(resolution) assert_eq(pidx_floor, cuidx_floor) + + +@pytest.mark.parametrize( + "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] +) +def test_index_datetime_round(resolution): + cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) + pidx = cuidx.to_pandas() + + pidx_floor = pidx.round(resolution) + cuidx_floor = cuidx.round(resolution) + + assert_eq(pidx_floor, cuidx_floor) From 96d22baa32ccad57ab6cd6500e2937e90fedffef Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Thu, 2 Dec 2021 22:30:27 +0000 Subject: [PATCH 03/17] move round impl. 
to IndexedFrame --- python/cudf/cudf/core/frame.py | 114 ----------------------- python/cudf/cudf/core/indexed_frame.py | 121 ++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 115 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index d7a75cb9f40..61ce64e7d6b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1836,120 +1836,6 @@ def _shift(self, offset, fill_value=None): zip(self._column_names, data_columns), self._index ) - def round(self, decimals=0, how="half_even"): - """ - Round to a variable number of decimal places. - - Parameters - ---------- - decimals : int, dict, Series - Number of decimal places to round each column to. This parameter - must be an int for a Series. For a DataFrame, a dict or a Series - are also valid inputs. If an int is given, round each column to the - same number of places. Otherwise dict and Series round to variable - numbers of places. Column names should be in the keys if - `decimals` is a dict-like, or in the index if `decimals` is a - Series. Any columns not included in `decimals` will be left as is. - Elements of `decimals` which are not columns of the input will be - ignored. - how : str, optional - Type of rounding. Can be either "half_even" (default) - of "half_up" rounding. - - Returns - ------- - Series or DataFrame - A Series or DataFrame with the affected columns rounded to the - specified number of decimal places. - - Examples - -------- - **Series** - - >>> s = cudf.Series([0.1, 1.4, 2.9]) - >>> s.round() - 0 0.0 - 1 1.0 - 2 3.0 - dtype: float64 - - **DataFrame** - - >>> df = cudf.DataFrame( - [(.21, .32), (.01, .67), (.66, .03), (.21, .18)], - ... columns=['dogs', 'cats'] - ... 
) - >>> df - dogs cats - 0 0.21 0.32 - 1 0.01 0.67 - 2 0.66 0.03 - 3 0.21 0.18 - - By providing an integer each column is rounded to the same number - of decimal places - - >>> df.round(1) - dogs cats - 0 0.2 0.3 - 1 0.0 0.7 - 2 0.7 0.0 - 3 0.2 0.2 - - With a dict, the number of places for specific columns can be - specified with the column names as key and the number of decimal - places as value - - >>> df.round({'dogs': 1, 'cats': 0}) - dogs cats - 0 0.2 0.0 - 1 0.0 1.0 - 2 0.7 0.0 - 3 0.2 0.0 - - Using a Series, the number of places for specific columns can be - specified with the column names as index and the number of - decimal places as value - - >>> decimals = cudf.Series([0, 1], index=['cats', 'dogs']) - >>> df.round(decimals) - dogs cats - 0 0.2 0.0 - 1 0.0 1.0 - 2 0.7 0.0 - 3 0.2 0.0 - """ - - if isinstance(decimals, cudf.Series): - decimals = decimals.to_pandas() - - if isinstance(decimals, pd.Series): - if not decimals.index.is_unique: - raise ValueError("Index of decimals must be unique") - decimals = decimals.to_dict() - elif isinstance(decimals, int): - decimals = {name: decimals for name in self._column_names} - elif not isinstance(decimals, abc.Mapping): - raise TypeError( - "decimals must be an integer, a dict-like or a Series" - ) - - cols = { - name: col.round(decimals[name], how=how) - if (name in decimals and _is_non_decimal_numeric_dtype(col.dtype)) - else col.copy(deep=True) - for name, col in self._data.items() - } - - return self.__class__._from_data( - data=cudf.core.column_accessor.ColumnAccessor( - cols, - multiindex=self._data.multiindex, - level_names=self._data.level_names, - ), - index=self._index, - ) - @annotate("SAMPLE", color="orange", domain="cudf_python") def sample( self, diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 2044bad9675..97d6179f846 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4,6 +4,7 @@ from __future__ 
import annotations import warnings +from collections import abc from typing import Type, TypeVar from uuid import uuid4 @@ -15,7 +16,12 @@ import cudf import cudf._lib as libcudf from cudf._typing import ColumnLike -from cudf.api.types import is_categorical_dtype, is_integer_dtype, is_list_like +from cudf.api.types import ( + _is_non_decimal_numeric_dtype, + is_categorical_dtype, + is_integer_dtype, + is_list_like, +) from cudf.core.column import arange from cudf.core.frame import Frame from cudf.core.index import Index @@ -695,6 +701,119 @@ def _align_to_index( return result + def round(self, decimals=0, how="half_even"): + """ + Round to a variable number of decimal places. + + Parameters + ---------- + decimals : int, dict, Series + Number of decimal places to round each column to. This parameter + must be an int for a Series. For a DataFrame, a dict or a Series + are also valid inputs. If an int is given, round each column to the + same number of places. Otherwise dict and Series round to variable + numbers of places. Column names should be in the keys if + `decimals` is a dict-like, or in the index if `decimals` is a + Series. Any columns not included in `decimals` will be left as is. + Elements of `decimals` which are not columns of the input will be + ignored. + how : str, optional + Type of rounding. Can be either "half_even" (default) + of "half_up" rounding. + + Returns + ------- + Series or DataFrame + A Series or DataFrame with the affected columns rounded to the + specified number of decimal places. + + Examples + -------- + **Series** + + >>> s = cudf.Series([0.1, 1.4, 2.9]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: float64 + + **DataFrame** + + >>> df = cudf.DataFrame( + [(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats'] + ... 
) + >>> df + dogs cats + 0 0.21 0.32 + 1 0.01 0.67 + 2 0.66 0.03 + 3 0.21 0.18 + + By providing an integer each column is rounded to the same number + of decimal places + + >>> df.round(1) + dogs cats + 0 0.2 0.3 + 1 0.0 0.7 + 2 0.7 0.0 + 3 0.2 0.2 + + With a dict, the number of places for specific columns can be + specified with the column names as key and the number of decimal + places as value + + >>> df.round({'dogs': 1, 'cats': 0}) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + + Using a Series, the number of places for specific columns can be + specified with the column names as index and the number of + decimal places as value + + >>> decimals = cudf.Series([0, 1], index=['cats', 'dogs']) + >>> df.round(decimals) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + """ + if isinstance(decimals, cudf.Series): + decimals = decimals.to_pandas() + + if isinstance(decimals, pd.Series): + if not decimals.index.is_unique: + raise ValueError("Index of decimals must be unique") + decimals = decimals.to_dict() + elif isinstance(decimals, int): + decimals = {name: decimals for name in self._column_names} + elif not isinstance(decimals, abc.Mapping): + raise TypeError( + "decimals must be an integer, a dict-like or a Series" + ) + + cols = { + name: col.round(decimals[name], how=how) + if (name in decimals and _is_non_decimal_numeric_dtype(col.dtype)) + else col.copy(deep=True) + for name, col in self._data.items() + } + + return self.__class__._from_data( + data=cudf.core.column_accessor.ColumnAccessor( + cols, + multiindex=self._data.multiindex, + level_names=self._data.level_names, + ), + index=self._index, + ) + def resample( self, rule, From 00fea68bab7a22fc556753f9b5573e96d5ae57c0 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 3 Dec 2021 18:44:16 +0000 Subject: [PATCH 04/17] fixed typo in test --- cpp/tests/datetime/datetime_ops_test.cpp | 46 ++++++++++++------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git 
a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index f4910681a38..e6d14facc8d 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -929,60 +929,60 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) auto host_val = to_host(input); thrust::host_vector timestamps = host_val.first; - std::vector round_day(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_day.begin(), [](auto i) { + std::vector rounded_day(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_day.begin(), [](auto i) { return time_point_cast(round(i)); }); - auto expected_day = - fixed_width_column_wrapper(round_day.begin(), round_day.end()); + auto expected_day = fixed_width_column_wrapper(rounded_day.begin(), + rounded_day.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_day(input), expected_day); - std::vector round_hour(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_hour.begin(), [](auto i) { + std::vector rounded_hour(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_hour.begin(), [](auto i) { return time_point_cast(round(i)); }); - auto expected_hour = - fixed_width_column_wrapper(round_hour.begin(), round_hour.end()); + auto expected_hour = fixed_width_column_wrapper( + rounded_hour.begin(), rounded_hour.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_hour(input), expected_hour); - std::vector round_minute(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_minute.begin(), [](auto i) { + std::vector rounded_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_minute.begin(), [](auto i) { return time_point_cast(round(i)); }); auto expected_minute = fixed_width_column_wrapper( - round_minute.begin(), round_minute.end()); + rounded_minute.begin(), rounded_minute.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_minute(input), 
expected_minute); - std::vector round_second(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_second.begin(), [](auto i) { + std::vector rounded_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_second.begin(), [](auto i) { return time_point_cast(round(i)); }); auto expected_second = fixed_width_column_wrapper( - round_second.begin(), round_second.end()); + rounded_second.begin(), rounded_second.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_second); - std::vector round_millisecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_millisecond.begin(), [](auto i) { + std::vector rounded_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_millisecond.begin(), [](auto i) { return time_point_cast(round(i)); }); auto expected_millisecond = fixed_width_column_wrapper( - round_millisecond.begin(), round_millisecond.end()); + rounded_millisecond.begin(), rounded_millisecond.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_millisecond); - std::vector round_microsecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_microsecond.begin(), [](auto i) { + std::vector rounded_microsecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_microsecond.begin(), [](auto i) { return time_point_cast(round(i)); }); auto expected_microsecond = fixed_width_column_wrapper( - round_microsecond.begin(), round_microsecond.end()); + rounded_microsecond.begin(), rounded_microsecond.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_microsecond); - std::vector round_nanosecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), round_nanosecond.begin(), [](auto i) { + std::vector rounded_nanosecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), rounded_nanosecond.begin(), 
[](auto i) { return time_point_cast(round(i)); }); auto expected_nanosecond = fixed_width_column_wrapper( - round_nanosecond.begin(), rounded_nanosecond.end()); + rounded_nanosecond.begin(), rounded_nanosecond.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_nanosecond); } From b2c34759a194be8bb47e6c0265d2ae7d382efb7a Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 3 Dec 2021 18:48:20 +0000 Subject: [PATCH 05/17] fixed typo in rst file --- docs/cudf/source/api_docs/index_objects.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 497f7a413b9..2a4dd5ff9c8 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -282,7 +282,6 @@ Time-specific operations DatetimeIndex.round DatetimeIndex.ceil DatetimeIndex.floor - DatetimeIndex.round Conversion ~~~~~~~~~~ From 2ed624e6fa1c3b5e0cf2e0f5fdf5a9e717fb7310 Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 3 Dec 2021 18:42:49 -0500 Subject: [PATCH 06/17] added doxygen docstrings Co-authored-by: Bradley Dice --- cpp/src/datetime/datetime_ops.cu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 382cc3653d3..18d347caa84 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -54,7 +54,11 @@ enum class datetime_component { NANOSECOND }; -enum class rounding_kind { CEIL, FLOOR, ROUND }; +enum class rounding_kind { + CEIL, ///< Rounds up to the next integer multiple of the provided frequency + FLOOR, ///< Rounds down to the next integer multiple of the provided frequency + ROUND ///< Rounds to the nearest integer multiple of the provided frequency +}; template struct extract_component_operator { From e4f19d9bdbf9efd00e6696186c17e2b97883a9b9 Mon Sep 17 00:00:00 2001 From: 
Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 3 Dec 2021 18:44:05 -0500 Subject: [PATCH 07/17] apply suggestions related to typos in test names Co-authored-by: Bradley Dice --- cpp/tests/datetime/datetime_ops_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index e6d14facc8d..bdb3a32bd83 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -975,7 +975,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_microsecond = fixed_width_column_wrapper( rounded_microsecond.begin(), rounded_microsecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_microsecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_microsecond(input), expected_microsecond); std::vector rounded_nanosecond(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_nanosecond.begin(), [](auto i) { @@ -983,7 +983,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) }); auto expected_nanosecond = fixed_width_column_wrapper( rounded_nanosecond.begin(), rounded_nanosecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_nanosecond); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_nanosecond(input), expected_nanosecond); } CUDF_TEST_PROGRAM_MAIN() From 91697ec6e856c2c50011968260e8b8e542ffe426 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Sat, 4 Dec 2021 01:58:58 +0000 Subject: [PATCH 08/17] fixed style --- cpp/include/cudf/datetime.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 489ffb25ffe..17bea935dfd 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -470,7 +470,7 @@ std::unique_ptr floor_nanosecond( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * 
@brief Round down to the nearest day + * @brief Round to the nearest day * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -483,7 +483,7 @@ std::unique_ptr round_day( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest hour + * @brief Round to the nearest hour * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -496,7 +496,7 @@ std::unique_ptr round_hour( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest minute + * @brief Round to the nearest minute * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -509,7 +509,7 @@ std::unique_ptr round_minute( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest second + * @brief Round to the nearest second * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -522,7 +522,7 @@ std::unique_ptr round_second( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest millisecond + * @brief Round to the nearest millisecond * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. 
@@ -535,7 +535,7 @@ std::unique_ptr round_millisecond( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest microsecond + * @brief Round to the nearest microsecond * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -548,7 +548,7 @@ std::unique_ptr round_microsecond( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round down to the nearest nanosecond + * @brief Round to the nearest nanosecond * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. From 43c5d3230d3b9e8fa1f385b59f8a9e371c612658 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Sat, 4 Dec 2021 02:11:12 +0000 Subject: [PATCH 09/17] fixed style issue --- cpp/src/datetime/datetime_ops.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 18d347caa84..defa380e2e5 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -55,9 +55,9 @@ enum class datetime_component { }; enum class rounding_kind { - CEIL, ///< Rounds up to the next integer multiple of the provided frequency - FLOOR, ///< Rounds down to the next integer multiple of the provided frequency - ROUND ///< Rounds to the nearest integer multiple of the provided frequency + CEIL, ///< Rounds up to the next integer multiple of the provided frequency + FLOOR, ///< Rounds down to the next integer multiple of the provided frequency + ROUND ///< Rounds to the nearest integer multiple of the provided frequency }; template From d663784fb728d334dc6426149041b4d67d9cb249 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Mon, 6 Dec 2021 15:12:21 +0000 Subject: [PATCH 10/17] addressing PR reviews --- 
python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/series.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 97d6179f846..e5a8ac5a4fc 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -719,7 +719,7 @@ def round(self, decimals=0, how="half_even"): ignored. how : str, optional Type of rounding. Can be either "half_even" (default) - of "half_up" rounding. + or "half_up" rounding. Returns ------- diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 2ce997ece21..4367e80cd40 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4680,17 +4680,18 @@ def round(self, freq): Returns ------- Series - Series with all timestamps rounded up to the specified frequency. + Series with all timestamps rounded to the specified frequency. The index is preserved. Examples -------- - >>> import cudf, pandas - >>> rng = pandas.date_range('1/1/2018 11:59:00', periods=3, freq='min') - >>> cudf.Series(rng).dt.round("H") - 0 2018-01-01 12:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 12:00:00 + >>> import cudf + >>> dt_sr = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... 
"2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> dt_sr.dt.round("T") + 0 2001-01-01 00:05:00 + 1 2001-01-01 00:05:00 + 2 2001-01-01 00:05:00 dtype: datetime64[ns] """ out_column = self.series._column.round(freq) From 3c30fbd9822189a0c8fb1da11dcb0d60f779b9ea Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Mon, 6 Dec 2021 21:41:09 +0000 Subject: [PATCH 11/17] updated function docstring with formatting --- python/cudf/cudf/core/series.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4367e80cd40..efa0610bbc0 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4686,8 +4686,11 @@ def round(self, freq): Examples -------- >>> import cudf - >>> dt_sr = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", - ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> dt_sr = cudf.Series([ + ... "2001-01-01 00:04:45", + ... "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04", + ... ], dtype="datetime64[ns]") >>> dt_sr.dt.round("T") 0 2001-01-01 00:05:00 1 2001-01-01 00:05:00 From 7c6c135750976a391070c89c4bb3ce0289975eaa Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Tue, 7 Dec 2021 15:09:22 +0000 Subject: [PATCH 12/17] changed field to freq --- python/cudf/cudf/core/index.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88dd6b4b46b..98e099235cf 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1871,13 +1871,13 @@ def _get_dt_field(self, field): def is_boolean(self): return False - def ceil(self, field): + def ceil(self, freq): """ Perform ceil operation on the data to the specified freq. Parameters ---------- - field : str + freq : str One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. Must be a fixed frequency like 'S' (second) not 'ME' (month end). 
See `frequency aliases Date: Tue, 7 Dec 2021 17:18:53 -0500 Subject: [PATCH 13/17] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/core/indexed_frame.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index e5a8ac5a4fc..66a19b6698d 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -709,10 +709,10 @@ def round(self, decimals=0, how="half_even"): ---------- decimals : int, dict, Series Number of decimal places to round each column to. This parameter - must be an int for a Series. For a DataFrame, a dict or a Series + must be an int for a Series. For a DataFrame, a dict or a Series are also valid inputs. If an int is given, round each column to the - same number of places. Otherwise dict and Series round to variable - numbers of places. Column names should be in the keys if + same number of places. Otherwise dict and Series round to variable + numbers of places. Column names should be in the keys if `decimals` is a dict-like, or in the index if `decimals` is a Series. Any columns not included in `decimals` will be left as is. Elements of `decimals` which are not columns of the input will be @@ -741,8 +741,8 @@ def round(self, decimals=0, how="half_even"): **DataFrame** >>> df = cudf.DataFrame( - [(.21, .32), (.01, .67), (.66, .03), (.21, .18)], - ... columns=['dogs', 'cats'] + ... [(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats'], ... ) >>> df dogs cats @@ -752,7 +752,7 @@ def round(self, decimals=0, how="half_even"): 3 0.21 0.18 By providing an integer each column is rounded to the same number - of decimal places + of decimal places. 
>>> df.round(1) dogs cats @@ -762,8 +762,8 @@ def round(self, decimals=0, how="half_even"): 3 0.2 0.2 With a dict, the number of places for specific columns can be - specified with the column names as key and the number of decimal - places as value + specified with the column names as keys and the number of decimal + places as values. >>> df.round({'dogs': 1, 'cats': 0}) dogs cats @@ -773,8 +773,8 @@ def round(self, decimals=0, how="half_even"): 3 0.2 0.0 Using a Series, the number of places for specific columns can be - specified with the column names as index and the number of - decimal places as value + specified with the column names as the index and the number of + decimal places as the values. >>> decimals = cudf.Series([0, 1], index=['cats', 'dogs']) >>> df.round(decimals) From b771e807df90675b2c8ae064a997709a05ed54ab Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Tue, 7 Dec 2021 22:30:15 +0000 Subject: [PATCH 14/17] updated docstring example in DatetimeIndex.round --- python/cudf/cudf/core/index.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 98e099235cf..0002aaf38c5 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1949,18 +1949,31 @@ def round(self, freq): Returns ------- DatetimeIndex - Index of the same type for a DatetimeIndex + Index containing rounded datetimes. Examples -------- >>> import cudf - >>> import pandas as pd - >>> rng = cudf.Index(pd.date_range('1/1/2018 11:59:00', - ... periods=3, freq='min')) - >>> rng.round('H') - DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', - '2018-01-01 12:00:00'], - dtype='datetime64[ns]', freq=None) + >>> dt_idx = cudf.Index([ + ... "2001-01-01 00:04:45", + ... "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04", + ... 
], dtype="datetime64[ns]") + >>> dt_idx + DatetimeIndex(['2001-01-01 00:04:45', + '2001-01-01 00:04:58', + '2001-01-01 00:05:04'], + dtype='datetime64[ns]', freq=None) + >>> dt_idx.round('H') + DatetimeIndex(['2001-01-01', + '2001-01-01', + '2001-01-01'], + dtype='datetime64[ns]', freq=None) + >>> dt_idx.round('T') + DatetimeIndex(['2001-01-01 00:05:00', + '2001-01-01 00:05:00', + '2001-01-01 00:05:00'], + dtype='datetime64[ns]', freq=None) """ out_column = self._values.round(freq) From 09f71e295386d6cacd52f50f15c72ca4bdd79c7d Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Thu, 9 Dec 2021 10:11:57 -0500 Subject: [PATCH 15/17] changing dtype to auto for timestamps in test Co-authored-by: Vyas Ramasubramani --- cpp/tests/datetime/datetime_ops_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index bdb3a32bd83..c911dc33e86 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -927,7 +927,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); auto host_val = to_host(input); - thrust::host_vector timestamps = host_val.first; + auto timestamps = host_val.first; std::vector rounded_day(timestamps.size()); std::transform(timestamps.begin(), timestamps.end(), rounded_day.begin(), [](auto i) { From 4d0fc98f080b222f7156672e0e10ea77f6ebcbb8 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 10 Dec 2021 02:58:04 +0000 Subject: [PATCH 16/17] style fixes --- cpp/tests/datetime/datetime_ops_test.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index c911dc33e86..62b8425704f 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ 
b/cpp/tests/datetime/datetime_ops_test.cpp @@ -14,19 +14,18 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include - #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include #define XXX false // stub for null values @@ -926,7 +925,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime) auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop)); - auto host_val = to_host(input); + auto host_val = to_host(input); auto timestamps = host_val.first; std::vector rounded_day(timestamps.size()); From 8c49f73930df61dc99cfda88c8a0446d054a9455 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 10 Dec 2021 15:06:18 +0000 Subject: [PATCH 17/17] changed rounding_kind to rounding_function --- cpp/src/datetime/datetime_ops.cu | 60 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index defa380e2e5..85653b4f0be 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -54,7 +54,7 @@ enum class datetime_component { NANOSECOND }; -enum class rounding_kind { +enum class rounding_function { CEIL, ///< Rounds up to the next integer multiple of the provided frequency FLOOR, ///< Rounds down to the next integer multiple of the provided frequency ROUND ///< Rounds to the nearest integer multiple of the provided frequency @@ -99,12 +99,12 @@ struct extract_component_operator { template struct RoundFunctor { template - CUDA_DEVICE_CALLABLE auto operator()(rounding_kind round_kind, Timestamp dt) + CUDA_DEVICE_CALLABLE auto operator()(rounding_function round_kind, Timestamp dt) { switch (round_kind) { - case rounding_kind::CEIL: return cuda::std::chrono::ceil(dt); - case rounding_kind::FLOOR: return cuda::std::chrono::floor(dt); - case rounding_kind::ROUND: return cuda::std::chrono::round(dt); + case 
rounding_function::CEIL: return cuda::std::chrono::ceil(dt); + case rounding_function::FLOOR: return cuda::std::chrono::floor(dt); + case rounding_function::ROUND: return cuda::std::chrono::round(dt); default: cudf_assert(false && "Unsupported rounding kind."); } __builtin_unreachable(); @@ -112,10 +112,10 @@ struct RoundFunctor { }; struct RoundingDispatcher { - rounding_kind round_kind; + rounding_function round_kind; datetime_component component; - RoundingDispatcher(rounding_kind round_kind, datetime_component component) + RoundingDispatcher(rounding_function round_kind, datetime_component component) : round_kind(round_kind), component(component) { } @@ -233,7 +233,7 @@ struct is_leap_year_op { struct dispatch_round { template std::enable_if_t(), std::unique_ptr> operator()( - rounding_kind round_kind, + rounding_function round_kind, datetime_component component, cudf::column_view const& column, rmm::cuda_stream_view stream, @@ -419,7 +419,7 @@ std::unique_ptr add_calendrical_months(column_view const& timestamp_colu } } -std::unique_ptr round_general(rounding_kind round_kind, +std::unique_ptr round_general(rounding_function round_kind, datetime_component component, column_view const& column, rmm::cuda_stream_view stream, @@ -534,7 +534,7 @@ std::unique_ptr extract_quarter(column_view const& column, std::unique_ptr ceil_day(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::DAY, column, rmm::cuda_stream_default, @@ -544,7 +544,7 @@ std::unique_ptr ceil_day(column_view const& column, rmm::mr::device_memo std::unique_ptr ceil_hour(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::HOUR, column, 
rmm::cuda_stream_default, @@ -554,7 +554,7 @@ std::unique_ptr ceil_hour(column_view const& column, rmm::mr::device_mem std::unique_ptr ceil_minute(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::MINUTE, column, rmm::cuda_stream_default, @@ -564,7 +564,7 @@ std::unique_ptr ceil_minute(column_view const& column, rmm::mr::device_m std::unique_ptr ceil_second(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::SECOND, column, rmm::cuda_stream_default, @@ -575,7 +575,7 @@ std::unique_ptr ceil_millisecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::MILLISECOND, column, rmm::cuda_stream_default, @@ -586,7 +586,7 @@ std::unique_ptr ceil_microsecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::MICROSECOND, column, rmm::cuda_stream_default, @@ -597,7 +597,7 @@ std::unique_ptr ceil_nanosecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::CEIL, + return detail::round_general(detail::rounding_function::CEIL, detail::datetime_component::NANOSECOND, column, rmm::cuda_stream_default, @@ -607,7 +607,7 @@ std::unique_ptr ceil_nanosecond(column_view const& column, std::unique_ptr floor_day(column_view const& column, rmm::mr::device_memory_resource* 
mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::DAY, column, rmm::cuda_stream_default, @@ -617,7 +617,7 @@ std::unique_ptr floor_day(column_view const& column, rmm::mr::device_mem std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::HOUR, column, rmm::cuda_stream_default, @@ -627,7 +627,7 @@ std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_me std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::MINUTE, column, rmm::cuda_stream_default, @@ -637,7 +637,7 @@ std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_ std::unique_ptr floor_second(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::SECOND, column, rmm::cuda_stream_default, @@ -648,7 +648,7 @@ std::unique_ptr floor_millisecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::MILLISECOND, column, rmm::cuda_stream_default, @@ -659,7 +659,7 @@ std::unique_ptr floor_microsecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return 
detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::MICROSECOND, column, rmm::cuda_stream_default, @@ -670,7 +670,7 @@ std::unique_ptr floor_nanosecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::FLOOR, + return detail::round_general(detail::rounding_function::FLOOR, detail::datetime_component::NANOSECOND, column, rmm::cuda_stream_default, @@ -680,7 +680,7 @@ std::unique_ptr floor_nanosecond(column_view const& column, std::unique_ptr round_day(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, detail::datetime_component::DAY, column, rmm::cuda_stream_default, @@ -690,7 +690,7 @@ std::unique_ptr round_day(column_view const& column, rmm::mr::device_mem std::unique_ptr round_hour(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, detail::datetime_component::HOUR, column, rmm::cuda_stream_default, @@ -700,7 +700,7 @@ std::unique_ptr round_hour(column_view const& column, rmm::mr::device_me std::unique_ptr round_minute(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, detail::datetime_component::MINUTE, column, rmm::cuda_stream_default, @@ -710,7 +710,7 @@ std::unique_ptr round_minute(column_view const& column, rmm::mr::device_ std::unique_ptr round_second(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, 
detail::datetime_component::SECOND, column, rmm::cuda_stream_default, @@ -721,7 +721,7 @@ std::unique_ptr round_millisecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, detail::datetime_component::MILLISECOND, column, rmm::cuda_stream_default, @@ -732,7 +732,7 @@ std::unique_ptr round_microsecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, detail::datetime_component::MICROSECOND, column, rmm::cuda_stream_default, @@ -743,7 +743,7 @@ std::unique_ptr round_nanosecond(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::round_general(detail::rounding_kind::ROUND, + return detail::round_general(detail::rounding_function::ROUND, detail::datetime_component::NANOSECOND, column, rmm::cuda_stream_default,