diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 52b21c98f75..b2d0e006d39 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -177,17 +177,52 @@ std::unique_ptr day_of_year( * 5/30/20 00:00:01, 7/30/20 14:12:13] * @endcode - * @param[in] timestamps cudf::column_view of timestamp type. - * @param[in] months cudf::column_view of integer type containing the number of months to add. + * @throw cudf::logic_error if `timestamps` datatype is not a TIMESTAMP or if `months` datatype + * is not INT16 or INT32. + * @throw cudf::logic_error if `timestamps` column size is not equal to `months` column size. + * + * @param timestamps cudf::column_view of timestamp type. + * @param months cudf::column_view of integer type containing the number of months to add. * * @returns cudf::column of timestamp type containing the computed timestamps. + */ +std::unique_ptr add_calendrical_months( + cudf::column_view const& timestamps, + cudf::column_view const& months, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Adds or subtracts a number of months from the date time type and returns a + * timestamp column that is of the same type as the input `timestamps` column. + * + * For a given row, if the `timestamps` value is null, the output for that row is null. + * A null months scalar would result in an all null column. + * This method preserves the input time and the day where applicable. The date is rounded + * down to the last day of the month for that year, if the new day is invalid for that month. + * + * @code{.pseudo} + * Example: + * timestamps = [5/31/20 08:00:00, 6/30/20 00:00:00, 7/31/20 13:00:00] + * months = -3 + * output is [2/29/20 08:00:00, 3/30/20 00:00:00, 4/30/20 13:00:00] + * + * timestamps = [4/28/20 04:00:00, 5/30/20 01:00:00, 6/30/20 21:00:00] + * months = 1 + * output is [5/28/20 04:00:00, 6/30/20 01:00:00, 7/30/20 21:00:00] + * @endcode + * * @throw cudf::logic_error if `timestamps` datatype is not a TIMESTAMP or if `months` datatype - * is not INT16. + * is not INT16 or INT32. * @throw cudf::logic_error if `timestamps` column size is not equal to `months` column size. + * + * @param timestamps cudf::column_view of timestamp type. + * @param months cudf::scalar of integer type containing the number of months to add. + * + * @return cudf::column of timestamp type containing the computed timestamps. */ std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, - cudf::column_view const& months, + cudf::scalar const& months, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 3a4459d9f95..650e28bc728 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -125,6 +125,18 @@ std::unique_ptr add_calendrical_months( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::scalar const&, + * rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr add_calendrical_months( + cudf::column_view const& timestamps, + cudf::scalar const& months, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::mr::device_memory_resource *) * diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index df013be717f..6e892b3e461 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -20,8 +20,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -32,6 +34,8 @@ #include #include +#include + namespace cudf { namespace datetime { namespace detail { @@ -285,31 +289,38 @@ std::unique_ptr apply_datetime_op(column_view const& column, } struct add_calendrical_months_functor { - column_view timestamp_column; - column_view months_column; - mutable_column_view output; - - add_calendrical_months_functor(column_view tsc, column_view mc, mutable_column_view out) - : timestamp_column(tsc), months_column(mc), output(out) - { - } - - template - typename std::enable_if_t::value, void> operator()( - rmm::cuda_stream_view stream) const + template + typename std::enable_if_t::value, std::unique_ptr> + operator()(Args&&...) const { CUDF_FAIL("Cannot extract datetime component from non-timestamp column."); } - template - typename std::enable_if_t::value, void> operator()( - rmm::cuda_stream_view stream) const + template + typename std::enable_if_t::value, std::unique_ptr> + operator()(column_view timestamp_column, + MonthIterator months_begin, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { + auto size = timestamp_column.size(); + auto output_col_type = timestamp_column.type(); + + // Return an empty column if source column is empty + if (size == 0) return make_empty_column(output_col_type); + + // The nullmask of `output` cannot be determined without information from + // the `months` type (column or scalar). Therefore, it is initialized as + // `UNALLOCATED` and assigned at a later stage. + auto output = + make_fixed_width_column(output_col_type, size, mask_state::UNALLOCATED, stream, mr); + auto output_mview = output->mutable_view(); + thrust::transform(rmm::exec_policy(stream), timestamp_column.begin(), timestamp_column.end(), - months_column.begin(), - output.begin(), + months_begin, + output_mview.begin(), [] __device__(auto time_val, auto months_val) { using namespace cuda::std::chrono; using duration_m = duration; @@ -328,6 +339,7 @@ struct add_calendrical_months_functor { // Put back the time component to the date return sys_days{ymd} + (time_val - days_since_epoch); }); + return output; } }; @@ -337,29 +349,54 @@ std::unique_ptr add_calendrical_months(column_view const& timestamp_colu rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(is_timestamp(timestamp_column.type()), "Column type should be timestamp"); - CUDF_EXPECTS(months_column.type() == data_type{type_id::INT16}, - "Months column type should be INT16"); + CUDF_EXPECTS( + months_column.type().id() == type_id::INT16 or months_column.type().id() == type_id::INT32, + "Months column type should be INT16 or INT32."); CUDF_EXPECTS(timestamp_column.size() == months_column.size(), "Timestamp and months column should be of the same size"); - auto size = timestamp_column.size(); - auto output_col_type = timestamp_column.type(); - - // Return an empty column if source column is empty - if (size == 0) return make_empty_column(output_col_type); - - auto output_col_mask = - cudf::detail::bitmask_and(table_view({timestamp_column, months_column}), stream, mr); - auto output = make_fixed_width_column( - output_col_type, size, std::move(output_col_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); - - auto launch = add_calendrical_months_functor{ - timestamp_column, months_column, static_cast(*output)}; - - type_dispatcher(timestamp_column.type(), launch, stream); + auto const months_begin_iter = + cudf::detail::indexalator_factory::make_input_iterator(months_column); + auto output = type_dispatcher(timestamp_column.type(), + add_calendrical_months_functor{}, + timestamp_column, + months_begin_iter, + stream, + mr); + + auto output_null_mask = + cudf::detail::bitmask_and(table_view{{timestamp_column, months_column}}, stream, mr); + output->set_null_mask(std::move(output_null_mask)); return output; } +std::unique_ptr add_calendrical_months(column_view const& timestamp_column, + scalar const& months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(is_timestamp(timestamp_column.type()), "Column type should be timestamp"); + CUDF_EXPECTS(months.type().id() == type_id::INT16 or months.type().id() == type_id::INT32, + "Months type should be INT16 or INT32"); + + if (months.is_valid(stream)) { + auto const months_begin_iter = thrust::make_permutation_iterator( + cudf::detail::indexalator_factory::make_input_iterator(months), + thrust::make_constant_iterator(0)); + auto output = type_dispatcher(timestamp_column.type(), + add_calendrical_months_functor{}, + timestamp_column, + months_begin_iter, + stream, + mr); + output->set_null_mask(cudf::detail::copy_bitmask(timestamp_column, stream, mr)); + return output; + } else { + return make_timestamp_column( + timestamp_column.type(), timestamp_column.size(), mask_state::ALL_NULL, stream, mr); + } +} + template std::unique_ptr ceil_general(column_view const& column, rmm::cuda_stream_view stream, @@ -591,6 +628,14 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti timestamp_column, months_column, rmm::cuda_stream_default, mr); } +std::unique_ptr add_calendrical_months(cudf::column_view const& timestamp_column, + cudf::scalar const& months, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::add_calendrical_months(timestamp_column, months, rmm::cuda_stream_default, mr); +} + std::unique_ptr is_leap_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 4a1c0512643..1d3e87279e5 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -471,10 +472,41 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithInvalidColType) 662688000L // 1991-01-01 00:00:00 GMT }; - // Months has to be an INT16 type - auto months = cudf::test::fixed_width_column_wrapper{-2}; + // Months has to be an INT16 or INT32 type + EXPECT_NO_THROW( + add_calendrical_months(timestamps_s, cudf::test::fixed_width_column_wrapper{-2})); + EXPECT_NO_THROW( + add_calendrical_months(timestamps_s, cudf::test::fixed_width_column_wrapper{-2})); + + EXPECT_THROW( + add_calendrical_months(timestamps_s, cudf::test::fixed_width_column_wrapper{-2}), + cudf::logic_error); + EXPECT_THROW( + add_calendrical_months(timestamps_s, cudf::test::fixed_width_column_wrapper{-2}), + cudf::logic_error); +} - EXPECT_THROW(add_calendrical_months(timestamps_s, months), cudf::logic_error); +TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithInvalidScalarType) +{ + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + // Time in seconds since epoch + // Dates converted using epochconverter.com + auto timestamps_s = fixed_width_column_wrapper{ + 662688000L // 1991-01-01 00:00:00 GMT + }; + + // Months has to be an INT16 or INT32 type + EXPECT_NO_THROW(add_calendrical_months(timestamps_s, *cudf::make_fixed_width_scalar(5))); + EXPECT_NO_THROW( + add_calendrical_months(timestamps_s, *cudf::make_fixed_width_scalar(-3))); + + EXPECT_THROW(add_calendrical_months(timestamps_s, *cudf::make_fixed_width_scalar(-3)), + cudf::logic_error); + EXPECT_THROW(add_calendrical_months(timestamps_s, *cudf::make_fixed_width_scalar(-3)), + cudf::logic_error); } TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithIncorrectColSizes) @@ -496,7 +528,15 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithIncorrectColSizes) EXPECT_THROW(add_calendrical_months(timestamps_s, months), cudf::logic_error); } -TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSeconds) +using ValidMonthIntegerType = cudf::test::Types; + +template +struct TypedAddMonthsTest : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(TypedAddMonthsTest, ValidMonthIntegerType); + +TYPED_TEST(TypedAddMonthsTest, TestAddMonthsWithSeconds) { using namespace cudf::test; using namespace cudf::datetime; @@ -520,11 +560,10 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSeconds) -131536728L // 1965-10-31 14:01:12 GMT }; - auto months = - cudf::test::fixed_width_column_wrapper{-2, 6, -1, 1, -4, 8, -2, 10, 4, -20, 1, 3}; + auto const months = + cudf::test::fixed_width_column_wrapper{-2, 6, -1, 1, -4, 8, -2, 10, 4, -20, 1, 3}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *add_calendrical_months(timestamps_s, months), + auto const expected = cudf::test::fixed_width_column_wrapper{ 657417600L, // 1990-11-01 00:00:00 GMT 965221201L, // 2000-08-02 13:00:01 GMT @@ -538,11 +577,58 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSeconds) -184254590L, // 1964-02-29 10:10:10 GMT -128952000L, // 1965-11-30 12:00:00 GMT -123587928L // 1966-01-31 14:01:12 GMT + }; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *add_calendrical_months(timestamps_s, months), expected, verbosity); +} + +TYPED_TEST(TypedAddMonthsTest, TestAddScalarMonthsWithSeconds) +{ + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + // Time in seconds since epoch + // Dates converted using epochconverter.com + auto timestamps_s = fixed_width_column_wrapper{ + 662688000L, // 1991-01-01 00:00:00 GMT + 949496401L, // 2000-02-02 13:00:01 GMT - leap year + 1056964201L, // 2003-06-30 09:10:01 GMT - last day of month + 0L, // This is the UNIX epoch - 1970-01-01 + -131536728L // 1965-10-31 14:01:12 GMT - last day of month + }; + + // add + auto const months1 = cudf::make_fixed_width_scalar(11); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *add_calendrical_months(timestamps_s, *months1), + fixed_width_column_wrapper{ + 691545600L, // 1991-12-01 00:00:00 GMT + 978440401L, // 2001-01-02 13:00:01 GMT + 1085908201L, // 2004-05-30 09:10:01 GMT + 28857600L, // 1970-12-01 00:00:00 GMT + -102679128L, // 1966-09-30 14:01:12 GMT + }, + verbosity); + + // subtract + auto const months2 = cudf::make_fixed_width_scalar(-20); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *add_calendrical_months(timestamps_s, *months2), + fixed_width_column_wrapper{ + 609984000L, // 1989-05-01 00:00:00 GMT + 896792401L, // 1998-06-02 13:00:01 GMT + 1004433001L, // 2001-10-30 09:10:01 GMT + -52704000L, // 1968-05-01 00:00:00 GMT + -184240728L, // 1964-02-29 14:01:12 GMT - lands on a leap year february }, verbosity); } -TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSecondsAndNullValues) +TYPED_TEST(TypedAddMonthsTest, TestAddMonthsWithSecondsAndNullValues) { using namespace cudf::test; using namespace cudf::datetime; @@ -568,7 +654,7 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSecondsAndNullValues) }, {true, false, true, false, true, false, true, false, true, true, true, true}}; - auto months = cudf::test::fixed_width_column_wrapper{ + auto const months = cudf::test::fixed_width_column_wrapper{ {-2, 6, -1, 1, -4, 8, -2, 10, 4, -20, 1, 3}, {false, true, true, false, true, true, true, true, true, true, true, true}}; @@ -593,6 +679,51 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSecondsAndNullValues) verbosity); } +TYPED_TEST(TypedAddMonthsTest, TestAddScalarMonthsWithSecondsWithNulls) +{ + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + // Time in seconds since epoch + // Dates converted using epochconverter.com + auto timestamps_s = fixed_width_column_wrapper( + { + 662688000L, // 1991-01-01 00:00:00 GMT + 0L, // NULL + 1056964201L, // 2003-06-30 09:10:01 GMT - last day of month + 0L, // This is the UNIX epoch - 1970-01-01 + 0L // NULL + }, + iterators::nulls_at({1, 4})); + + // valid scalar + auto const months1 = cudf::make_fixed_width_scalar(11); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *add_calendrical_months(timestamps_s, *months1), + fixed_width_column_wrapper( + { + 691545600L, // 1991-12-01 00:00:00 GMT + 0L, // NULL + 1085908201L, // 2004-05-30 09:10:01 GMT + 28857600L, // 1970-12-01 00:00:00 GMT + 0L, // NULL + }, + iterators::nulls_at({1, 4})), + verbosity); + + // null scalar + auto const months2 = + cudf::make_default_constructed_scalar(cudf::data_type{cudf::type_to_id()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *add_calendrical_months(timestamps_s, *months2), + fixed_width_column_wrapper({0L, 0L, 0L, 0L, 0L}, + iterators::all_nulls()), + verbosity); +} + TEST_F(BasicDatetimeOpsTest, TestIsLeapYear) { using namespace cudf::test;