Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support scalar months in add_calendrical_months, extends API to INT32 support #8991

Merged
merged 16 commits into from
Sep 15, 2021
43 changes: 39 additions & 4 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,52 @@ std::unique_ptr<cudf::column> day_of_year(
* 5/30/20 00:00:01, 7/30/20 14:12:13]
* @endcode

* @param[in] timestamps cudf::column_view of timestamp type.
* @param[in] months cudf::column_view of integer type containing the number of months to add.
* @throw cudf::logic_error if `timestamps` datatype is not a TIMESTAMP or if `months` datatype
* is not INT16 or INT32.
isVoid marked this conversation as resolved.
Show resolved Hide resolved
* @throw cudf::logic_error if `timestamps` column size is not equal to `months` column size.
*
* @param timestamps cudf::column_view of timestamp type.
* @param months cudf::column_view of integer type containing the number of months to add.
*
* @returns cudf::column of timestamp type containing the computed timestamps.
*/
std::unique_ptr<cudf::column> add_calendrical_months(
cudf::column_view const& timestamps,
cudf::column_view const& months,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Adds or subtracts a number of months from the date time type and returns a
 * timestamp column that is of the same type as the input `timestamps` column.
 *
 * For a given row, if the `timestamps` value is null, the output for that row is null.
 * A null months scalar would result in an all null column.
 * This method preserves the input time and the day where applicable. The date is rounded
 * down to the last day of the month for that year, if the new day is invalid for that month.
 *
 * @code{.pseudo}
 * Example:
 * timestamps = [5/31/20 08:00:00, 6/30/20 00:00:00, 7/31/20 13:00:00]
 * months     = -3
 * output is [2/29/20 08:00:00, 3/30/20 00:00:00, 4/30/20 13:00:00]
 *
 * timestamps = [4/28/20 04:00:00, 5/30/20 01:00:00, 6/30/20 21:00:00]
 * months     = 1
 * output is [5/28/20 04:00:00, 6/30/20 01:00:00, 7/30/20 21:00:00]
 * @endcode
 *
 * @throw cudf::logic_error if `timestamps` datatype is not a TIMESTAMP or if `months` datatype
 * is not INT16 or INT32.
 *
 * @param timestamps cudf::column_view of timestamp type.
 * @param months cudf::scalar of integer type containing the number of months to add.
 * @param mr Device memory resource forwarded to the allocation of the returned column.
 *
 * @return cudf::column of timestamp type containing the computed timestamps.
 */
std::unique_ptr<cudf::column> add_calendrical_months(
  cudf::column_view const& timestamps,
  cudf::scalar const& months,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down
12 changes: 12 additions & 0 deletions cpp/include/cudf/detail/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ std::unique_ptr<cudf::column> add_calendrical_months(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::scalar const&,
* rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource forwarded to the allocation of the returned column.
*/
std::unique_ptr<cudf::column> add_calendrical_months(
cudf::column_view const& timestamps,
cudf::scalar const& months,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::mr::device_memory_resource *)
*
Expand Down
109 changes: 75 additions & 34 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@
#include <cudf/column/column_view.hpp>
#include <cudf/datetime.hpp>
#include <cudf/detail/datetime.hpp>
#include <cudf/detail/indexalator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/constant_iterator.h>
isVoid marked this conversation as resolved.
Show resolved Hide resolved
namespace cudf {
namespace datetime {
namespace detail {
Expand Down Expand Up @@ -211,31 +214,35 @@ std::unique_ptr<column> apply_datetime_op(column_view const& column,
}

struct add_calendrical_months_functor {
column_view timestamp_column;
column_view months_column;
mutable_column_view output;

add_calendrical_months_functor(column_view tsc, column_view mc, mutable_column_view out)
: timestamp_column(tsc), months_column(mc), output(out)
{
}

template <typename Element>
typename std::enable_if_t<!cudf::is_timestamp_t<Element>::value, void> operator()(
rmm::cuda_stream_view stream) const
template <typename Element, typename... Args>
typename std::enable_if_t<!cudf::is_timestamp_t<Element>::value, std::unique_ptr<column>>
operator()(Args&&...) const
{
CUDF_FAIL("Cannot extract datetime component from non-timestamp column.");
}

template <typename Timestamp>
typename std::enable_if_t<cudf::is_timestamp_t<Timestamp>::value, void> operator()(
rmm::cuda_stream_view stream) const
template <typename Timestamp, typename MonthIterator>
typename std::enable_if_t<cudf::is_timestamp_t<Timestamp>::value, std::unique_ptr<column>>
operator()(column_view timestamp_column,
MonthIterator months_begin,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
auto size = timestamp_column.size();
auto output_col_type = timestamp_column.type();

// Return an empty column if source column is empty
if (size == 0) return make_empty_column(output_col_type);

auto output =
isVoid marked this conversation as resolved.
Show resolved Hide resolved
make_fixed_width_column(output_col_type, size, mask_state::UNALLOCATED, stream, mr);
auto output_mview = output->mutable_view();

thrust::transform(rmm::exec_policy(stream),
timestamp_column.begin<Timestamp>(),
timestamp_column.end<Timestamp>(),
months_column.begin<int16_t>(),
output.begin<Timestamp>(),
months_begin,
output_mview.begin<Timestamp>(),
[] __device__(auto time_val, auto months_val) {
using namespace cuda::std::chrono;
using duration_m = duration<int32_t, months::period>;
Expand All @@ -254,6 +261,7 @@ struct add_calendrical_months_functor {
// Put back the time component to the date
return sys_days{ymd} + (time_val - days_since_epoch);
});
return output;
}
};

Expand All @@ -263,29 +271,54 @@ std::unique_ptr<column> add_calendrical_months(column_view const& timestamp_colu
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(is_timestamp(timestamp_column.type()), "Column type should be timestamp");
CUDF_EXPECTS(months_column.type() == data_type{type_id::INT16},
"Months column type should be INT16");
CUDF_EXPECTS(
months_column.type().id() == type_id::INT16 or months_column.type().id() == type_id::INT32,
"Months column type should be INT16 or INT32.");
CUDF_EXPECTS(timestamp_column.size() == months_column.size(),
"Timestamp and months column should be of the same size");
auto size = timestamp_column.size();
auto output_col_type = timestamp_column.type();

// Return an empty column if source column is empty
if (size == 0) return make_empty_column(output_col_type);

auto output_col_mask =
cudf::detail::bitmask_and(table_view({timestamp_column, months_column}), stream, mr);
auto output = make_fixed_width_column(
output_col_type, size, std::move(output_col_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr);

auto launch = add_calendrical_months_functor{
timestamp_column, months_column, static_cast<mutable_column_view>(*output)};

type_dispatcher(timestamp_column.type(), launch, stream);

auto const months_begin_iter =
cudf::detail::indexalator_factory::make_input_iterator(months_column);
auto output = type_dispatcher(timestamp_column.type(),
add_calendrical_months_functor{},
timestamp_column,
months_begin_iter,
stream,
mr);

auto output_null_mask =
cudf::detail::bitmask_and(table_view{{timestamp_column, months_column}}, stream, mr);
output->set_null_mask(std::move(output_null_mask));
return output;
}

/**
 * @brief Adds one scalar number of months to every row of a timestamp column.
 *
 * The same `months` value is paired with each timestamp. When `months` is valid, the result
 * receives a copy of the null mask of `timestamp_column`; when `months` is null, an all-null
 * timestamp column of the same type and size is returned instead.
 *
 * @throw cudf::logic_error if `timestamp_column` is not of a timestamp type.
 * @throw cudf::logic_error if `months` is not of type INT16 or INT32.
 *
 * @param timestamp_column Column of timestamps to shift.
 * @param months Scalar holding the number of months to add.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource forwarded to the allocation of the returned column.
 * @return Column with the same type as `timestamp_column` containing the shifted timestamps.
 */
std::unique_ptr<column> add_calendrical_months(column_view const& timestamp_column,
                                               scalar const& months,
                                               rmm::cuda_stream_view stream,
                                               rmm::mr::device_memory_resource* mr)
{
  CUDF_EXPECTS(is_timestamp(timestamp_column.type()), "Column type should be timestamp");
  CUDF_EXPECTS(months.type().id() == type_id::INT16 or months.type().id() == type_id::INT32,
               "Months type should be INT16 or INT32");

  if (months.is_valid(stream)) {
    // Broadcast the scalar: the constant index 0 makes every position of the permutation
    // iterator read the single value exposed by the scalar's input iterator.
    auto const months_begin_iter = thrust::make_permutation_iterator(
      cudf::detail::indexalator_factory::make_input_iterator(months),
      thrust::make_constant_iterator(0));
    auto output = type_dispatcher(timestamp_column.type(),
                                  add_calendrical_months_functor{},
                                  timestamp_column,
                                  months_begin_iter,
                                  stream,
                                  mr);
    // Only the timestamps contribute nulls here, so their mask is copied as-is.
    output->set_null_mask(cudf::detail::copy_bitmask(timestamp_column, stream, mr));
    return output;
  } else {
    // A null months scalar nullifies every output row.
    return make_timestamp_column(
      timestamp_column.type(), timestamp_column.size(), mask_state::ALL_NULL, stream, mr);
  }
}

std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -456,6 +489,14 @@ std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& ti
timestamp_column, months_column, rmm::cuda_stream_default, mr);
}

// Public entry point for the scalar-`months` overload: forwards to
// detail::add_calendrical_months, always using rmm::cuda_stream_default as the stream.
std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& timestamp_column,
cudf::scalar const& months,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::add_calendrical_months(timestamp_column, months, rmm::cuda_stream_default, mr);
}

std::unique_ptr<column> is_leap_year(column_view const& column, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand Down
Loading