Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement timestamp ceil #8942

Merged
merged 14 commits into from
Aug 25, 2021
85 changes: 85 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,5 +207,90 @@ std::unique_ptr<cudf::column> is_leap_year(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group

/**
 * @brief Round each timestamp in the column up to the nearest day
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> ceil_day(
shaneding marked this conversation as resolved.
Show resolved Hide resolved
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column up to the nearest hour
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> ceil_hour(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column up to the nearest minute
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> ceil_minute(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column up to the nearest second
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> ceil_second(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column up to the nearest millisecond
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> ceil_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column up to the nearest microsecond
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> ceil_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column up to the nearest nanosecond
 *
 * The result is cast back to the resolution of the input, so the output column has
 * the same timestamp type as `column`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> ceil_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace datetime
} // namespace cudf
8 changes: 8 additions & 0 deletions cpp/include/cudf/wrappers/durations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ namespace cudf {
* @brief Type alias representing an int32_t duration of days.
*/
using duration_D = cuda::std::chrono::duration<int32_t, cuda::std::chrono::days::period>;
/**
 * @brief Type alias representing an int32_t duration of hours.
 *
 * The tick count is stored as a 32-bit signed integer and each tick is one hour.
 */
using duration_h = cuda::std::chrono::duration<int32_t, cuda::std::chrono::hours::period>;
/**
 * @brief Type alias representing an int32_t duration of minutes.
 *
 * The tick count is stored as a 32-bit signed integer and each tick is one minute.
 */
using duration_m = cuda::std::chrono::duration<int32_t, cuda::std::chrono::minutes::period>;
shaneding marked this conversation as resolved.
Show resolved Hide resolved
/**
* @brief Type alias representing an int64_t duration of seconds.
*/
Expand Down
182 changes: 182 additions & 0 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
#include <cudf/wrappers/durations.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand All @@ -41,6 +44,9 @@ enum class datetime_component {
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND,
NANOSECOND
};

template <datetime_component Component>
Expand Down Expand Up @@ -77,6 +83,33 @@ struct extract_component_operator {
}
};

template <datetime_component COMPONENT>
struct ceil_timestamp {
template <typename Timestamp>
CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const
{
using namespace cuda::std::chrono;
// want to use this with D, H, T (minute), S, L (millisecond), U
switch (COMPONENT) {
case datetime_component::DAY:
return time_point_cast<typename Timestamp::duration>(ceil<duration_D>(ts));
case datetime_component::HOUR:
return time_point_cast<typename Timestamp::duration>(ceil<duration_h>(ts));
case datetime_component::MINUTE:
return time_point_cast<typename Timestamp::duration>(ceil<duration_m>(ts));
case datetime_component::SECOND:
return time_point_cast<typename Timestamp::duration>(ceil<duration_s>(ts));
case datetime_component::MILLISECOND:
return time_point_cast<typename Timestamp::duration>(ceil<duration_ms>(ts));
case datetime_component::MICROSECOND:
return time_point_cast<typename Timestamp::duration>(ceil<duration_us>(ts));
case datetime_component::NANOSECOND:
return time_point_cast<typename Timestamp::duration>(ceil<duration_ns>(ts));
default: return time_point_cast<typename Timestamp::duration>(ceil<duration_s>(ts));
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
}
}
};

// Number of days until month indexed by leap year and month (0-based index)
static __device__ int16_t const days_until_month[2][13] = {
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years
Expand Down Expand Up @@ -138,6 +171,45 @@ struct is_leap_year_op {
}
};

/**
 * @brief Type-dispatched functor that applies `TransformFunctor` to every row of a
 * timestamp column and returns the results in a new column of the same timestamp type.
 *
 * Both call operators are `const`, so the functor can be dispatched through a const
 * instance for timestamp and non-timestamp types alike.
 *
 * @tparam TransformFunctor Default-constructible unary functor invoked on each element
 */
template <typename TransformFunctor>
struct dispatch_ceil {
  /**
   * @brief Overload selected when `Timestamp` is a cudf timestamp type.
   *
   * @param column Input column whose elements are read as `Timestamp`
   * @param stream CUDA stream on which the bitmask copy, allocation and transform run
   * @param mr Device memory resource used for the output column and its null mask
   * @return New column with the transformed values and a copy of the input's null mask;
   *         an empty column when `column` has no rows
   */
  template <typename Timestamp>
  std::enable_if_t<cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
    cudf::column_view const& column,
    rmm::cuda_stream_view stream,
    rmm::mr::device_memory_resource* mr) const
  {
    auto const num_rows    = column.size();
    auto const output_type = data_type{cudf::type_to_id<Timestamp>()};

    // Nothing to transform: hand back an empty column of the right type.
    if (num_rows == 0) { return make_empty_column(output_type); }

    auto result = make_fixed_width_column(output_type,
                                          num_rows,
                                          cudf::detail::copy_bitmask(column, stream, mr),
                                          column.null_count(),
                                          stream,
                                          mr);

    // The transform runs over every row; validity is carried by the copied null mask.
    thrust::transform(rmm::exec_policy(stream),
                      column.begin<Timestamp>(),
                      column.end<Timestamp>(),
                      result->mutable_view().begin<Timestamp>(),
                      TransformFunctor{});

    return result;
  }

  /**
   * @brief Overload selected for every non-timestamp type; always fails.
   *
   * @throw cudf::logic_error always, because the input must be a timestamp column
   */
  template <typename Timestamp, typename... Args>
  std::enable_if_t<!cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
    Args&&...) const
  {
    CUDF_FAIL("Must be cudf::timestamp");
  }
};

// Apply the functor for every element/row in the input column to create the output column
template <typename TransformFunctor, typename OutputColT>
struct launch_functor {
Expand Down Expand Up @@ -290,6 +362,64 @@ std::unique_ptr<column> add_calendrical_months(column_view const& timestamp_colu
return output;
}

std::unique_ptr<column> ceil_general(column_view const& column,
shaneding marked this conversation as resolved.
Show resolved Hide resolved
datetime_component component,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
switch (component) {
case datetime_component::DAY:
return cudf::type_dispatcher(column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::DAY>>{},
shaneding marked this conversation as resolved.
Show resolved Hide resolved
column,
stream,
mr);
case datetime_component::HOUR:
return cudf::type_dispatcher(
column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::HOUR>>{},
column,
stream,
mr);
case datetime_component::MINUTE:
return cudf::type_dispatcher(
column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::MINUTE>>{},
column,
stream,
mr);
case datetime_component::SECOND:
return cudf::type_dispatcher(
column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::SECOND>>{},
column,
stream,
mr);
case datetime_component::MILLISECOND:
return cudf::type_dispatcher(
column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::MILLISECOND>>{},
column,
stream,
mr);
case datetime_component::MICROSECOND:
return cudf::type_dispatcher(
column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::MICROSECOND>>{},
column,
stream,
mr);
case datetime_component::NANOSECOND:
return cudf::type_dispatcher(
column.type(),
dispatch_ceil<detail::ceil_timestamp<datetime_component::NANOSECOND>>{},
column,
stream,
mr);
default: CUDF_FAIL("Unexpected resolution");
}
}

std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -378,6 +508,58 @@ std::unique_ptr<column> is_leap_year(column_view const& column,

} // namespace detail

// Public entry point: rounds timestamps up to the day on the default CUDA stream.
std::unique_ptr<column> ceil_day(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::DAY;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

// Public entry point: rounds timestamps up to the hour on the default CUDA stream.
std::unique_ptr<column> ceil_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::HOUR;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

// Public entry point: rounds timestamps up to the minute on the default CUDA stream.
std::unique_ptr<column> ceil_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MINUTE;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

// Public entry point: rounds timestamps up to the second on the default CUDA stream.
std::unique_ptr<column> ceil_second(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::SECOND;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

// Public entry point: rounds timestamps up to the millisecond on the default CUDA stream.
std::unique_ptr<column> ceil_millisecond(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MILLISECOND;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

// Public entry point: rounds timestamps up to the microsecond on the default CUDA stream.
std::unique_ptr<column> ceil_microsecond(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MICROSECOND;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

// Public entry point: rounds timestamps up to the nanosecond on the default CUDA stream.
std::unique_ptr<column> ceil_nanosecond(column_view const& column,
                                        rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::NANOSECOND;
  return detail::ceil_general(column, resolution, rmm::cuda_stream_default, mr);
}

std::unique_ptr<column> extract_year(column_view const& column, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand Down
56 changes: 56 additions & 0 deletions cpp/tests/datetime/datetime_ops_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,62 @@ TEST_F(BasicDatetimeOpsTest, TestLastDayOfMonthWithDate)
verbosity);
}

// Checks every public ceil_* function against a host-side reference computed with
// cuda::std::chrono::ceil, for each timestamp resolution in the typed test suite.
TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime)
{
  using T = TypeParam;
  using namespace cudf::test;
  using namespace cudf::datetime;
  using namespace cuda::std::chrono;

  auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
  auto stop_ = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT

  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop_));

  auto host_val                     = to_host<T>(input);
  thrust::host_vector<T> timestamps = host_val.first;

  // Each section builds the expected column on the host and compares it with the
  // library result. The sections are kept separate so a failure points at one resolution.
  std::vector<T> ceiled_day(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_day.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<days>(i));
  });
  auto expected_day =
    fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled_day.begin(), ceiled_day.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_day(input), expected_day);

  std::vector<T> ceiled_hour(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_hour.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<hours>(i));
  });
  auto expected_hour = fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled_hour.begin(),
                                                                                ceiled_hour.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_hour(input), expected_hour);

  std::vector<T> ceiled_minute(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_minute.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<minutes>(i));
  });
  auto expected_minute = fixed_width_column_wrapper<T, typename T::duration::rep>(
    ceiled_minute.begin(), ceiled_minute.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_minute(input), expected_minute);

  std::vector<T> ceiled_second(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_second.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<seconds>(i));
  });
  auto expected_second = fixed_width_column_wrapper<T, typename T::duration::rep>(
    ceiled_second.begin(), ceiled_second.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_second(input), expected_second);

  std::vector<T> ceiled_millisecond(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_millisecond.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<milliseconds>(i));
  });
  auto expected_millisecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
    ceiled_millisecond.begin(), ceiled_millisecond.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_millisecond(input), expected_millisecond);

  std::vector<T> ceiled_microsecond(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_microsecond.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<microseconds>(i));
  });
  auto expected_microsecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
    ceiled_microsecond.begin(), ceiled_microsecond.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_microsecond(input), expected_microsecond);

  std::vector<T> ceiled_nanosecond(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), ceiled_nanosecond.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(ceil<nanoseconds>(i));
  });
  auto expected_nanosecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
    ceiled_nanosecond.begin(), ceiled_nanosecond.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_nanosecond(input), expected_nanosecond);
}

TEST_F(BasicDatetimeOpsTest, TestDayOfYearWithDate)
{
using namespace cudf::test;
Expand Down