Skip to content

Commit

Permalink
Implement timestamp ceil (#8942)
Browse files Browse the repository at this point in the history
Partly addresses #8682 

This adds `ceil` functions for timestamp columns in libcudf. Only fixed resolutions are supported: day, hour, minute, second, millisecond, microsecond and nanosecond.

Authors:
  - https://github.com/shaneding

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #8942
  • Loading branch information
shaneding authored Aug 25, 2021
1 parent f0fa255 commit 2a566dd
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 0 deletions.
85 changes: 85 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,5 +237,90 @@ std::unique_ptr<cudf::column> extract_quarter(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group

/**
* @brief Round each timestamp in the input column up to the nearest day
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_day(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round each timestamp in the input column up to the nearest hour
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_hour(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round each timestamp in the input column up to the nearest minute
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_minute(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round each timestamp in the input column up to the nearest second
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_second(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round each timestamp in the input column up to the nearest millisecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> ceil_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round each timestamp in the input column up to the nearest microsecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> ceil_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round each timestamp in the input column up to the nearest nanosecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> ceil_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace datetime
} // namespace cudf
8 changes: 8 additions & 0 deletions cpp/include/cudf/wrappers/durations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ namespace cudf {
* @brief Type alias representing an int32_t duration of days.
*/
using duration_D = cuda::std::chrono::duration<int32_t, cuda::std::chrono::days::period>;
/**
* @brief Type alias representing an int32_t duration of hours.
*
* The tick count is a signed 32-bit integer, so the representable span is roughly
* +/- 245,000 years.
*/
using duration_h = cuda::std::chrono::duration<int32_t, cuda::std::chrono::hours::period>;
/**
* @brief Type alias representing an int32_t duration of minutes.
*
* The tick count is a signed 32-bit integer, so the representable span is roughly
* +/- 4,000 years.
* NOTE(review): confirm this range is sufficient wherever values stored with a 64-bit
* tick count are converted to this type.
*/
using duration_m = cuda::std::chrono::duration<int32_t, cuda::std::chrono::minutes::period>;
/**
* @brief Type alias representing an int64_t duration of seconds.
*/
Expand Down
135 changes: 135 additions & 0 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
#include <cudf/wrappers/durations.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand All @@ -41,6 +44,9 @@ enum class datetime_component {
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND,
NANOSECOND
};

template <datetime_component Component>
Expand Down Expand Up @@ -77,6 +83,35 @@ struct extract_component_operator {
}
};

// Element-wise functor: rounds one timestamp up to the resolution named by COMPONENT and
// returns it expressed in the timestamp's own (input) resolution.
template <datetime_component COMPONENT>
struct ceil_timestamp {
  template <typename Timestamp>
  CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const
  {
    using namespace cuda::std::chrono;

    // Every result is cast back to the duration type the input is stored in.
    using native_duration = typename Timestamp::duration;

    // Supported targets: D, H, T (minute), S, L (millisecond), U (microsecond), N (nanosecond).
    // NOTE(review): when the target resolution is finer than Timestamp's own, the rounding step
    // first rescales the tick count to the finer unit -- confirm that cannot overflow for the
    // timestamp ranges callers are expected to pass.
    switch (COMPONENT) {
      case datetime_component::DAY: return time_point_cast<native_duration>(ceil<duration_D>(ts));
      case datetime_component::HOUR: return time_point_cast<native_duration>(ceil<duration_h>(ts));
      case datetime_component::MINUTE:
        return time_point_cast<native_duration>(ceil<duration_m>(ts));
      case datetime_component::SECOND:
        return time_point_cast<native_duration>(ceil<duration_s>(ts));
      case datetime_component::MILLISECOND:
        return time_point_cast<native_duration>(ceil<duration_ms>(ts));
      case datetime_component::MICROSECOND:
        return time_point_cast<native_duration>(ceil<duration_us>(ts));
      case datetime_component::NANOSECOND:
        return time_point_cast<native_duration>(ceil<duration_ns>(ts));
      // Any other component is not a fixed resolution this functor handles.
      default: cudf_assert(false && "Unexpected resolution");
    }

    // Only reached through the default branch above: hand back a value-initialized timestamp.
    return {};
  }
};

// Number of days until month indexed by leap year and month (0-based index)
static __device__ int16_t const days_until_month[2][13] = {
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years
Expand Down Expand Up @@ -155,6 +190,45 @@ struct is_leap_year_op {
}
};

// Type-dispatched functor used by the ceil date ops.
//
// For timestamp element types it builds a new column of the same timestamp type, copies the
// input's null mask and null count, and fills the data by applying `TransformFunctor` to every
// row of the input. For any other element type it fails with CUDF_FAIL.
//
// Both overloads are const-qualified so the functor can be invoked through a const object
// regardless of which element type the dispatcher selects.
template <typename TransformFunctor>
struct dispatch_ceil {
  // Timestamp overload.
  //   column: input timestamp column
  //   stream: CUDA stream on which the bitmask copy, allocation and transform are issued
  //   mr:     memory resource used for the output column and its bitmask
  // Returns the transformed column; an empty input yields an empty column of the same type.
  template <typename Timestamp>
  std::enable_if_t<cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
    cudf::column_view const& column,
    rmm::cuda_stream_view stream,
    rmm::mr::device_memory_resource* mr) const
  {
    auto size            = column.size();
    auto output_col_type = data_type{cudf::type_to_id<Timestamp>()};

    // Return an empty column if source column is empty
    if (size == 0) return make_empty_column(output_col_type);

    // The output shares the input's validity: the mask is copied, not recomputed.
    auto output = make_fixed_width_column(output_col_type,
                                          size,
                                          cudf::detail::copy_bitmask(column, stream, mr),
                                          column.null_count(),
                                          stream,
                                          mr);

    // The functor is applied to every row in [begin, end); validity is carried by the copied mask.
    thrust::transform(rmm::exec_policy(stream),
                      column.begin<Timestamp>(),
                      column.end<Timestamp>(),
                      output->mutable_view().begin<Timestamp>(),
                      TransformFunctor{});

    return output;
  }

  // Fallback overload for every non-timestamp element type: always fails.
  template <typename Timestamp, typename... Args>
  std::enable_if_t<!cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
    Args&&...) const
  {
    CUDF_FAIL("Must be cudf::timestamp");
  }
};

// Apply the functor for every element/row in the input column to create the output column
template <typename TransformFunctor, typename OutputColT>
struct launch_functor {
Expand Down Expand Up @@ -286,6 +360,15 @@ std::unique_ptr<column> add_calendrical_months(column_view const& timestamp_colu
return output;
}

// Shared implementation behind the public ceil_* entry points: dispatches on the column's
// element type and applies the ceil functor for the requested Component.
template <datetime_component Component>
std::unique_ptr<column> ceil_general(column_view const& column,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
{
  using ceil_dispatcher = dispatch_ceil<detail::ceil_timestamp<Component>>;
  return cudf::type_dispatcher(column.type(), ceil_dispatcher{}, column, stream, mr);
}

std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -388,6 +471,58 @@ std::unique_ptr<column> extract_quarter(column_view const& column,

} // namespace detail

// Public entry point: day-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_day(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::DAY;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: hour-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::HOUR;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: minute-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MINUTE;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: second-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_second(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::SECOND;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: millisecond-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_millisecond(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MILLISECOND;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: microsecond-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_microsecond(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::MICROSECOND;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: nanosecond-resolution ceil, executed on the default stream.
std::unique_ptr<column> ceil_nanosecond(column_view const& column,
                                        rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto resolution = detail::datetime_component::NANOSECOND;
  return detail::ceil_general<resolution>(column, rmm::cuda_stream_default, mr);
}

std::unique_ptr<column> extract_year(column_view const& column, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand Down
56 changes: 56 additions & 0 deletions cpp/tests/datetime/datetime_ops_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,62 @@ TEST_F(BasicDatetimeOpsTest, TestLastDayOfMonthWithDate)
verbosity);
}

// Checks every ceil_* entry point (day through nanosecond) against a host-side reference
// computed with cuda::std::chrono::ceil on the same generated timestamps.
TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime)
{
  using T = TypeParam;
  using namespace cudf::test;
  using namespace cudf::datetime;
  using namespace cuda::std::chrono;

  auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
  auto stop_ = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT

  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop_));

  auto host_val                     = to_host<T>(input);
  thrust::host_vector<T> timestamps = host_val.first;

  // Builds the expected column for one resolution. The resolution is passed as a
  // (default-constructed) duration value purely so its type can be deduced.
  auto const expected_ceil = [&timestamps](auto resolution) {
    using Resolution = decltype(resolution);
    std::vector<T> ceiled(timestamps.size());
    std::transform(timestamps.begin(), timestamps.end(), ceiled.begin(), [](auto ts) {
      return time_point_cast<typename T::duration>(ceil<Resolution>(ts));
    });
    return fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled.begin(), ceiled.end());
  };

  auto expected_day = expected_ceil(days{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_day(input), expected_day);

  auto expected_hour = expected_ceil(hours{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_hour(input), expected_hour);

  auto expected_minute = expected_ceil(minutes{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_minute(input), expected_minute);

  auto expected_second = expected_ceil(seconds{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_second(input), expected_second);

  auto expected_millisecond = expected_ceil(milliseconds{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_millisecond(input), expected_millisecond);

  // The two finest resolutions are part of the public API as well and must be covered.
  auto expected_microsecond = expected_ceil(microseconds{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_microsecond(input), expected_microsecond);

  auto expected_nanosecond = expected_ceil(nanoseconds{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_nanosecond(input), expected_nanosecond);
}

TEST_F(BasicDatetimeOpsTest, TestDayOfYearWithDate)
{
using namespace cudf::test;
Expand Down

0 comments on commit 2a566dd

Please sign in to comment.