Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement extract_datetime_component in libcudf/pylibcudf #16776

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@ namespace datetime {
* @file
*/

/**
 * @brief Types of datetime components that may be extracted.
 *
 * Selects which field extract_datetime_component() returns.
 */
enum class datetime_component : uint8_t {
  INVALID = 0,  ///< Placeholder value; not an extractable component
  YEAR,         ///< Year
  MONTH,        ///< Month
  DAY,          ///< Day
  WEEKDAY,      ///< Day of the week
  HOUR,         ///< Hour
  MINUTE,       ///< Minute
  SECOND,       ///< Second
  MILLISECOND,  ///< Millisecond fraction
  MICROSECOND,  ///< Microsecond fraction
  NANOSECOND    ///< Nanosecond fraction
};

/**
* @brief Extracts year from any datetime type and returns an int16_t
* cudf::column.
Expand Down Expand Up @@ -164,6 +181,7 @@ std::unique_ptr<cudf::column> extract_second(
* @returns cudf::column of the extracted int16_t milliseconds
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/

std::unique_ptr<cudf::column> extract_millisecond_fraction(
wence- marked this conversation as resolved.
Show resolved Hide resolved
cudf::column_view const& column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
Expand Down Expand Up @@ -207,6 +225,24 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
 * @brief Extracts the specified datetime component from any datetime type and
 * returns an int16_t cudf::column.
 *
 * Generic counterpart of the single-field extractors (extract_year, extract_month, ...):
 * the field to extract is selected at run time through `component`.
 *
 * @param column cudf::column_view of the input datetime values
 * @param component The datetime component to extract
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @returns cudf::column of the extracted int16_t datetime component
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @throw cudf::logic_error if `component` is not an extractable component
 * (e.g. datetime_component::INVALID)
 */
std::unique_ptr<cudf::column> extract_datetime_component(
cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
/**
* @addtogroup datetime_compute
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/detail/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view cons
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
 * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
 * rmm::cuda_stream_view, rmm::device_async_resource_ref)
 *
 * Detail-layer overload: `stream` and `mr` have no default arguments here, so the
 * caller must supply both explicitly.
 */
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view,
* rmm::device_async_resource_ref)
Expand Down
88 changes: 45 additions & 43 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,6 @@
namespace cudf {
namespace datetime {
namespace detail {
// Detail-local list of datetime fields, used as the template argument of
// extract_component_operator.
// NOTE(review): cudf/datetime.hpp declares cudf::datetime::datetime_component with the
// same enumerators and a uint8_t underlying type. This copy looks like the definition
// the public enum replaces -- confirm that only one of the two remains, otherwise the
// name resolves to a different type inside this namespace.
enum class datetime_component {
INVALID = 0,
YEAR,
MONTH,
DAY,
WEEKDAY,
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND,
NANOSECOND
};

enum class rounding_function {
CEIL, ///< Rounds up to the next integer multiple of the provided frequency
Expand Down Expand Up @@ -453,90 +440,70 @@ std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::YEAR>,
cudf::type_id::INT16>(column, stream, mr);
return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr);
}

// Extracts the month of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_month(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr);
}

// Extracts the day of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_day(column_view const& column,
                                    rmm::cuda_stream_view stream,
                                    rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr);
}

// Extracts the weekday of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_weekday(column_view const& column,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
}

// Extracts the hour of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_hour(column_view const& column,
                                     rmm::cuda_stream_view stream,
                                     rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr);
}

// Extracts the minute of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_minute(column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
}

// Extracts the second of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_second(column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr);
}

// Extracts the millisecond fraction of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
}

// Extracts the microsecond fraction of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
}

// Extracts the nanosecond fraction of each timestamp as an INT16 column.
// Forwards to extract_datetime_component so all single-field extractors share one
// dispatch path.
std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
                                                    rmm::cuda_stream_view stream,
                                                    rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
}

std::unique_ptr<column> last_day_of_month(column_view const& column,
Expand Down Expand Up @@ -576,6 +543,32 @@ std::unique_ptr<column> extract_quarter(column_view const& column,
return apply_datetime_op<extract_quarter_op, type_id::INT16>(column, stream, mr);
}

// Maps the run-time `component` onto the matching compile-time instantiation of
// extract_component_operator. Every supported component yields an INT16 column;
// any other value (including datetime_component::INVALID) is rejected via CUDF_FAIL.
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
                                                         datetime_component component,
                                                         rmm::cuda_stream_view stream,
                                                         rmm::device_async_resource_ref mr)
{
  switch (component) {
    case datetime_component::YEAR:
      return apply_datetime_op<extract_component_operator<datetime_component::YEAR>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MONTH:
      return apply_datetime_op<extract_component_operator<datetime_component::MONTH>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::DAY:
      return apply_datetime_op<extract_component_operator<datetime_component::DAY>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::WEEKDAY:
      return apply_datetime_op<extract_component_operator<datetime_component::WEEKDAY>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::HOUR:
      return apply_datetime_op<extract_component_operator<datetime_component::HOUR>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MINUTE:
      return apply_datetime_op<extract_component_operator<datetime_component::MINUTE>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::SECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::SECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MILLISECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::MILLISECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MICROSECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::MICROSECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::NANOSECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::NANOSECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    default: CUDF_FAIL("Unsupported datetime component.");
  }
}

} // namespace detail

std::unique_ptr<column> ceil_datetimes(column_view const& column,
Expand Down Expand Up @@ -661,6 +654,15 @@ std::unique_ptr<column> extract_second(column_view const& column,
return detail::extract_second(column, stream, mr);
}

// Public entry point: invokes CUDF_FUNC_RANGE() and then hands the request to the
// detail implementation unchanged.
std::unique_ptr<cudf::column> extract_datetime_component(
  cudf::column_view const& column,
  datetime_component component,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr)
{
  CUDF_FUNC_RANGE();
  auto extracted = detail::extract_datetime_component(column, component, stream, mr);
  return extracted;
}

std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
Expand Down
130 changes: 130 additions & 0 deletions cpp/tests/datetime/datetime_ops_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns),
fixed_width_column_wrapper<int16_t>{766, 424, 623});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1969, 1970, 1970});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{10, 7, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{10, 7, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{10, 7, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{12, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{26, 4, 25});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{26, 4, 25});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{26, 4, 25});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{31, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{2, 3, 3});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{2, 3, 3});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{2, 3, 3});
// Nanosecond-resolution input: per the YEAR/MONTH/DAY expectations in this test the
// values fall on 1969-12-31, 1970-01-01 and 1970-01-01 (Wednesday, Thursday, Thursday).
// Previously this slot re-checked timestamps_ms, leaving timestamps_ns untested.
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
  *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::WEEKDAY),
  fixed_width_column_wrapper<int16_t>{3, 4, 4});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{14, 12, 7});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{14, 12, 7});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{23, 0, 0});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{1, 0, 32});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{1, 0, 32});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{59, 0, 0});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{12, 0, 12});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{12, 0, 12});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{59, 0, 0});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{762, 0, 929});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{976, 23, 987});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{675, 432, 234});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{766, 424, 623});
}

template <typename T>
Expand Down
Loading
Loading