Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose days_in_month function in libcudf and add python bindings #8892

Merged
merged 12 commits into from
Aug 6, 2021
15 changes: 15 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,21 @@ std::unique_ptr<cudf::column> is_leap_year(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Extract the number of days in the month
 *
 * output[i] contains the number of days in the month of date `column[i]`
 * output[i] is null if `column[i]` is null
 *
 * @param[in] column cudf::column_view of the input datetime values
 * @param[in] mr Device memory resource passed through to the implementation; defaults to
 * `rmm::mr::get_current_device_resource()` (presumably used to allocate the returned
 * column -- confirm against the sibling declarations in this header)
 *
 * @returns cudf::column of datatype INT16 of days in month of the corresponding date
 * @throw cudf::logic_error if input column datatype is not a TIMESTAMP
 */
std::unique_ptr<cudf::column> days_in_month(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns the quarter of the date
*
Expand Down
36 changes: 34 additions & 2 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,15 @@ static __device__ int16_t const days_until_month[2][13] = {
{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} // For leap years
};

// Number of days in each month, indexed as [is_leap_year][month].
// Column 0 is a padding entry so the month value (1 = January ... 12 = December)
// can be used directly as the column index.
static __device__ uint8_t const days_in_month_table[2][13] = {
  {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},  // For non leap years
  {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}   // For leap years
};

// Returns the number of days in month `mon`, using the leap-year row of the
// table when `is_leap_year` is true.
//
// A single table lookup replaces the former difference of two cumulative
// `days_until_month` entries, which read index `mon - 1` and therefore went out
// of bounds for a month value of 0; the table returns 0 for that padding slot.
CUDA_DEVICE_CALLABLE uint8_t days_in_month(cuda::std::chrono::month mon, bool is_leap_year)
{
  return days_in_month_table[is_leap_year][unsigned{mon}];
}

// Round up the date to the last day of the month and return the
Expand Down Expand Up @@ -144,6 +149,7 @@ struct extract_quarter_op {
}
};

// Returns true if the year is a leap year
struct is_leap_year_op {
template <typename Timestamp>
CUDA_DEVICE_CALLABLE bool operator()(Timestamp const ts) const
Expand All @@ -155,6 +161,18 @@ struct is_leap_year_op {
}
};

// Functor that maps a timestamp to the number of days in the month it falls in
struct days_in_month_op {
  // Returns the day count of the timestamp's calendar month as an int16_t.
  template <typename Timestamp>
  CUDA_DEVICE_CALLABLE int16_t operator()(Timestamp const ts) const
  {
    using namespace cuda::std::chrono;
    // Drop the time-of-day part (rounding towards negative infinity) before
    // converting to a calendar date, so pre-epoch timestamps land on the right day.
    auto const days_since_epoch = floor<days>(ts);
    auto const date             = year_month_day(days_since_epoch);
    // days_in_month yields uint8_t; widen to the INT16 element type of the output column.
    return static_cast<int16_t>(days_in_month(date.month(), date.year().is_leap()));
  }
};

// Apply the functor for every element/row in the input column to create the output column
template <typename TransformFunctor, typename OutputColT>
struct launch_functor {
Expand Down Expand Up @@ -393,6 +411,13 @@ std::unique_ptr<column> is_leap_year(column_view const& column,
return apply_datetime_op<is_leap_year_op, type_id::BOOL8>(column, stream, mr);
}

// Stream-ordered implementation of days_in_month: runs `days_in_month_op` through
// `apply_datetime_op`, requesting an INT16 output column.
std::unique_ptr<column> days_in_month(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
{
  constexpr auto output_type = type_id::INT16;
  return apply_datetime_op<days_in_month_op, output_type>(column, stream, mr);
}

std::unique_ptr<column> extract_quarter(column_view const& column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -476,6 +501,13 @@ std::unique_ptr<column> is_leap_year(column_view const& column, rmm::mr::device_
return detail::is_leap_year(column, rmm::cuda_stream_default, mr);
}

// Public entry point for days_in_month: forwards to the detail implementation
// on the default stream (`rmm::cuda_stream_default`).
std::unique_ptr<column> days_in_month(column_view const& column,
                                      rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  auto const stream = rmm::cuda_stream_default;
  return detail::days_in_month(column, stream, mr);
}

std::unique_ptr<column> extract_quarter(column_view const& column,
rmm::mr::device_memory_resource* mr)
{
Expand Down
80 changes: 80 additions & 0 deletions cpp/tests/datetime/datetime_ops_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,86 @@ TEST_F(BasicDatetimeOpsTest, TestIsLeapYear)
{true, false, true, true, true, true, true, true, false, true, true, false}});
}

// Checks days_in_month on a run of consecutive calendar months by comparing the
// device result with the last day of each month computed on the host via chrono.
TEST_F(BasicDatetimeOpsTest, TestDaysInMonths)
{
  using namespace cudf::test;
  using namespace cudf::datetime;
  using namespace cuda::std::chrono;

  auto start = time_point_ms(milliseconds(-2500000000000));  // Sat, 11 Oct 1890 19:33:20 GMT
  auto stop  = time_point_ms(milliseconds(2500000000000));   // Mon, 22 Mar 2049 04:26:40 GMT
  auto step  = months{1};  // period std::ratio<2629746>, 1/12 of avg gregorian year

  // Number of whole (average-length) months between start and stop
  auto count = static_cast<std::size_t>((stop - start) / step);

  // Every row starts at `start`; row i is then shifted forward by i calendar months.
  // int64_t (not `long`) so the millisecond values fit regardless of platform and
  // match the representation read back with to_host<int64_t> below.
  auto date_vector = thrust::host_vector<int64_t>(count, start.time_since_epoch().count());
  auto month_iter  = thrust::make_counting_iterator<int16_t>(0);

  fixed_width_column_wrapper<cudf::timestamp_ms> base_timestamps(date_vector.begin(),
                                                                 date_vector.end());
  fixed_width_column_wrapper<int16_t> month_offset(month_iter, month_iter + count);

  auto input = cudf::datetime::add_calendrical_months(base_timestamps, month_offset);

  auto got = cudf::datetime::days_in_month(*input);

  // Extract last day of the month on host
  auto [host_input, _] = cudf::test::to_host<int64_t>(*input);
  auto host_expect     = thrust::host_vector<int16_t>(host_input.size());

  std::transform(host_input.begin(), host_input.end(), host_expect.begin(), [](auto rep) {
    auto tp = time_point_ms(milliseconds{rep});
    auto dp = floor<days>(tp);
    year_month_day ymd{dp};
    // The day number of the month's last day equals the number of days in that month
    year_month_day_last ymdl{ymd.year() / ymd.month() / last};
    return static_cast<int16_t>(unsigned(ymdl.day()));
  });
  fixed_width_column_wrapper<int16_t> expect(host_expect.begin(), host_expect.end());

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got, expect);
}

// Same check as the unmasked days_in_month test, but every odd-indexed input row
// is null; the expected column carries the same validity mask.
TEST_F(BasicDatetimeOpsTest, TestDaysInMonthsMasked)
{
  using namespace cudf::test;
  using namespace cudf::datetime;
  using namespace cuda::std::chrono;

  auto start = time_point_ms(milliseconds(-2500000000000));  // Sat, 11 Oct 1890 19:33:20 GMT
  auto stop  = time_point_ms(milliseconds(2500000000000));   // Mon, 22 Mar 2049 04:26:40 GMT
  auto step  = months{1};  // period std::ratio<2629746>, 1/12 of avg gregorian year

  // Number of whole (average-length) months between start and stop
  auto count = static_cast<std::size_t>((stop - start) / step);

  // int64_t (not `long`) so the millisecond values fit regardless of platform and
  // match the representation read back with to_host<int64_t> below.
  auto date_vector = thrust::host_vector<int64_t>(count, start.time_since_epoch().count());
  auto month_iter  = thrust::make_counting_iterator<int16_t>(0);
  // Validity: even rows are valid, odd rows are null. The lambda uses no outer state,
  // so it captures nothing.
  auto mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });

  fixed_width_column_wrapper<cudf::timestamp_ms> base_timestamps(
    date_vector.begin(), date_vector.end(), mask);
  fixed_width_column_wrapper<int16_t> month_offset(month_iter, month_iter + count);

  auto input = cudf::datetime::add_calendrical_months(base_timestamps, month_offset);

  auto got = cudf::datetime::days_in_month(*input);

  // Extract last day of the month on host
  auto [host_input, _] = cudf::test::to_host<int64_t>(*input);
  auto host_expect     = thrust::host_vector<int16_t>(host_input.size());

  std::transform(host_input.begin(), host_input.end(), host_expect.begin(), [](auto rep) {
    auto tp = time_point_ms(milliseconds{rep});
    auto dp = floor<days>(tp);
    year_month_day ymd{dp};
    // The day number of the month's last day equals the number of days in that month
    year_month_day_last ymdl{ymd.year() / ymd.month() / last};
    return static_cast<int16_t>(unsigned(ymdl.day()));
  });
  // Values computed for null rows are hidden by the mask applied here
  fixed_width_column_wrapper<int16_t> expect(host_expect.begin(), host_expect.end(), mask);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got, expect);
}

TEST_F(BasicDatetimeOpsTest, TestQuarter)
{
using namespace cudf::test;
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/cpp/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
) except +
cdef unique_ptr[column] day_of_year(const column_view& column) except +
cdef unique_ptr[column] is_leap_year(const column_view& column) except +
cdef unique_ptr[column] days_in_month(const column_view& column) except +
14 changes: 14 additions & 0 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,24 @@ def extract_datetime_component(Column col, object field):


def is_leap_year(Column col):
    """Return a boolean column flagging dates whose year is a leap year.

    Calls libcudf's ``is_leap_year`` on a view of ``col`` with the GIL
    released and wraps the resulting column in a ``Column``.
    """
    cdef column_view input_view = col.view()
    cdef unique_ptr[column] result_ptr

    with nogil:
        result_ptr = move(libcudf_datetime.is_leap_year(input_view))

    return Column.from_unique_ptr(move(result_ptr))


def days_in_month(Column col):
    """Extracts the number of days in the month of the date

    Calls libcudf's ``days_in_month`` on a view of ``col`` with the GIL
    released and wraps the resulting column in a ``Column``.
    """
    cdef unique_ptr[column] c_result
    cdef column_view col_view = col.view()

    with nogil:
        c_result = move(libcudf_datetime.days_in_month(col_view))

    return Column.from_unique_ptr(move(c_result))
87 changes: 87 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6438,6 +6438,42 @@ def is_leap_year(self):
-------
Series
Booleans indicating if dates belong to a leap year.

Examples
--------
>>> import pandas as pd, cudf
>>> s = cudf.Series(
... pd.date_range(start='2000-02-01', end='2013-02-01', freq='1Y'))
>>> s
0 2000-12-31
1 2001-12-31
2 2002-12-31
3 2003-12-31
4 2004-12-31
5 2005-12-31
6 2006-12-31
7 2007-12-31
8 2008-12-31
9 2009-12-31
10 2010-12-31
11 2011-12-31
12 2012-12-31
dtype: datetime64[ns]
>>> s.dt.is_leap_year
0 True
1 False
2 False
3 False
4 True
5 False
6 False
7 False
8 True
9 False
10 False
11 False
12 True
dtype: bool
"""
res = libcudf.datetime.is_leap_year(self.series._column).fillna(False)
return Series._from_data(
Expand All @@ -6458,6 +6494,57 @@ def is_month_start(self):
"""
return (self.day == 1).fillna(False)

@property
def days_in_month(self):
    """
    Number of days in the month that each date falls in.

    Returns
    -------
    Series
        Integers giving the length, in days, of each date's month.

    Examples
    --------
    >>> import pandas as pd, cudf
    >>> s = cudf.Series(
    ...     pd.date_range(start='2000-08-01', end='2001-08-01', freq='1M'))
    >>> s
    0    2000-08-31
    1    2000-09-30
    2    2000-10-31
    3    2000-11-30
    4    2000-12-31
    5    2001-01-31
    6    2001-02-28
    7    2001-03-31
    8    2001-04-30
    9    2001-05-31
    10   2001-06-30
    11   2001-07-31
    dtype: datetime64[ns]
    >>> s.dt.days_in_month
    0     31
    1     30
    2     31
    3     30
    4     31
    5     31
    6     28
    7     31
    8     30
    9     31
    10    30
    11    31
    dtype: int16
    """
    source = self.series
    days_column = libcudf.datetime.days_in_month(source._column)
    # Wrap the result as an unnamed single-column accessor, then reattach the
    # original index and name so the output lines up with the input Series.
    data = ColumnAccessor({None: days_column})
    return Series._from_data(data, index=source._index, name=source.name)

def _get_dt_field(self, field):
out_column = self.series._column.get_dt_field(field)
return Series(
Expand Down
20 changes: 20 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pytest

import cudf
import cudf.testing.dataset_generator as dataset_generator
from cudf.core import DataFrame, Series
from cudf.core.index import DatetimeIndex
from cudf.testing._utils import (
Expand Down Expand Up @@ -1299,6 +1300,25 @@ def test_is_leap_year():
assert_eq(expect2, got2)


@pytest.mark.parametrize("dtype", DATETIME_TYPES)
def test_days_in_months(dtype):
    """Compare ``dt.days_in_month`` between pandas and cudf on random datetimes.

    Generates 1000 rows of the given datetime dtype (null frequency 0.4,
    fixed seed) and asserts that cudf's result equals pandas' result.
    """
    nrows = 1000
    column_meta = {"dtype": dtype, "null_frequency": 0.4, "cardinality": nrows}

    generated = dataset_generator.rand_dataframe(
        dtypes_meta=[column_meta],
        rows=nrows,
        use_threads=False,
        seed=23,
    )

    # The single generated column is read back under the name "0".
    pandas_series = generated.to_pandas()["0"]
    cudf_series = cudf.from_pandas(pandas_series)

    expected = pandas_series.dt.days_in_month
    actual = cudf_series.dt.days_in_month
    assert_eq(expected, actual)


@pytest.mark.parametrize(
"data",
[
Expand Down