Skip to content

Commit

Permalink
Merge branch 'branch-22.02' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…TimedeltaIndex
  • Loading branch information
skirui-source committed Dec 16, 2021
2 parents a04647e + 52d7acc commit 8d2dcf4
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 55 deletions.
8 changes: 2 additions & 6 deletions cpp/include/cudf/detail/merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,10 @@ struct tagged_element_relational_comparator {
__device__ weak_ordering compare(index_type lhs_tagged_index,
index_type rhs_tagged_index) const noexcept
{
side const l_side = thrust::get<0>(lhs_tagged_index);
side const r_side = thrust::get<0>(rhs_tagged_index);

cudf::size_type const l_indx = thrust::get<1>(lhs_tagged_index);
cudf::size_type const r_indx = thrust::get<1>(rhs_tagged_index);
auto const [l_side, l_indx] = lhs_tagged_index;
auto const [r_side, r_indx] = rhs_tagged_index;

column_device_view const* ptr_left_dview{l_side == side::LEFT ? &lhs : &rhs};

column_device_view const* ptr_right_dview{r_side == side::LEFT ? &lhs : &rhs};

auto erl_comparator = element_relational_comparator(
Expand Down
7 changes: 2 additions & 5 deletions cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,

// build offsets column
auto offsets_transformer = [d_lhs, d_rhs] __device__(auto index_pair) {
auto side = thrust::get<0>(index_pair);
auto index = thrust::get<1>(index_pair);
auto const [side, index] = index_pair;
if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return 0;
auto d_str =
side == side::LEFT ? d_lhs.element<string_view>(index) : d_rhs.element<string_view>(index);
Expand All @@ -90,9 +89,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
thrust::make_counting_iterator<size_type>(0),
strings_count,
[d_lhs, d_rhs, begin, d_offsets, d_chars] __device__(size_type idx) {
index_type index_pair = begin[idx];
auto side = thrust::get<0>(index_pair);
auto index = thrust::get<1>(index_pair);
auto const [side, index] = begin[idx];
if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return;
auto d_str = side == side::LEFT ? d_lhs.element<string_view>(index)
: d_rhs.element<string_view>(index);
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/dictionary/detail/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ std::unique_ptr<column> merge(dictionary_column_view const& lcol,
row_order.end(),
output_iter,
[lcol_iter, rcol_iter] __device__(auto const& index_pair) {
auto index = thrust::get<1>(index_pair);
return (thrust::get<0>(index_pair) == cudf::detail::side::LEFT
? lcol_iter[index]
: rcol_iter[index]);
auto const [side, index] = index_pair;
return side == cudf::detail::side::LEFT ? lcol_iter[index] : rcol_iter[index];
});

// build dictionary; the validity mask is updated by the caller
Expand Down
7 changes: 2 additions & 5 deletions cpp/src/merge/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ __global__ void materialize_merged_bitmask_kernel(
auto active_threads = __ballot_sync(0xffffffff, destination_row < num_destination_rows);

while (destination_row < num_destination_rows) {
index_type const& merged_idx = merged_indices[destination_row];
side const src_side = thrust::get<0>(merged_idx);
size_type const src_row = thrust::get<1>(merged_idx);
auto const [src_side, src_row] = merged_indices[destination_row];
bool const from_left{src_side == side::LEFT};
bool source_bit_is_valid{true};
if (left_have_valids && from_left) {
Expand Down Expand Up @@ -284,8 +282,7 @@ struct column_merger {
row_order_.end(),
merged_view.begin<Element>(),
[d_lcol, d_rcol] __device__(index_type const& index_pair) {
auto side = thrust::get<0>(index_pair);
auto index = thrust::get<1>(index_pair);
auto const [side, index] = index_pair;
return side == side::LEFT ? d_lcol[index] : d_rcol[index];
});

Expand Down
5 changes: 2 additions & 3 deletions cpp/tests/copying/sample_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,8 @@ struct SampleBasicTest : public SampleTest,

TEST_P(SampleBasicTest, CombinationOfParameters)
{
cudf::size_type const table_size = 1024;
cudf::size_type const n_samples = std::get<0>(GetParam());
cudf::sample_with_replacement multi_smpl = std::get<1>(GetParam());
cudf::size_type const table_size = 1024;
auto const [n_samples, multi_smpl] = GetParam();

auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
cudf::test::fixed_width_column_wrapper<int16_t> col1(data, data + table_size);
Expand Down
9 changes: 4 additions & 5 deletions cpp/tests/interop/from_arrow_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,10 @@ struct FromArrowTestSlice

TEST_P(FromArrowTestSlice, SliceTest)
{
auto tables = get_tables(10000);
auto cudf_table_view = tables.first->view();
auto arrow_table = tables.second;
auto start = std::get<0>(GetParam());
auto end = std::get<1>(GetParam());
auto tables = get_tables(10000);
auto cudf_table_view = tables.first->view();
auto arrow_table = tables.second;
auto const [start, end] = GetParam();

auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0];
auto expected_cudf_table = cudf::table{sliced_cudf_table};
Expand Down
9 changes: 4 additions & 5 deletions cpp/tests/interop/to_arrow_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,10 @@ struct ToArrowTestSlice

TEST_P(ToArrowTestSlice, SliceTest)
{
auto tables = get_tables(10000);
auto cudf_table_view = tables.first->view();
auto arrow_table = tables.second;
auto start = std::get<0>(GetParam());
auto end = std::get<1>(GetParam());
auto tables = get_tables(10000);
auto cudf_table_view = tables.first->view();
auto arrow_table = tables.second;
auto const [start, end] = GetParam();

auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0];
auto expected_arrow_table = arrow_table->Slice(start, end - start);
Expand Down
8 changes: 3 additions & 5 deletions cpp/tests/io/orc_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,8 +1137,7 @@ struct OrcWriterTestDecimal : public OrcWriterTest,

TEST_P(OrcWriterTestDecimal, Decimal64)
{
auto const num_rows = std::get<0>(GetParam());
auto const scale = std::get<1>(GetParam());
auto const [num_rows, scale] = GetParam();

// Using int16_t because scale causes values to overflow if they already require 32 bits
auto const vals = random_values<int32_t>(num_rows);
Expand Down Expand Up @@ -1241,9 +1240,8 @@ struct OrcWriterTestStripes

TEST_P(OrcWriterTestStripes, StripeSize)
{
constexpr auto num_rows = 1000000;
auto size_bytes = std::get<0>(GetParam());
auto size_rows = std::get<1>(GetParam());
constexpr auto num_rows = 1000000;
auto const [size_bytes, size_rows] = GetParam();

const auto seq_col = random_values<int>(num_rows);
const auto validity =
Expand Down
3 changes: 1 addition & 2 deletions cpp/tests/transform/mask_to_bools_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ struct MaskToBoolsTest
TEST_P(MaskToBoolsTest, LargeDataSizeTest)
{
auto data = std::vector<bool>(10000);
cudf::size_type const begin_bit = std::get<0>(GetParam());
cudf::size_type const end_bit = std::get<1>(GetParam());
auto const [begin_bit, end_bit] = GetParam();
std::transform(data.cbegin(), data.cend(), data.begin(), [](auto val) {
return rand() % 2 == 0 ? true : false;
});
Expand Down
41 changes: 26 additions & 15 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import cupy as cp
import numpy as np
import pandas as pd
import pandas.tseries.offsets as pd_offset
from pandas.core.tools.datetimes import _unit_map

import cudf
Expand Down Expand Up @@ -458,6 +459,17 @@ class DateOffset:
"Y": "years",
}

_TICK_OR_WEEK_TO_UNITS = {
pd_offset.Week: "weeks",
pd_offset.Day: "days",
pd_offset.Hour: "hours",
pd_offset.Minute: "minutes",
pd_offset.Second: "seconds",
pd_offset.Milli: "milliseconds",
pd_offset.Micro: "microseconds",
pd_offset.Nano: "nanoseconds",
}

_FREQSTR_REGEX = re.compile("([0-9]*)([a-zA-Z]+)")

def __init__(self, n=1, normalize=False, **kwds):
Expand Down Expand Up @@ -649,6 +661,13 @@ def _from_freqstr(cls: Type[_T], freqstr: str) -> _T:

return cls(**{cls._CODES_TO_UNITS[freq_part]: int(numeric_part)})

@classmethod
def _from_pandas_ticks_or_weeks(
cls: Type[_T],
tick: Union[pd.tseries.offsets.Tick, pd.tseries.offsets.Week],
) -> _T:
return cls(**{cls._TICK_OR_WEEK_TO_UNITS[type(tick)]: tick.n})

def _maybe_as_fast_pandas_offset(self):
if (
len(self.kwds) == 1
Expand Down Expand Up @@ -814,23 +833,15 @@ def date_range(
if isinstance(freq, DateOffset):
offset = freq
elif isinstance(freq, str):
# Map pandas `offset alias` into cudf DateOffset `CODE`, only
# fixed-frequency, non-anchored offset aliases are supported.
mo = re.fullmatch(
rf'(-)*(\d*)({"|".join(_offset_alias_to_code.keys())})', freq
)
if mo is None:
offset = pd.tseries.frequencies.to_offset(freq)
if not isinstance(offset, pd.tseries.offsets.Tick) and not isinstance(
offset, pd.tseries.offsets.Week
):
raise ValueError(
f"Unrecognized or unsupported offset alias {freq}."
f"Unrecognized frequency string {freq}. cuDF does "
"not yet support month, quarter, year-anchored frequency."
)

sign, n, offset_alias = mo.groups()
code = _offset_alias_to_code[offset_alias]

freq = "".join([n, code])
offset = DateOffset._from_freqstr(freq)
if sign:
offset.kwds.update({s: -i for s, i in offset.kwds.items()})
offset = DateOffset._from_pandas_ticks_or_weeks(offset)
else:
raise TypeError("`freq` must be a `str` or cudf.DateOffset object.")

Expand Down
42 changes: 42 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,6 +1583,48 @@ def test_date_range_raise_overflow():
cudf.date_range(start=start, periods=periods, freq=freq)


@pytest.mark.parametrize(
"freqstr_unsupported",
[
"1M",
"2SM",
"3MS",
"4BM",
"5CBM",
"6SMS",
"7BMS",
"8CBMS",
"Q",
"2BQ",
"3BQS",
"10A",
"10Y",
"9BA",
"9BY",
"8AS",
"8YS",
"7BAS",
"7BYS",
"BH",
"B",
],
)
def test_date_range_raise_unsupported(freqstr_unsupported):
s, e = "2001-01-01", "2008-01-31"
pd.date_range(start=s, end=e, freq=freqstr_unsupported)
with pytest.raises(ValueError, match="does not yet support"):
cudf.date_range(start=s, end=e, freq=freqstr_unsupported)

# We also check that these values are unsupported when using lowercase
# characters. We exclude the value 3MS (every 3 month starts) because 3ms
# is a valid frequency for every 3 milliseconds.
if freqstr_unsupported != "3MS":
freqstr_unsupported = freqstr_unsupported.lower()
pd.date_range(start=s, end=e, freq=freqstr_unsupported)
with pytest.raises(ValueError, match="does not yet support"):
cudf.date_range(start=s, end=e, freq=freqstr_unsupported)


##################################################################
# End of Date Range Test #
##################################################################
Expand Down

0 comments on commit 8d2dcf4

Please sign in to comment.