Merge branch 'branch-22.02' of https://github.com/rapidsai/cudf into …

…TimedeltaIndex
rapidsai · Dec 16, 2021 · 8d2dcf4 · 8d2dcf4
2 parents a04647e + 52d7acc
commit 8d2dcf4
Show file tree

Hide file tree

Showing 11 changed files with 90 additions and 55 deletions.
diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh
@@ -80,14 +80,10 @@ struct tagged_element_relational_comparator {
   __device__ weak_ordering compare(index_type lhs_tagged_index,
                                    index_type rhs_tagged_index) const noexcept
   {
-    side const l_side = thrust::get<0>(lhs_tagged_index);
-    side const r_side = thrust::get<0>(rhs_tagged_index);
-
-    cudf::size_type const l_indx = thrust::get<1>(lhs_tagged_index);
-    cudf::size_type const r_indx = thrust::get<1>(rhs_tagged_index);
+    auto const [l_side, l_indx] = lhs_tagged_index;
+    auto const [r_side, r_indx] = rhs_tagged_index;
 
     column_device_view const* ptr_left_dview{l_side == side::LEFT ? &lhs : &rhs};
-
     column_device_view const* ptr_right_dview{r_side == side::LEFT ? &lhs : &rhs};
 
     auto erl_comparator = element_relational_comparator(

diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh
@@ -68,8 +68,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
 
   // build offsets column
   auto offsets_transformer = [d_lhs, d_rhs] __device__(auto index_pair) {
-    auto side  = thrust::get<0>(index_pair);
-    auto index = thrust::get<1>(index_pair);
+    auto const [side, index] = index_pair;
     if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return 0;
     auto d_str =
       side == side::LEFT ? d_lhs.element<string_view>(index) : d_rhs.element<string_view>(index);
@@ -90,9 +89,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
                      thrust::make_counting_iterator<size_type>(0),
                      strings_count,
                      [d_lhs, d_rhs, begin, d_offsets, d_chars] __device__(size_type idx) {
-                       index_type index_pair = begin[idx];
-                       auto side             = thrust::get<0>(index_pair);
-                       auto index            = thrust::get<1>(index_pair);
+                       auto const [side, index] = begin[idx];
                        if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return;
                        auto d_str = side == side::LEFT ? d_lhs.element<string_view>(index)
                                                        : d_rhs.element<string_view>(index);

diff --git a/cpp/src/dictionary/detail/merge.cu b/cpp/src/dictionary/detail/merge.cu
@@ -53,10 +53,8 @@ std::unique_ptr<column> merge(dictionary_column_view const& lcol,
                     row_order.end(),
                     output_iter,
                     [lcol_iter, rcol_iter] __device__(auto const& index_pair) {
-                      auto index = thrust::get<1>(index_pair);
-                      return (thrust::get<0>(index_pair) == cudf::detail::side::LEFT
-                                ? lcol_iter[index]
-                                : rcol_iter[index]);
+                      auto const [side, index] = index_pair;
+                      return side == cudf::detail::side::LEFT ? lcol_iter[index] : rcol_iter[index];
                     });
 
   // build dictionary; the validity mask is updated by the caller

diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu
@@ -80,9 +80,7 @@ __global__ void materialize_merged_bitmask_kernel(
   auto active_threads = __ballot_sync(0xffffffff, destination_row < num_destination_rows);
 
   while (destination_row < num_destination_rows) {
-    index_type const& merged_idx = merged_indices[destination_row];
-    side const src_side          = thrust::get<0>(merged_idx);
-    size_type const src_row      = thrust::get<1>(merged_idx);
+    auto const [src_side, src_row] = merged_indices[destination_row];
     bool const from_left{src_side == side::LEFT};
     bool source_bit_is_valid{true};
     if (left_have_valids && from_left) {
@@ -284,8 +282,7 @@ struct column_merger {
                       row_order_.end(),
                       merged_view.begin<Element>(),
                       [d_lcol, d_rcol] __device__(index_type const& index_pair) {
-                        auto side  = thrust::get<0>(index_pair);
-                        auto index = thrust::get<1>(index_pair);
+                        auto const [side, index] = index_pair;
                         return side == side::LEFT ? d_lcol[index] : d_rcol[index];
                       });
 

diff --git a/cpp/tests/copying/sample_tests.cpp b/cpp/tests/copying/sample_tests.cpp
@@ -89,9 +89,8 @@ struct SampleBasicTest : public SampleTest,
 
 TEST_P(SampleBasicTest, CombinationOfParameters)
 {
-  cudf::size_type const table_size         = 1024;
-  cudf::size_type const n_samples          = std::get<0>(GetParam());
-  cudf::sample_with_replacement multi_smpl = std::get<1>(GetParam());
+  cudf::size_type const table_size   = 1024;
+  auto const [n_samples, multi_smpl] = GetParam();
 
   auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
   cudf::test::fixed_width_column_wrapper<int16_t> col1(data, data + table_size);

diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
@@ -333,11 +333,10 @@ struct FromArrowTestSlice
 
 TEST_P(FromArrowTestSlice, SliceTest)
 {
-  auto tables          = get_tables(10000);
-  auto cudf_table_view = tables.first->view();
-  auto arrow_table     = tables.second;
-  auto start           = std::get<0>(GetParam());
-  auto end             = std::get<1>(GetParam());
+  auto tables             = get_tables(10000);
+  auto cudf_table_view    = tables.first->view();
+  auto arrow_table        = tables.second;
+  auto const [start, end] = GetParam();
 
   auto sliced_cudf_table   = cudf::slice(cudf_table_view, {start, end})[0];
   auto expected_cudf_table = cudf::table{sliced_cudf_table};

diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
@@ -488,11 +488,10 @@ struct ToArrowTestSlice
 
 TEST_P(ToArrowTestSlice, SliceTest)
 {
-  auto tables          = get_tables(10000);
-  auto cudf_table_view = tables.first->view();
-  auto arrow_table     = tables.second;
-  auto start           = std::get<0>(GetParam());
-  auto end             = std::get<1>(GetParam());
+  auto tables             = get_tables(10000);
+  auto cudf_table_view    = tables.first->view();
+  auto arrow_table        = tables.second;
+  auto const [start, end] = GetParam();
 
   auto sliced_cudf_table    = cudf::slice(cudf_table_view, {start, end})[0];
   auto expected_arrow_table = arrow_table->Slice(start, end - start);

diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
@@ -1137,8 +1137,7 @@ struct OrcWriterTestDecimal : public OrcWriterTest,
 
 TEST_P(OrcWriterTestDecimal, Decimal64)
 {
-  auto const num_rows = std::get<0>(GetParam());
-  auto const scale    = std::get<1>(GetParam());
+  auto const [num_rows, scale] = GetParam();
 
   // Using int16_t because scale causes values to overflow if they already require 32 bits
   auto const vals = random_values<int32_t>(num_rows);
@@ -1241,9 +1240,8 @@ struct OrcWriterTestStripes
 
 TEST_P(OrcWriterTestStripes, StripeSize)
 {
-  constexpr auto num_rows = 1000000;
-  auto size_bytes         = std::get<0>(GetParam());
-  auto size_rows          = std::get<1>(GetParam());
+  constexpr auto num_rows            = 1000000;
+  auto const [size_bytes, size_rows] = GetParam();
 
   const auto seq_col = random_values<int>(num_rows);
   const auto validity =

diff --git a/cpp/tests/transform/mask_to_bools_test.cpp b/cpp/tests/transform/mask_to_bools_test.cpp
@@ -56,8 +56,7 @@ struct MaskToBoolsTest
 TEST_P(MaskToBoolsTest, LargeDataSizeTest)
 {
   auto data                       = std::vector<bool>(10000);
-  cudf::size_type const begin_bit = std::get<0>(GetParam());
-  cudf::size_type const end_bit   = std::get<1>(GetParam());
+  auto const [begin_bit, end_bit] = GetParam();
   std::transform(data.cbegin(), data.cend(), data.begin(), [](auto val) {
     return rand() % 2 == 0 ? true : false;
   });

diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
@@ -8,6 +8,7 @@
 import cupy as cp
 import numpy as np
 import pandas as pd
+import pandas.tseries.offsets as pd_offset
 from pandas.core.tools.datetimes import _unit_map
 
 import cudf
@@ -458,6 +459,17 @@ class DateOffset:
         "Y": "years",
     }
 
+    _TICK_OR_WEEK_TO_UNITS = {
+        pd_offset.Week: "weeks",
+        pd_offset.Day: "days",
+        pd_offset.Hour: "hours",
+        pd_offset.Minute: "minutes",
+        pd_offset.Second: "seconds",
+        pd_offset.Milli: "milliseconds",
+        pd_offset.Micro: "microseconds",
+        pd_offset.Nano: "nanoseconds",
+    }
+
     _FREQSTR_REGEX = re.compile("([0-9]*)([a-zA-Z]+)")
 
     def __init__(self, n=1, normalize=False, **kwds):
@@ -649,6 +661,13 @@ def _from_freqstr(cls: Type[_T], freqstr: str) -> _T:
 
         return cls(**{cls._CODES_TO_UNITS[freq_part]: int(numeric_part)})
 
+    @classmethod
+    def _from_pandas_ticks_or_weeks(
+        cls: Type[_T],
+        tick: Union[pd.tseries.offsets.Tick, pd.tseries.offsets.Week],
+    ) -> _T:
+        return cls(**{cls._TICK_OR_WEEK_TO_UNITS[type(tick)]: tick.n})
+
     def _maybe_as_fast_pandas_offset(self):
         if (
             len(self.kwds) == 1
@@ -814,23 +833,15 @@ def date_range(
     if isinstance(freq, DateOffset):
         offset = freq
     elif isinstance(freq, str):
-        # Map pandas `offset alias` into cudf DateOffset `CODE`, only
-        # fixed-frequency, non-anchored offset aliases are supported.
-        mo = re.fullmatch(
-            rf'(-)*(\d*)({"|".join(_offset_alias_to_code.keys())})', freq
-        )
-        if mo is None:
+        offset = pd.tseries.frequencies.to_offset(freq)
+        if not isinstance(offset, pd.tseries.offsets.Tick) and not isinstance(
+            offset, pd.tseries.offsets.Week
+        ):
             raise ValueError(
-                f"Unrecognized or unsupported offset alias {freq}."
+                f"Unrecognized frequency string {freq}. cuDF does "
+                "not yet support month, quarter, year-anchored frequency."
             )
-
-        sign, n, offset_alias = mo.groups()
-        code = _offset_alias_to_code[offset_alias]
-
-        freq = "".join([n, code])
-        offset = DateOffset._from_freqstr(freq)
-        if sign:
-            offset.kwds.update({s: -i for s, i in offset.kwds.items()})
+        offset = DateOffset._from_pandas_ticks_or_weeks(offset)
     else:
         raise TypeError("`freq` must be a `str` or cudf.DateOffset object.")
 

diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
@@ -1583,6 +1583,48 @@ def test_date_range_raise_overflow():
         cudf.date_range(start=start, periods=periods, freq=freq)
 
 
+@pytest.mark.parametrize(
+    "freqstr_unsupported",
+    [
+        "1M",
+        "2SM",
+        "3MS",
+        "4BM",
+        "5CBM",
+        "6SMS",
+        "7BMS",
+        "8CBMS",
+        "Q",
+        "2BQ",
+        "3BQS",
+        "10A",
+        "10Y",
+        "9BA",
+        "9BY",
+        "8AS",
+        "8YS",
+        "7BAS",
+        "7BYS",
+        "BH",
+        "B",
+    ],
+)
+def test_date_range_raise_unsupported(freqstr_unsupported):
+    s, e = "2001-01-01", "2008-01-31"
+    pd.date_range(start=s, end=e, freq=freqstr_unsupported)
+    with pytest.raises(ValueError, match="does not yet support"):
+        cudf.date_range(start=s, end=e, freq=freqstr_unsupported)
+
+    # We also check that these values are unsupported when using lowercase
+    # characters. We exclude the value 3MS (every 3 month starts) because 3ms
+    # is a valid frequency for every 3 milliseconds.
+    if freqstr_unsupported != "3MS":
+        freqstr_unsupported = freqstr_unsupported.lower()
+        pd.date_range(start=s, end=e, freq=freqstr_unsupported)
+        with pytest.raises(ValueError, match="does not yet support"):
+            cudf.date_range(start=s, end=e, freq=freqstr_unsupported)
+
+
 ##################################################################
 #                    End of Date Range Test                      #
 ##################################################################