Merge remote-tracking branch 'upstream/branch-22.06' into thrust-1.16

rapidsai · Mar 31, 2022 · a2a5daa · a2a5daa
2 parents 1e20830 + 4775f11
commit a2a5daa
Show file tree

Hide file tree

Showing 33 changed files with 139 additions and 338 deletions.
diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
@@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
 export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
 export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 # ucx-py version
 export UCX_PY_VERSION='0.26.*'

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
@@ -12,7 +12,7 @@ dependencies:
   - clang-tools=11.1.0
   - cupy>=9.5.0,<11.0.0a0
   - rmm=22.06.*
-  - cmake>=3.20.1
+  - cmake>=3.20.1,<3.23
   - cmake_setuptools>=0.1.3
   - python>=3.7,<3.9
   - numba>=0.54

diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp
@@ -67,9 +67,6 @@ class orc_reader_options {
   // Cast timestamp columns to a specific type
   data_type _timestamp_type{type_id::EMPTY};
 
-  // Columns that should be converted from Decimal to Float64
-  std::vector<std::string> _decimal_cols_as_float;
-
   // Columns that should be read as Decimal128
   std::vector<std::string> _decimal128_columns;
 
@@ -138,14 +135,6 @@ class orc_reader_options {
    */
   data_type get_timestamp_type() const { return _timestamp_type; }
 
-  /**
-   * @brief Fully qualified names of columns that should be converted from Decimal to Float64.
-   */
-  std::vector<std::string> const& get_decimal_cols_as_float() const
-  {
-    return _decimal_cols_as_float;
-  }
-
   /**
    * @brief Fully qualified names of columns that should be read as 128-bit Decimal.
    */
@@ -215,18 +204,6 @@ class orc_reader_options {
    */
   void set_timestamp_type(data_type type) { _timestamp_type = type; }
 
-  /**
-   * @brief Set columns that should be converted from Decimal to Float64
-   *
-   * @param val Vector of fully qualified column names.
-   */
-  [[deprecated(
-    "Decimal to float conversion is deprecated and will be remove in future release")]] void
-  set_decimal_cols_as_float(std::vector<std::string> val)
-  {
-    _decimal_cols_as_float = std::move(val);
-  }
-
   /**
    * @brief Set columns that should be read as 128-bit Decimal
    *
@@ -340,21 +317,6 @@ class orc_reader_options_builder {
     return *this;
   }
 
-  /**
-   * @brief Columns that should be converted from decimals to float64.
-   *
-   * @param val Vector of column names.
-   * @return this for chaining.
-   */
-  [[deprecated(
-    "Decimal to float conversion is deprecated and will be remove in future "
-    "release")]] orc_reader_options_builder&
-  decimal_cols_as_float(std::vector<std::string> val)
-  {
-    options._decimal_cols_as_float = std::move(val);
-    return *this;
-  }
-
   /**
    * @brief Columns that should be read as 128-bit Decimal
    *

diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -86,7 +86,8 @@ std::unique_ptr<column> replace_re(
  *
  * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
  *
- * @throw cudf::logic_error if capture index values in `replacement` are not in range 1-99
+ * @throw cudf::logic_error if capture index values in `replacement` are not in range 0-99, and also
+ * if the index exceeds the group count specified in the pattern
  *
  * @param strings Strings instance for this operation.
  * @param pattern The regular expression patterns to search within each string.

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
@@ -241,30 +241,22 @@ size_t gather_stream_info(const size_t stripe_index,
 /**
  * @brief Determines cuDF type of an ORC Decimal column.
  */
-auto decimal_column_type(std::vector<std::string> const& float64_columns,
-                         std::vector<std::string> const& decimal128_columns,
+auto decimal_column_type(std::vector<std::string> const& decimal128_columns,
                          cudf::io::orc::detail::aggregate_orc_metadata const& metadata,
                          int column_index)
 {
-  if (metadata.get_col_type(column_index).kind != DECIMAL) return type_id::EMPTY;
+  if (metadata.get_col_type(column_index).kind != DECIMAL) { return type_id::EMPTY; }
 
-  auto const& column_path = metadata.column_path(0, column_index);
-  auto is_column_in       = [&](const std::vector<std::string>& cols) {
-    return std::find(cols.cbegin(), cols.cend(), column_path) != cols.end();
-  };
-
-  auto const user_selected_float64    = is_column_in(float64_columns);
-  auto const user_selected_decimal128 = is_column_in(decimal128_columns);
-  CUDF_EXPECTS(not user_selected_float64 or not user_selected_decimal128,
-               "Both decimal128 and float64 types selected for column " + column_path);
-
-  if (user_selected_float64) return type_id::FLOAT64;
-  if (user_selected_decimal128) return type_id::DECIMAL128;
+  if (std::find(decimal128_columns.cbegin(),
+                decimal128_columns.cend(),
+                metadata.column_path(0, column_index)) != decimal128_columns.end()) {
+    return type_id::DECIMAL128;
+  }
 
   auto const precision = metadata.get_col_type(column_index)
                            .precision.value_or(cuda::std::numeric_limits<int64_t>::digits10);
-  if (precision <= cuda::std::numeric_limits<int32_t>::digits10) return type_id::DECIMAL32;
-  if (precision <= cuda::std::numeric_limits<int64_t>::digits10) return type_id::DECIMAL64;
+  if (precision <= cuda::std::numeric_limits<int32_t>::digits10) { return type_id::DECIMAL32; }
+  if (precision <= cuda::std::numeric_limits<int64_t>::digits10) { return type_id::DECIMAL64; }
   return type_id::DECIMAL128;
 }
 
@@ -796,12 +788,11 @@ std::unique_ptr<column> reader::impl::create_empty_column(const size_type orc_co
                                                           rmm::cuda_stream_view stream)
 {
   schema_info.name = _metadata.column_name(0, orc_col_id);
-  auto const type  = to_type_id(
-    _metadata.get_schema(orc_col_id),
-    _use_np_dtypes,
-    _timestamp_type.id(),
-    decimal_column_type(_decimal_cols_as_float, decimal128_columns, _metadata, orc_col_id));
-  int32_t scale = 0;
+  auto const type  = to_type_id(_metadata.get_schema(orc_col_id),
+                               _use_np_dtypes,
+                               _timestamp_type.id(),
+                               decimal_column_type(decimal128_columns, _metadata, orc_col_id));
+  int32_t scale    = 0;
   std::vector<std::unique_ptr<column>> child_columns;
   std::unique_ptr<column> out_col = nullptr;
   auto kind                       = _metadata.get_col_type(orc_col_id).kind;
@@ -943,8 +934,7 @@ reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
   _use_np_dtypes = options.is_enabled_use_np_dtypes();
 
   // Control decimals conversion
-  _decimal_cols_as_float = options.get_decimal_cols_as_float();
-  decimal128_columns     = options.get_decimal128_columns();
+  decimal128_columns = options.get_decimal128_columns();
 }
 
 timezone_table reader::impl::compute_timezone_table(
@@ -1004,11 +994,10 @@ table_with_metadata reader::impl::read(size_type skip_rows,
     // Get a list of column data types
     std::vector<data_type> column_types;
     for (auto& col : columns_level) {
-      auto col_type = to_type_id(
-        _metadata.get_col_type(col.id),
-        _use_np_dtypes,
-        _timestamp_type.id(),
-        decimal_column_type(_decimal_cols_as_float, decimal128_columns, _metadata, col.id));
+      auto col_type = to_type_id(_metadata.get_col_type(col.id),
+                                 _use_np_dtypes,
+                                 _timestamp_type.id(),
+                                 decimal_column_type(decimal128_columns, _metadata, col.id));
       CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type");
       if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or
           col_type == type_id::DECIMAL128) {

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -221,7 +221,6 @@ class reader::impl {
 
   bool _use_index{true};
   bool _use_np_dtypes{true};
-  std::vector<std::string> _decimal_cols_as_float;
   std::vector<std::string> decimal128_columns;
   data_type _timestamp_type{type_id::EMPTY};
   reader_column_meta _col_meta{};

diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -962,15 +962,6 @@ static __device__ uint32_t Byte_RLE(orc_bytestream_s* bs,
   return rle->num_vals;
 }
 
-/**
- * @brief Powers of 10
- */
-static const __device__ __constant__ double kPow10[40] = {
-  1.0,   1.e1,  1.e2,  1.e3,  1.e4,  1.e5,  1.e6,  1.e7,  1.e8,  1.e9,  1.e10, 1.e11, 1.e12, 1.e13,
-  1.e14, 1.e15, 1.e16, 1.e17, 1.e18, 1.e19, 1.e20, 1.e21, 1.e22, 1.e23, 1.e24, 1.e25, 1.e26, 1.e27,
-  1.e28, 1.e29, 1.e30, 1.e31, 1.e32, 1.e33, 1.e34, 1.e35, 1.e36, 1.e37, 1.e38, 1.e39,
-};
-
 static const __device__ __constant__ int64_t kPow5i[28] = {1,
                                                            5,
                                                            25,
@@ -1045,34 +1036,24 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs,
       auto const pos = static_cast<int>(vals.i64[2 * t]);
       __int128_t v   = decode_varint128(bs, pos);
 
-      if (dtype_id == type_id::FLOAT64) {
-        double f      = v;
-        int32_t scale = (t < numvals) ? val_scale : 0;
-        if (scale >= 0)
-          vals.f64[t] = f / kPow10[min(scale, 39)];
-        else
-          vals.f64[t] = f * kPow10[min(-scale, 39)];
-      } else {
-        auto const scaled_value = [&]() {
-          // Since cuDF column stores just one scale, value needs to be adjusted to col_scale from
-          // val_scale. So the difference of them will be used to add 0s or remove digits.
-          int32_t scale = (t < numvals) ? col_scale - val_scale : 0;
-          if (scale >= 0) {
-            scale = min(scale, 27);
-            return (v * kPow5i[scale]) << scale;
-          } else  // if (scale < 0)
-          {
-            scale = min(-scale, 27);
-            return (v / kPow5i[scale]) >> scale;
-          }
-        }();
-        if (dtype_id == type_id::DECIMAL32) {
-          vals.i32[t] = scaled_value;
-        } else if (dtype_id == type_id::DECIMAL64) {
-          vals.i64[t] = scaled_value;
+      auto const scaled_value = [&]() {
+        // Since cuDF column stores just one scale, value needs to be adjusted to col_scale from
+        // val_scale. So the difference of them will be used to add 0s or remove digits.
+        int32_t const scale = (t < numvals) ? col_scale - val_scale : 0;
+        if (scale >= 0) {
+          auto const abs_scale = min(scale, 27);
+          return (v * kPow5i[abs_scale]) << abs_scale;
         } else {
-          vals.i128[t] = scaled_value;
+          auto const abs_scale = min(-scale, 27);
+          return (v / kPow5i[abs_scale]) >> abs_scale;
         }
+      }();
+      if (dtype_id == type_id::DECIMAL32) {
+        vals.i32[t] = scaled_value;
+      } else if (dtype_id == type_id::DECIMAL64) {
+        vals.i64[t] = scaled_value;
+      } else {
+        vals.i128[t] = scaled_value;
       }
     }
     // There is nothing to read, so break
@@ -1711,8 +1692,7 @@ __global__ void __launch_bounds__(block_size)
             case DECIMAL:
               if (s->chunk.dtype_id == type_id::DECIMAL32) {
                 static_cast<uint32_t*>(data_out)[row] = s->vals.u32[t + vals_skipped];
-              } else if (s->chunk.dtype_id == type_id::FLOAT64 or
-                         s->chunk.dtype_id == type_id::DECIMAL64) {
+              } else if (s->chunk.dtype_id == type_id::DECIMAL64) {
                 static_cast<uint64_t*>(data_out)[row] = s->vals.u64[t + vals_skipped];
               } else {
                 // decimal128

diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernels.cu
@@ -35,18 +35,19 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <cudf::size_type block_size, bool has_nulls>
-__global__ void mixed_join(table_device_view left_table,
-                           table_device_view right_table,
-                           table_device_view probe,
-                           table_device_view build,
-                           row_equality const equality_probe,
-                           join_kind const join_type,
-                           cudf::detail::mixed_multimap_type::device_view hash_table_view,
-                           size_type* join_output_l,
-                           size_type* join_output_r,
-                           cudf::ast::detail::expression_device_view device_expression_data,
-                           cudf::size_type const* join_result_offsets,
-                           bool const swap_tables)
+__launch_bounds__(block_size) __global__
+  void mixed_join(table_device_view left_table,
+                  table_device_view right_table,
+                  table_device_view probe,
+                  table_device_view build,
+                  row_equality const equality_probe,
+                  join_kind const join_type,
+                  cudf::detail::mixed_multimap_type::device_view hash_table_view,
+                  size_type* join_output_l,
+                  size_type* join_output_r,
+                  cudf::ast::detail::expression_device_view device_expression_data,
+                  cudf::size_type const* join_result_offsets,
+                  bool const swap_tables)
 {
   // Normally the casting of a shared memory array is used to create multiple
   // arrays of different types from the shared memory buffer, but here it is

diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu
@@ -32,17 +32,18 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <cudf::size_type block_size, bool has_nulls>
-__global__ void mixed_join_semi(table_device_view left_table,
-                                table_device_view right_table,
-                                table_device_view probe,
-                                table_device_view build,
-                                row_equality const equality_probe,
-                                join_kind const join_type,
-                                cudf::detail::semi_map_type::device_view hash_table_view,
-                                size_type* join_output_l,
-                                cudf::ast::detail::expression_device_view device_expression_data,
-                                cudf::size_type const* join_result_offsets,
-                                bool const swap_tables)
+__launch_bounds__(block_size) __global__
+  void mixed_join_semi(table_device_view left_table,
+                       table_device_view right_table,
+                       table_device_view probe,
+                       table_device_view build,
+                       row_equality const equality_probe,
+                       join_kind const join_type,
+                       cudf::detail::semi_map_type::device_view hash_table_view,
+                       size_type* join_output_l,
+                       cudf::ast::detail::expression_device_view device_expression_data,
+                       cudf::size_type const* join_result_offsets,
+                       bool const swap_tables)
 {
   // Normally the casting of a shared memory array is used to create multiple
   // arrays of different types from the shared memory buffer, but here it is

diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernels.cu
@@ -35,7 +35,7 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <int block_size, bool has_nulls>
-__global__ void compute_mixed_join_output_size(
+__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size(
   table_device_view left_table,
   table_device_view right_table,
   table_device_view probe,

diff --git a/cpp/src/join/mixed_join_size_kernels_semi.cu b/cpp/src/join/mixed_join_size_kernels_semi.cu
@@ -32,7 +32,7 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <int block_size, bool has_nulls>
-__global__ void compute_mixed_join_output_size_semi(
+__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size_semi(
   table_device_view left_table,
   table_device_view right_table,
   table_device_view probe,