
Commit

Merge branch 'branch-24.06' into update-pandas-template-link
galipremsagar authored Apr 17, 2024
2 parents df1eb93 + f222b4a commit c5ef876
Showing 587 changed files with 4,442 additions and 3,706 deletions.
1 change: 1 addition & 0 deletions .devcontainer/Dockerfile
@@ -33,3 +33,4 @@ ENV SCCACHE_REGION="us-east-2"
 ENV SCCACHE_BUCKET="rapids-sccache-devs"
 ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
 ENV HISTFILE="/home/coder/.cache/._bash_history"
+ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache"
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
@@ -182,7 +182,7 @@ jobs:
   pandas-tests-diff:
     # diff the results of running the Pandas unit tests and publish a job summary
     needs: pandas-tests
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@patch-1
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
     with:
       node_type: cpu4
       build_type: pull-request
1 change: 0 additions & 1 deletion .github/workflows/test.yaml
@@ -44,7 +44,6 @@ jobs:
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/test_cpp_memcheck.sh"
   static-configure:
-    needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
2 changes: 0 additions & 2 deletions ci/configure_cpp_static.sh
@@ -3,8 +3,6 @@

 set -euo pipefail

-rapids-configure-conda-channels
-
 source rapids-date-string

 rapids-logger "Configure static cpp build"
2 changes: 1 addition & 1 deletion ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -27,7 +27,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
   --dist worksteal \
   --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

-SUMMARY_FILE_NAME=${PANDAS_TESTS_BRANCH}-24.06-results.json
+SUMMARY_FILE_NAME=${PANDAS_TESTS_BRANCH}-${RAPIDS_FULL_VERSION}-results.json
 # summarize the results and save them to artifacts:
 python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json pandas-testing/${PANDAS_TESTS_BRANCH}.json > pandas-testing/${SUMMARY_FILE_NAME}
 RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -64,11 +64,11 @@ dependencies:
 - nvcomp==3.0.6
 - nvtx>=0.2.1
 - packaging
-- pandas>=2.0,<2.2.2dev0
+- pandas>=2.0,<2.2.3dev0
 - pandoc
 - pip
 - pre-commit
-- protobuf>=4.21,<5
+- protobuf>=3.20,<5
 - ptxcompiler
 - pyarrow==14.0.2.*
 - pydata-sphinx-theme!=0.14.2
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -62,11 +62,11 @@ dependencies:
 - nvcomp==3.0.6
 - nvtx>=0.2.1
 - packaging
-- pandas>=2.0,<2.2.2dev0
+- pandas>=2.0,<2.2.3dev0
 - pandoc
 - pip
 - pre-commit
-- protobuf>=4.21,<5
+- protobuf>=3.20,<5
 - pyarrow==14.0.2.*
 - pydata-sphinx-theme!=0.14.2
 - pynvjitlink
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
@@ -78,10 +78,10 @@ requirements:
     {% endif %}
     - cuda-version ={{ cuda_version }}
   run:
-    - {{ pin_compatible('protobuf', min_pin='x.x', max_pin='x') }}
+    - protobuf >=3.20,<5.0a0
     - python
     - typing_extensions >=4.0.0
-    - pandas >=2.0,<2.2.2dev0
+    - pandas >=2.0,<2.2.3dev0
     - cupy >=12.0.0
     - numba >=0.57
     - {{ pin_compatible('numpy', max_pin='x') }}
7 changes: 6 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
@@ -331,14 +331,19 @@ ConfigureNVBench(
 ConfigureBench(JSON_BENCH json/json.cu)
 ConfigureNVBench(FST_NVBENCH io/fst.cu)
 ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp)
-ConfigureNVBench(JSON_READER_OPTION io/json/json_reader_option.cpp)
+ConfigureNVBench(JSON_READER_OPTION_NVBENCH io/json/json_reader_option.cpp)
 ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp)

 # ##################################################################################################
 # * io benchmark ---------------------------------------------------------------------
 ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp)
 target_link_libraries(MULTIBYTE_SPLIT_NVBENCH PRIVATE ZLIB::ZLIB)

+# ##################################################################################################
+# * decimal benchmark
+# ---------------------------------------------------------------------------------
+ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp)
+
 add_custom_target(
   run_benchmarks
   DEPENDS CUDF_BENCHMARKS
5 changes: 3 additions & 2 deletions cpp/benchmarks/common/generate_input.cu
@@ -324,10 +324,11 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
   distribution_fn<DeviceType> dist;
   std::optional<numeric::scale_type> scale;

-  random_value_fn(distribution_params<DeviceType> const& desc)
+  random_value_fn(distribution_params<T> const& desc)
     : lower_bound{desc.lower_bound},
       upper_bound{desc.upper_bound},
-      dist{make_distribution<DeviceType>(desc.id, desc.lower_bound, desc.upper_bound)}
+      dist{make_distribution<DeviceType>(desc.id, lower_bound, upper_bound)},
+      scale{desc.scale}
   {
   }

42 changes: 36 additions & 6 deletions cpp/benchmarks/common/generate_input.hpp
@@ -182,9 +182,17 @@ struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::struct_view>>> {
   cudf::size_type max_depth;
 };

-// Present for compilation only. To be implemented once reader/writers support the fixed width type.
+/**
+ * @brief Fixed-point values are parameterized with a distribution type, scale, and bounds of the
+ * same type.
+ */
 template <typename T>
-struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {};
+struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
+  distribution_id id;
+  typename T::rep lower_bound;
+  typename T::rep upper_bound;
+  std::optional<numeric::scale_type> scale;
+};

 /**
  * @brief Returns a vector of types, corresponding to the input type or a type group.
@@ -226,7 +234,7 @@ class data_profile {
     cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 64}, 2};
   distribution_params<cudf::struct_view> struct_dist_desc{
     {cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2};
-  std::map<cudf::type_id, distribution_params<__uint128_t>> decimal_params;
+  std::map<cudf::type_id, distribution_params<numeric::decimal128>> decimal_params;

   double bool_probability_true = 0.5;
   std::optional<double> null_probability = 0.01;
@@ -300,16 +308,21 @@
   }

   template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
-  distribution_params<typename T::rep> get_distribution_params() const
+  distribution_params<T> get_distribution_params() const
   {
     using rep = typename T::rep;
     auto it = decimal_params.find(cudf::type_to_id<T>());
     if (it == decimal_params.end()) {
       auto const range = default_range<rep>();
-      return distribution_params<rep>{default_distribution_id<rep>(), range.first, range.second};
+      auto const scale = std::optional<numeric::scale_type>{};
+      return distribution_params<T>{
+        default_distribution_id<rep>(), range.first, range.second, scale};
     } else {
       auto& desc = it->second;
-      return {desc.id, static_cast<rep>(desc.lower_bound), static_cast<rep>(desc.upper_bound)};
+      return {desc.id,
+              static_cast<rep>(desc.lower_bound),
+              static_cast<rep>(desc.upper_bound),
+              desc.scale};
     }
   }

@@ -359,6 +372,23 @@ class data_profile {
     }
   }

+  // Users should pass integral values for bounds when setting the parameters for fixed-point.
+  // Otherwise the call will have no effect.
+  template <typename T,
+            typename Type_enum,
+            std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
+  void set_distribution_params(Type_enum type_or_group,
+                               distribution_id dist,
+                               T lower_bound,
+                               T upper_bound,
+                               numeric::scale_type scale)
+  {
+    for (auto tid : get_type_or_group(static_cast<int32_t>(type_or_group))) {
+      decimal_params[tid] = {
+        dist, static_cast<__int128_t>(lower_bound), static_cast<__int128_t>(upper_bound), scale};
+    }
+  }
+
   template <typename T, typename Type_enum, std::enable_if_t<cudf::is_chrono<T>(), T>* = nullptr>
   void set_distribution_params(Type_enum type_or_group,
                                distribution_id dist,
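For context, a minimal usage sketch of the fixed-point set_distribution_params overload added above, based on the generate_input.hpp API shown in this diff. The main() wrapper, the DECIMAL64 choice, and the bound and scale values are illustrative assumptions, not part of the change:

// Hypothetical sketch: configure decimal64 generation with integral bounds and an explicit
// scale, then build a random column from that profile.
#include <benchmarks/common/generate_input.hpp>

#include <cudf/types.hpp>

#include <cstdint>

int main()
{
  data_profile profile;
  // Bounds must be integral values; a non-integral type would not match this overload.
  profile.set_distribution_params(cudf::type_id::DECIMAL64,
                                  distribution_id::NORMAL,
                                  std::int64_t{10},
                                  std::int64_t{10'000'000},
                                  numeric::scale_type{-2});
  auto const col = create_random_column(cudf::type_id::DECIMAL64, row_count{1 << 20}, profile);
  return col->size() == (1 << 20) ? 0 : 1;
}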
10 changes: 6 additions & 4 deletions cpp/benchmarks/copying/shift.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,11 +21,13 @@
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>

+#include <rmm/resource_ref.hpp>
+
 template <typename T, typename ScalarType = cudf::scalar_type_t<T>>
 std::unique_ptr<cudf::scalar> make_scalar(
-  T value = 0,
-  rmm::cuda_stream_view stream = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+  T value = 0,
+  rmm::cuda_stream_view stream = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
 {
   auto s = new ScalarType(value, true, stream, mr);
   return std::unique_ptr<cudf::scalar>(s);
167 changes: 167 additions & 0 deletions cpp/benchmarks/decimal/convert_floating.cpp
@@ -0,0 +1,167 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf/types.hpp>
#include <cudf/unary.hpp>

#include <nvbench/nvbench.cuh>

#include <type_traits>

// This benchmark compares the cost of converting decimal <--> floating point
template <typename InputType, typename OutputType>
void bench_cast_decimal(nvbench::state& state, nvbench::type_list<InputType, OutputType>)
{
static constexpr bool is_input_floating = std::is_floating_point_v<InputType>;
static constexpr bool is_output_floating = std::is_floating_point_v<OutputType>;

static constexpr bool is_double =
std::is_same_v<InputType, double> || std::is_same_v<OutputType, double>;
static constexpr bool is_32bit =
std::is_same_v<InputType, numeric::decimal32> || std::is_same_v<OutputType, numeric::decimal32>;
static constexpr bool is_128bit = std::is_same_v<InputType, numeric::decimal128> ||
std::is_same_v<OutputType, numeric::decimal128>;

// Skip floating --> floating and decimal --> decimal
if constexpr (is_input_floating == is_output_floating) {
state.skip("Meaningless conversion.");
return;
}

// Skip float <--> dec128
if constexpr (!is_double && is_128bit) {
state.skip("Ignoring float <--> dec128.");
return;
}

// Get settings
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const exp_mode = state.get_int64("exp_range");

// Exponent range: Range size is 10^6
// These probe the edges of the float and double ranges, as well as more common values
int const exp_min_array[] = {-307, -37, -14, -3, 8, 31, 301};
int const exp_range_size = 6;
int const exp_min = exp_min_array[exp_mode];
int const exp_max = exp_min + exp_range_size;

// With exp range size of 6, decimal output (generated or casted-to) has 7 digits of precision
int const extra_digits_precision = 1;

// Exclude end range of double from float test
if (!is_double && ((exp_mode == 0) || (exp_mode == 6))) {
state.skip("Range beyond end of float tests.");
return;
}

// The current float <--> decimal conversion algorithm is limited
static constexpr bool is_64bit = !is_32bit && !is_128bit;
if (is_32bit && (exp_mode != 3)) {
state.skip("Decimal32 conversion only works up to scale factors of 10^9.");
return;
}
if (is_64bit && ((exp_mode < 2) || (exp_mode > 4))) {
state.skip("Decimal64 conversion only works up to scale factors of 10^18.");
return;
}
if (is_128bit && ((exp_mode == 0) || (exp_mode == 6))) {
state.skip("Decimal128 conversion only works up to scale factors of 10^38.");
return;
}

// Type IDs
auto const input_id = cudf::type_to_id<InputType>();
auto const output_id = cudf::type_to_id<OutputType>();

// Create data profile and scale
auto const [output_scale, profile] = [&]() {
if constexpr (is_input_floating) {
// Range for generated floating point values
auto get_pow10 = [](auto exp10) {
return std::pow(static_cast<InputType>(10), static_cast<InputType>(exp10));
};
InputType const floating_range_min = get_pow10(exp_min);
InputType const floating_range_max = get_pow10(exp_max);

// With exp range size of 6, output has 7 decimal digits of precision
auto const decimal_output_scale = exp_min - extra_digits_precision;

// Input distribution
data_profile const profile = data_profile_builder().distribution(
input_id, distribution_id::NORMAL, floating_range_min, floating_range_max);

return std::pair{decimal_output_scale, profile};

} else { // Generating decimals

using decimal_rep_type = typename InputType::rep;

// For exp range size 6 and precision 7, generates ints between 10 and 10^7,
// with scale factor of: exp_max - 7. This matches floating point generation.
int const digits_precision = exp_range_size + extra_digits_precision;
auto const decimal_input_scale = numeric::scale_type{exp_max - digits_precision};

// Range for generated integer values
auto get_pow10 = [](auto exp10) {
return numeric::detail::ipow<decimal_rep_type, numeric::Radix::BASE_10>(exp10);
};
auto const decimal_range_min = get_pow10(digits_precision - exp_range_size);
auto const decimal_range_max = get_pow10(digits_precision);

// Input distribution
data_profile const profile = data_profile_builder().distribution(input_id,
distribution_id::NORMAL,
decimal_range_min,
decimal_range_max,
decimal_input_scale);

return std::pair{0, profile};
}
}();

// Generate input data
auto const input_col = create_random_column(input_id, row_count{num_rows}, profile);
auto const input_view = input_col->view();

// Output type
auto const output_type =
!is_input_floating ? cudf::data_type(output_id) : cudf::data_type(output_id, output_scale);

// Stream
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

// Run benchmark
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::cast(input_view, output_type); });

// Throughput statistics
state.add_element_count(num_rows);
state.add_global_memory_reads<InputType>(num_rows);
state.add_global_memory_writes<OutputType>(num_rows);
}

// Data types
using data_types =
nvbench::type_list<float, double, numeric::decimal32, numeric::decimal64, numeric::decimal128>;

NVBENCH_BENCH_TYPES(bench_cast_decimal, NVBENCH_TYPE_AXES(data_types, data_types))
.set_name("decimal_floating_conversion")
.set_type_axes_names({"InputType", "OutputType"})
.add_int64_power_of_two_axis("num_rows", {28})
.add_int64_axis("exp_range", nvbench::range(0, 6));
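As a companion to the new benchmark above, a minimal standalone sketch of the conversion it measures, assuming a cudf development build and the create_random_column helper from generate_input.hpp shown earlier in this diff. The row count and the output scale of -2 are illustrative assumptions only:

// Hypothetical sketch: generate a random double column, then cast it to decimal64 with
// scale -2; cudf::cast is the call the benchmark above times.
#include <benchmarks/common/generate_input.hpp>

#include <cudf/types.hpp>
#include <cudf/unary.hpp>

int main()
{
  auto const input  = create_random_column(cudf::type_id::FLOAT64, row_count{1 << 20});
  auto const output = cudf::cast(input->view(), cudf::data_type{cudf::type_id::DECIMAL64, -2});
  return output->size() == input->size() ? 0 : 1;
}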
1 change: 1 addition & 0 deletions cpp/benchmarks/fixture/benchmark_fixture.hpp
@@ -120,6 +120,7 @@ class memory_stats_logger {
   }

  private:
+  // TODO change to resource_ref once set_current_device_resource supports it
   rmm::mr::device_memory_resource* existing_mr;
   rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> statistics_mr;
 };
