Merge branch 'branch-22.04' of github.com:rapidsai/cudf into improvement/ListOfColumnsRefactor/filling
rapidsai · Mar 11, 2022 · d617caa · d617caa
2 parents a0ef0af + f29c8d9
commit d617caa
Show file tree

Hide file tree

Showing 58 changed files with 2,878 additions and 934 deletions.
diff --git a/codecov.yml b/codecov.yml
@@ -2,4 +2,7 @@
 coverage:
   status:
     project: off
-    patch: off
+    patch: on
+
+github_checks:
+    annotations: true
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -362,6 +362,7 @@ add_library(
   src/quantiles/quantiles.cu
   src/reductions/all.cu
   src/reductions/any.cu
+  src/reductions/collect_ops.cu
   src/reductions/max.cu
   src/reductions/mean.cu
   src/reductions/min.cu
@@ -373,6 +374,13 @@ add_library(
   src/reductions/scan/scan.cpp
   src/reductions/scan/scan_exclusive.cu
   src/reductions/scan/scan_inclusive.cu
+  src/reductions/segmented_all.cu
+  src/reductions/segmented_any.cu
+  src/reductions/segmented_max.cu
+  src/reductions/segmented_min.cu
+  src/reductions/segmented_product.cu
+  src/reductions/segmented_reductions.cpp
+  src/reductions/segmented_sum.cu
   src/reductions/std.cu
   src/reductions/sum.cu
   src/reductions/sum_of_squares.cu

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -24,7 +24,7 @@ target_compile_options(
 
 target_link_libraries(
   cudf_datagen PUBLIC GTest::gmock GTest::gtest GTest::gmock_main GTest::gtest_main
-                      benchmark::benchmark nvbench::nvbench Threads::Threads cudf
+                      benchmark::benchmark nvbench::nvbench Threads::Threads cudf cudftestutil
 )
 
 target_include_directories(
@@ -175,9 +175,10 @@ ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher.cu)
 # ##################################################################################################
 # * reduction benchmark ---------------------------------------------------------------------------
 ConfigureBench(
-  REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/reduce.cpp
-  reduction/scan.cpp reduction/minmax.cpp
+  REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp
+  reduction/reduce.cpp reduction/scan.cpp
 )
+ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu)
 
 # ##################################################################################################
 # * reduction benchmark ---------------------------------------------------------------------------

diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp
@@ -28,11 +28,12 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
 {
   auto const column_size{static_cast<cudf::size_type>(state.range(0))};
 
-  auto const source_table = create_sequence_table(
+  auto const source_table = create_random_table(
     {cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{column_size});
 
-  auto lhs          = cudf::column_view(source_table->get_column(0));
-  auto rhs          = cudf::column_view(source_table->get_column(1));
+  auto lhs = cudf::column_view(source_table->get_column(0));
+  auto rhs = cudf::column_view(source_table->get_column(1));
+
   auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());
 
   // Call once for hot cache.

diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+#include <nvbench/nvbench.cuh>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/aggregation.hpp>
+#include <cudf/column/column.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/reduction.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <thrust/device_vector.h>
+
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+namespace cudf {
+
+/**
+ * @brief Reports whether an aggregation kind is ALL or ANY.
+ *
+ * The benchmark overloads below use this to pair these kinds with a `bool` output type.
+ *
+ * @param kind Aggregation kind to classify
+ * @return true for ALL and ANY, false for every other kind
+ */
+bool constexpr is_boolean_output_agg(segmented_reduce_aggregation::Kind kind)
+{
+  switch (kind) {
+    case segmented_reduce_aggregation::ALL:
+    case segmented_reduce_aggregation::ANY: return true;
+    default: return false;
+  }
+}
+
+/**
+ * @brief Creates the segmented-reduce aggregation object selected by the compile-time `kind`.
+ *
+ * SUM, PRODUCT, MIN, MAX, ALL and ANY are handled; any other kind ends in `CUDF_FAIL`.
+ *
+ * @tparam kind Aggregation kind to construct
+ * @return Owning pointer to the newly created aggregation
+ */
+template <segmented_reduce_aggregation::Kind kind>
+std::unique_ptr<segmented_reduce_aggregation> make_simple_aggregation()
+{
+  // Short alias: used both for the enumerator scope and as the factory template argument.
+  using agg_type = segmented_reduce_aggregation;
+
+  switch (kind) {
+    case agg_type::SUM: return make_sum_aggregation<agg_type>();
+    case agg_type::PRODUCT: return make_product_aggregation<agg_type>();
+    case agg_type::MIN: return make_min_aggregation<agg_type>();
+    case agg_type::MAX: return make_max_aggregation<agg_type>();
+    case agg_type::ALL: return make_all_aggregation<agg_type>();
+    case agg_type::ANY: return make_any_aggregation<agg_type>();
+    default: CUDF_FAIL("Unsupported simple segmented aggregation");
+  }
+}
+
+/**
+ * @brief Builds the benchmark input: a column of random values and a device vector of segment
+ * offsets.
+ *
+ * The `column_size` and `num_segments` axes are read from `state`. Offsets are multiples of
+ * `column_size / num_segments`, clamped to `column_size`; `num_segments + 1` of them are produced.
+ *
+ * @tparam InputType Element type of the generated data column
+ * @param state nvbench state supplying the `column_size` and `num_segments` axes
+ * @return Pair of (data column, device vector of offsets)
+ */
+template <typename InputType>
+std::pair<std::unique_ptr<column>, thrust::device_vector<size_type>> make_test_data(
+  nvbench::state& state)
+{
+  auto const column_size{size_type(state.get_int64("column_size"))};
+  auto const num_segments{size_type(state.get_int64("num_segments"))};
+
+  auto segment_length = column_size / num_segments;
+
+  // Random values drawn with generator bounds (0, 100); the counter argument is ignored.
+  test::UniformRandomGenerator<InputType> rand_gen(0, 100);
+  auto data_it = detail::make_counting_transform_iterator(
+    0, [&rand_gen](auto i) { return rand_gen.generate(); });
+
+  // i-th offset is i * segment_length, never exceeding column_size.
+  auto offset_it =
+    detail::make_counting_transform_iterator(0, [&column_size, &segment_length](auto i) {
+      return column_size < i * segment_length ? column_size : i * segment_length;
+    });
+
+  test::fixed_width_column_wrapper<InputType> input(data_it, data_it + column_size);
+  std::vector<size_type> h_offsets(offset_it, offset_it + num_segments + 1);
+  thrust::device_vector<size_type> d_offsets(h_offsets);
+
+  // Move the offsets into the pair; passing the lvalue would copy the whole device vector.
+  return std::make_pair(input.release(), std::move(d_offsets));
+}
+
+/**
+ * @brief Benchmarks `segmented_reduce` for one (InputType, OutputType, kind) combination.
+ *
+ * This overload is enabled unless `kind` is ALL/ANY while `OutputType` is not `bool`.
+ * Input data and offsets come from `make_test_data`, the aggregation from
+ * `make_simple_aggregation`.
+ *
+ * @tparam InputType Element type of the input column
+ * @tparam OutputType Element type requested for the reduction result
+ * @tparam kind Aggregation kind under test
+ * @param state nvbench state providing the `column_size` and `num_segments` axes
+ */
+template <typename InputType, typename OutputType, aggregation::Kind kind>
+std::enable_if_t<!is_boolean_output_agg(kind) || std::is_same_v<OutputType, bool>, void>
+BM_Simple_Segmented_Reduction(nvbench::state& state,
+                              nvbench::type_list<InputType, OutputType, nvbench::enum_type<kind>>)
+{
+  // TODO: to be replaced by nvbench fixture once it's ready
+  cudf::rmm_pool_raii rmm_pool;
+
+  auto const column_size{size_type(state.get_int64("column_size"))};
+  auto const num_segments{size_type(state.get_int64("num_segments"))};
+  auto [input, offsets] = make_test_data<InputType>(state);
+  auto agg              = make_simple_aggregation<kind>();
+
+  state.add_element_count(column_size);
+  state.add_global_memory_reads<InputType>(column_size);
+  // The reduction emits one result per segment, so writes scale with num_segments rather than
+  // with the number of input elements.
+  state.add_global_memory_writes<OutputType>(num_segments);
+
+  state.exec(
+    nvbench::exec_tag::sync,
+    [input_view = input->view(), offset_span = device_span<size_type>{offsets}, &agg](
+      nvbench::launch& launch) {
+      segmented_reduce(
+        input_view, offset_span, *agg, data_type{type_to_id<OutputType>()}, null_policy::INCLUDE);
+    });
+}
+
+/**
+ * @brief Fallback overload for ALL/ANY aggregations requested with a non-`bool` output type.
+ *
+ * Its condition is the exact negation of the one guarding the measuring overload; it performs no
+ * measurement and only marks the configuration as skipped.
+ *
+ * @param state nvbench state on which the skip is recorded
+ */
+template <typename InputType, typename OutputType, aggregation::Kind kind>
+std::enable_if_t<!(!is_boolean_output_agg(kind) || std::is_same_v<OutputType, bool>)>
+BM_Simple_Segmented_Reduction(nvbench::state& state,
+                              nvbench::type_list<InputType, OutputType, nvbench::enum_type<kind>>)
+{
+  state.skip("Invalid combination of dtype and aggregation type.");
+}
+
+// Element types swept on both the InputType and the OutputType axes.
+using Types = nvbench::type_list<bool, int32_t, float, double>;
+// Skip benchmarking MAX/ANY since they are covered by MIN/ALL respectively.
+using AggKinds = nvbench::
+  enum_type_list<aggregation::SUM, aggregation::PRODUCT, aggregation::MIN, aggregation::ALL>;
+
+// Declares the benchmark over the three type axes (input type, output type, aggregation kind)
+// plus the integer axes `column_size` and `num_segments`, which the benchmark body reads back
+// through `state.get_int64`.
+NVBENCH_BENCH_TYPES(BM_Simple_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, Types, AggKinds))
+  .set_name("segmented_reduction_simple")
+  .set_type_axes_names({"InputType", "OutputType", "AggregationKinds"})
+  .add_int64_axis("column_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
+  .add_int64_axis("num_segments", {1'000, 10'000, 100'000});
+
+}  // namespace cudf
diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
@@ -148,6 +148,17 @@ class groupby_scan_aggregation : public virtual aggregation {
   groupby_scan_aggregation() {}
 };
 
+/**
+ * @brief Derived class intended for segmented reduction usage.
+ *
+ * Inherits virtually from `aggregation`. The constructor is protected, so objects of this type
+ * are created through further-derived classes rather than directly.
+ */
+class segmented_reduce_aggregation : public virtual aggregation {
+ public:
+  ~segmented_reduce_aggregation() override = default;
+
+ protected:
+  // Protected: only derived classes may construct this type.
+  segmented_reduce_aggregation() {}
+};
+
 enum class udf_type : bool { CUDA, PTX };
 enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN };
 

diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -147,7 +147,8 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
  */
 class sum_aggregation final : public rolling_aggregation,
                               public groupby_aggregation,
-                              public groupby_scan_aggregation {
+                              public groupby_scan_aggregation,
+                              public segmented_reduce_aggregation {
  public:
   sum_aggregation() : aggregation(SUM) {}
 
@@ -166,7 +167,7 @@ class sum_aggregation final : public rolling_aggregation,
 /**
  * @brief Derived class for specifying a product aggregation
  */
-class product_aggregation final : public groupby_aggregation {
+class product_aggregation final : public groupby_aggregation, public segmented_reduce_aggregation {
  public:
   product_aggregation() : aggregation(PRODUCT) {}
 
@@ -187,7 +188,8 @@ class product_aggregation final : public groupby_aggregation {
  */
 class min_aggregation final : public rolling_aggregation,
                               public groupby_aggregation,
-                              public groupby_scan_aggregation {
+                              public groupby_scan_aggregation,
+                              public segmented_reduce_aggregation {
  public:
   min_aggregation() : aggregation(MIN) {}
 
@@ -208,7 +210,8 @@ class min_aggregation final : public rolling_aggregation,
  */
 class max_aggregation final : public rolling_aggregation,
                               public groupby_aggregation,
-                              public groupby_scan_aggregation {
+                              public groupby_scan_aggregation,
+                              public segmented_reduce_aggregation {
  public:
   max_aggregation() : aggregation(MAX) {}
 
@@ -248,7 +251,7 @@ class count_aggregation final : public rolling_aggregation,
 /**
  * @brief Derived class for specifying an any aggregation
  */
-class any_aggregation final : public aggregation {
+class any_aggregation final : public segmented_reduce_aggregation {
  public:
   any_aggregation() : aggregation(ANY) {}
 
@@ -267,7 +270,7 @@ class any_aggregation final : public aggregation {
 /**
  * @brief Derived class for specifying an all aggregation
  */
-class all_aggregation final : public aggregation {
+class all_aggregation final : public segmented_reduce_aggregation {
  public:
   all_aggregation() : aggregation(ALL) {}