Merge remote-tracking branch 'upstream/branch-22.04' into reduce-warnings
rapidsai · Feb 3, 2022 · 2c3c87e · 2c3c87e
2 parents d2c0c4b + a25a2ec
commit 2c3c87e
Show file tree

Hide file tree

Showing 53 changed files with 2,325 additions and 913 deletions.
diff --git a/build.sh b/build.sh
@@ -230,6 +230,7 @@ if buildAll || hasArg libcudf; then
         fi
         echo "$MSG"
         python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
+        cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
     fi
 
     if [[ ${INSTALL_TARGET} != "" ]]; then

diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
@@ -85,6 +85,7 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
       gpuci_logger "Copying build metrics results"
       mkdir -p "$WORKSPACE/build-metrics"
       cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
+      cp "$LIBCUDF_BUILD_DIR/ninja.log" "$WORKSPACE/build-metrics/ninja.log"
   fi
 
   gpuci_logger "Build conda pkg for libcudf_kafka"

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -123,7 +123,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
 
 # ##################################################################################################
 # * stream_compaction benchmark -------------------------------------------------------------------
-ConfigureBench(STREAM_COMPACTION_BENCH stream_compaction/drop_duplicates.cpp)
+ConfigureNVBench(STREAM_COMPACTION_BENCH stream_compaction/drop_duplicates.cpp)
 
 # ##################################################################################################
 # * join benchmark --------------------------------------------------------------------------------

diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp
@@ -88,13 +88,13 @@ void BM_orc_read_varying_options(benchmark::State& state)
   auto const use_np_dtypes = (flags & 2) != 0;
   auto const ts_type       = cudf::data_type{static_cast<cudf::type_id>(state.range(state_idx++))};
 
+  // skip_rows is not supported on nested types
   auto const data_types =
     dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                                    int32_t(type_group_id::FLOATING_POINT),
                                                    int32_t(type_group_id::FIXED_POINT),
                                                    int32_t(type_group_id::TIMESTAMP),
-                                                   int32_t(cudf::type_id::STRING),
-                                                   int32_t(cudf::type_id::LIST)}),
+                                                   int32_t(cudf::type_id::STRING)}),
                                 col_sel);
   auto const tbl  = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -181,11 +181,12 @@ BENCHMARK_REGISTER_F(OrcRead, column_selection)
   ->Unit(benchmark::kMillisecond)
   ->UseManualTime();
 
+// Need an API to get the number of stripes to enable row_selection::STRIPES here
 BENCHMARK_DEFINE_F(OrcRead, row_selection)
 (::benchmark::State& state) { BM_orc_read_varying_options(state); }
 BENCHMARK_REGISTER_F(OrcRead, row_selection)
   ->ArgsProduct({{int32_t(column_selection::ALL)},
-                 {int32_t(row_selection::STRIPES), int32_t(row_selection::NROWS)},
+                 {int32_t(row_selection::NROWS)},
                  {1, 8},
                  {0b11},  // defaults
                  {int32_t(cudf::type_id::EMPTY)}})

diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp
@@ -88,13 +88,13 @@ void BM_parq_read_varying_options(benchmark::State& state)
   auto const use_pandas_metadata = (flags & 2) != 0;
   auto const ts_type = cudf::data_type{static_cast<cudf::type_id>(state.range(state_idx++))};
 
+  // No nested types here, because of https://github.com/rapidsai/cudf/issues/9970
   auto const data_types = dtypes_for_column_selection(
     get_type_or_group({static_cast<int32_t>(type_group_id::INTEGRAL),
                        static_cast<int32_t>(type_group_id::FLOATING_POINT),
                        static_cast<int32_t>(type_group_id::FIXED_POINT),
                        static_cast<int32_t>(type_group_id::TIMESTAMP),
-                       static_cast<int32_t>(cudf::type_id::STRING),
-                       static_cast<int32_t>(cudf::type_id::LIST)}),
+                       static_cast<int32_t>(cudf::type_id::STRING)}),
     col_sel);
   auto const tbl  = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -181,20 +181,18 @@ BENCHMARK_REGISTER_F(ParquetRead, column_selection)
   ->Unit(benchmark::kMillisecond)
   ->UseManualTime();
 
-// Disabled until we add an API to read metadata from a parquet file and determine num row groups.
-// https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863
-/*
+// row_selection::ROW_GROUPS disabled until we add an API to read metadata from a parquet file and
+// determine num row groups. https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863
 BENCHMARK_DEFINE_F(ParquetRead, row_selection)
 (::benchmark::State& state) { BM_parq_read_varying_options(state); }
 BENCHMARK_REGISTER_F(ParquetRead, row_selection)
   ->ArgsProduct({{int32_t(column_selection::ALL)},
-                 {int32_t(row_selection::ROW_GROUPS), int32_t(row_selection::NROWS)},
+                 {int32_t(row_selection::NROWS)},
                  {1, 4},
                  {0b01},  // defaults
                  {int32_t(cudf::type_id::EMPTY)}})
   ->Unit(benchmark::kMillisecond)
   ->UseManualTime();
-*/
 
 BENCHMARK_DEFINE_F(ParquetRead, misc_options)
 (::benchmark::State& state) { BM_parq_read_varying_options(state); }

diff --git a/cpp/benchmarks/stream_compaction/drop_duplicates.cpp b/cpp/benchmarks/stream_compaction/drop_duplicates.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,64 +15,102 @@
  */
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/stream_compaction.hpp>
+#include <cudf/detail/stream_compaction.hpp>
 #include <cudf/types.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <fixture/benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
+
+#include <fixture/rmm_pool_raii.hpp>
+
+#include <nvbench/nvbench.cuh>
 
 #include <memory>
 #include <random>
 
-class Compaction : public cudf::benchmark {
-};
+// necessary for custom enum types
+// see: https://github.com/NVIDIA/nvbench/blob/main/examples/enums.cu
+NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
+  // Enum type:
+  cudf::duplicate_keep_option,
+  // Callable to generate input strings:
+  [](cudf::duplicate_keep_option option) {
+    switch (option) {
+      case cudf::duplicate_keep_option::KEEP_FIRST: return "KEEP_FIRST";
+      case cudf::duplicate_keep_option::KEEP_LAST: return "KEEP_LAST";
+      case cudf::duplicate_keep_option::KEEP_NONE: return "KEEP_NONE";
+      default: return "ERROR";
+    }
+  },
+  // Callable to generate descriptions:
+  [](auto) { return std::string{}; })
+
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
+
+/**
+ * @brief Benchmarks `cudf::detail::drop_duplicates` for one (Type, Keep) combination.
+ *
+ * Builds one column of `NumRows` values produced by `UniformRandomGenerator<long>(0, 100)`,
+ * marks every row whose index is a multiple of 100 as null, repeats that column four times to
+ * form the input table, and measures dropping duplicates keyed on column 0.
+ *
+ * Only `int32_t` is measured with every keep option; every other type is measured with
+ * `KEEP_FIRST` only, and the remaining combinations are skipped.
+ *
+ * @tparam Type Element type of the generated column
+ * @tparam Keep Which occurrence of a duplicate to keep
+ * @param state NVBench state that supplies the `NumRows` axis value and runs the measurement
+ */
+template <typename Type, cudf::duplicate_keep_option Keep>
+void nvbench_drop_duplicates(nvbench::state& state,
+                             nvbench::type_list<Type, nvbench::enum_type<Keep>>)
+{
+  if constexpr (not std::is_same_v<Type, int32_t> and
+                Keep != cudf::duplicate_keep_option::KEEP_FIRST) {
+    state.skip("Skip unwanted benchmarks.");
+    // Return right after skip(), following NVBench's documented usage, so that a skipped
+    // configuration does not build the input data or reach state.exec().
+    return;
+  }
+
+  cudf::rmm_pool_raii pool_raii;
+
+  auto const num_rows = state.get_int64("NumRows");
+
+  cudf::test::UniformRandomGenerator<long> rand_gen(0, 100);
+  auto elements = cudf::detail::make_counting_transform_iterator(
+    0, [&rand_gen](auto row) { return rand_gen.generate(); });
+  // Rows whose index is a multiple of 100 are null; all others are valid.
+  auto valids = cudf::detail::make_counting_transform_iterator(
+    0, [](auto i) { return i % 100 == 0 ? false : true; });
+  cudf::test::fixed_width_column_wrapper<Type, long> values(elements, elements + num_rows, valids);
+
+  auto input_column = cudf::column_view(values);
+  auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    // Run on the stream NVBench provides for this launch.
+    rmm::cuda_stream_view stream_view{launch.get_stream()};
+    auto result = cudf::detail::drop_duplicates(
+      input_table, {0}, Keep, cudf::null_equality::EQUAL, cudf::null_order::BEFORE, stream_view);
+  });
+}
 
 template <typename Type>
-void BM_compaction(benchmark::State& state, cudf::duplicate_keep_option keep)
+void nvbench_unordered_drop_duplicates(nvbench::state& state, nvbench::type_list<Type>)
 {
-  auto const n_rows = static_cast<cudf::size_type>(state.range(0));
+  cudf::rmm_pool_raii pool_raii;
+
+  auto const num_rows = state.get_int64("NumRows");
 
   cudf::test::UniformRandomGenerator<long> rand_gen(0, 100);
   auto elements = cudf::detail::make_counting_transform_iterator(
     0, [&rand_gen](auto row) { return rand_gen.generate(); });
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 100 == 0 ? false : true; });
-  cudf::test::fixed_width_column_wrapper<Type, long> values(elements, elements + n_rows, valids);
+  cudf::test::fixed_width_column_wrapper<Type, long> values(elements, elements + num_rows, valids);
 
   auto input_column = cudf::column_view(values);
   auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
 
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    auto result = cudf::drop_duplicates(input_table, {0}, keep);
-  }
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    rmm::cuda_stream_view stream_view{launch.get_stream()};
+    auto result = cudf::detail::unordered_drop_duplicates(
+      input_table, {0}, cudf::null_equality::EQUAL, stream_view);
+  });
 }
 
-#define concat(a, b, c) a##b##c
-#define get_keep(op)    cudf::duplicate_keep_option::KEEP_##op
-
-// TYPE, OP
-#define RBM_BENCHMARK_DEFINE(name, type, keep)                     \
-  BENCHMARK_DEFINE_F(Compaction, name)(::benchmark::State & state) \
-  {                                                                \
-    BM_compaction<type>(state, get_keep(keep));                    \
-  }                                                                \
-  BENCHMARK_REGISTER_F(Compaction, name)                           \
-    ->UseManualTime()                                              \
-    ->Arg(10000)    /* 10k */                                      \
-    ->Arg(100000)   /* 100k */                                     \
-    ->Arg(1000000)  /* 1M */                                       \
-    ->Arg(10000000) /* 10M */
-
-#define COMPACTION_BENCHMARK_DEFINE(type, keep) \
-  RBM_BENCHMARK_DEFINE(concat(type, _, keep), type, keep)
-
-COMPACTION_BENCHMARK_DEFINE(bool, NONE);
-COMPACTION_BENCHMARK_DEFINE(int8_t, NONE);
-COMPACTION_BENCHMARK_DEFINE(int32_t, NONE);
-COMPACTION_BENCHMARK_DEFINE(int32_t, FIRST);
-COMPACTION_BENCHMARK_DEFINE(int32_t, LAST);
-using cudf::timestamp_ms;
-COMPACTION_BENCHMARK_DEFINE(timestamp_ms, NONE);
-COMPACTION_BENCHMARK_DEFINE(float, NONE);
+using data_type   = nvbench::type_list<bool, int8_t, int32_t, int64_t, float, cudf::timestamp_ms>;
+using keep_option = nvbench::enum_type_list<cudf::duplicate_keep_option::KEEP_FIRST,
+                                            cudf::duplicate_keep_option::KEEP_LAST,
+                                            cudf::duplicate_keep_option::KEEP_NONE>;
+
+NVBENCH_BENCH_TYPES(nvbench_drop_duplicates, NVBENCH_TYPE_AXES(data_type, keep_option))
+  .set_name("drop_duplicates")
+  .set_type_axes_names({"Type", "KeepOption"})
+  .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
+
+NVBENCH_BENCH_TYPES(nvbench_unordered_drop_duplicates, NVBENCH_TYPE_AXES(data_type))
+  .set_name("unordered_drop_duplicates")
+  .set_type_axes_names({"Type"})
+  .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
     cuco 0.0
     GLOBAL_TARGETS cuco::cuco
     CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
-    GIT_TAG 0ca860b824f5dc22cf8a41f09912e62e11f07d82
+    GIT_TAG 6ec8b6dcdeceea07ab4456d32461a05c18864411
     OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
   )
 

diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -75,6 +75,18 @@ std::unique_ptr<table> drop_duplicates(
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @copydoc cudf::unordered_drop_duplicates
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<table> unordered_drop_duplicates(
+  table_view const& input,
+  std::vector<size_type> const& keys,
+  null_equality nulls_equal           = null_equality::EQUAL,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy)
  *
@@ -94,5 +106,24 @@ cudf::size_type distinct_count(table_view const& input,
                                null_equality nulls_equal    = null_equality::EQUAL,
                                rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
+/**
+ * @copydoc cudf::unordered_distinct_count(column_view const&, null_policy, nan_policy)
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+cudf::size_type unordered_distinct_count(column_view const& input,
+                                         null_policy null_handling,
+                                         nan_policy nan_handling,
+                                         rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
+/**
+ * @copydoc cudf::unordered_distinct_count(table_view const&, null_equality)
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+cudf::size_type unordered_distinct_count(table_view const& input,
+                                         null_equality nulls_equal    = null_equality::EQUAL,
+                                         rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
 }  // namespace detail
 }  // namespace cudf