Merge branch 'branch-22.04' into regex-dispatcher

rapidsai · Feb 28, 2022 · 3015451 · 3015451
2 parents 85639db + 4c9ef51
commit 3015451
Show file tree

Hide file tree

Showing 132 changed files with 2,476 additions and 1,659 deletions.
diff --git a/build.sh b/build.sh
@@ -168,6 +168,10 @@ if hasArg clean; then
         rmdir ${bd} || true
     fi
     done
+
+    # Cleaning up python artifacts
+    find ${REPODIR}/python/ | grep -E "(__pycache__|\.pyc|\.pyo|\.so$)"  | xargs rm -rf
+
 fi
 
 

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 find_package(Threads REQUIRED)
 
-add_library(cudf_datagen STATIC common/generate_input.cpp)
+add_library(cudf_datagen STATIC common/generate_input.cpp common/generate_nullmask.cu)
 target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17)
 
 target_compile_options(
@@ -276,7 +276,7 @@ ConfigureBench(
 
 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
-ConfigureBench(JSON_BENCH string/json.cpp)
+ConfigureBench(JSON_BENCH string/json.cu)
 
 # ##################################################################################################
 # * io benchmark ---------------------------------------------------------------------

diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,72 +14,42 @@
  * limitations under the License.
  */
 
-#include <cudf/column/column_factories.hpp>
-#include <cudf/table/table.hpp>
-#include <cudf/table/table_view.hpp>
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <cudf/transform.hpp>
 #include <cudf/types.hpp>
-#include <cudf/utilities/error.hpp>
-
-#include <cudf_test/column_wrapper.hpp>
-
-#include <benchmark/benchmark.h>
-#include <fixture/benchmark_fixture.hpp>
-#include <fixture/templated_benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
 
 #include <thrust/iterator/counting_iterator.h>
 
 #include <algorithm>
 #include <list>
-#include <numeric>
-#include <random>
+#include <memory>
 #include <vector>
 
 enum class TreeType {
   IMBALANCED_LEFT  // All operator expressions have a left child operator expression and a right
                    // child column reference
 };
 
+template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
 class AST : public cudf::benchmark {
 };
 
 template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
 static void BM_ast_transform(benchmark::State& state)
 {
-  const cudf::size_type table_size{(cudf::size_type)state.range(0)};
-  const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
+  auto const table_size{static_cast<cudf::size_type>(state.range(0))};
+  auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};
 
   // Create table data
-  auto n_cols          = reuse_columns ? 1 : tree_levels + 1;
-  auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>(n_cols);
-  auto columns         = std::vector<cudf::column_view>(n_cols);
-
-  auto data_iterator = thrust::make_counting_iterator(0);
-
-  if constexpr (Nullable) {
-    auto validities = std::vector<bool>(table_size);
-    std::random_device rd;
-    std::mt19937 gen(rd());
-
-    std::generate(
-      validities.begin(), validities.end(), [&]() { return gen() > (0.5 * gen.max()); });
-    std::generate_n(column_wrappers.begin(), n_cols, [=]() {
-      return cudf::test::fixed_width_column_wrapper<key_type>(
-        data_iterator, data_iterator + table_size, validities.begin());
-    });
-  } else {
-    std::generate_n(column_wrappers.begin(), n_cols, [=]() {
-      return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
-                                                              data_iterator + table_size);
-    });
-  }
-  std::transform(
-    column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
-      return static_cast<cudf::column_view>(col);
-    });
-
-  cudf::table_view table{columns};
+  auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
+  auto const source_table =
+    create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
+                          row_count{table_size},
+                          Nullable ? 0.5 : -1.0);
+  auto table = source_table->view();
 
   // Create column references
   auto column_refs = std::vector<cudf::ast::column_reference>();
@@ -138,10 +108,15 @@ static void CustomRanges(benchmark::internal::Benchmark* b)
   }
 }
 
-#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable)   \
-  TEMPLATED_BENCHMARK_F(AST, BM_ast_transform, key_type, tree_type, reuse_columns, nullable) \
-    ->Apply(CustomRanges)                                                                    \
-    ->Unit(benchmark::kMillisecond)                                                          \
+#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
+  BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable)     \
+  (::benchmark::State & st)                                                                \
+  {                                                                                        \
+    BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st);                    \
+  }                                                                                        \
+  BENCHMARK_REGISTER_F(AST, name)                                                          \
+    ->Apply(CustomRanges)                                                                  \
+    ->Unit(benchmark::kMillisecond)                                                        \
     ->UseManualTime();
 
 AST_TRANSFORM_BENCHMARK_DEFINE(

diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,23 +14,15 @@
  * limitations under the License.
  */
 
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <cudf/binaryop.hpp>
-#include <cudf/column/column_factories.hpp>
-#include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
-#include <cudf/utilities/error.hpp>
-
-#include <cudf_test/column_wrapper.hpp>
-
-#include <benchmark/benchmark.h>
-#include <fixture/benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
-
-#include <thrust/iterator/counting_iterator.h>
 
 #include <algorithm>
-#include <numeric>
 #include <vector>
 
 // This set of benchmarks is designed to be a comparison for the AST benchmarks
@@ -47,40 +39,29 @@ class BINARYOP : public cudf::benchmark {
 template <typename key_type, TreeType tree_type, bool reuse_columns>
 static void BM_binaryop_transform(benchmark::State& state)
 {
-  const cudf::size_type table_size{(cudf::size_type)state.range(0)};
-  const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
+  auto const table_size{static_cast<cudf::size_type>(state.range(0))};
+  auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};
 
   // Create table data
-  auto n_cols          = reuse_columns ? 1 : tree_levels + 1;
-  auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>();
-  auto columns         = std::vector<cudf::column_view>(n_cols);
-
-  auto data_iterator = thrust::make_counting_iterator(0);
-  std::generate_n(std::back_inserter(column_wrappers), n_cols, [=]() {
-    return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
-                                                            data_iterator + table_size);
-  });
-  std::transform(
-    column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
-      return static_cast<cudf::column_view>(col);
-    });
-
-  cudf::table_view table{columns};
+  auto const n_cols       = reuse_columns ? 1 : tree_levels + 1;
+  auto const source_table = create_sequence_table(
+    cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
+  cudf::table_view table{*source_table};
 
   // Execute benchmark
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     // Execute tree that chains additions like (((a + b) + c) + d)
-    auto const op         = cudf::binary_operator::ADD;
-    auto result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
+    auto const op               = cudf::binary_operator::ADD;
+    auto const result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
     if (reuse_columns) {
-      auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type);
+      auto result = cudf::binary_operation(table.column(0), table.column(0), op, result_data_type);
       for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
-        result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type);
+        result = cudf::binary_operation(result->view(), table.column(0), op, result_data_type);
       }
     } else {
-      auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type);
-      std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) {
+      auto result = cudf::binary_operation(table.column(0), table.column(1), op, result_data_type);
+      std::for_each(std::next(table.begin(), 2), table.end(), [&](auto const& col) {
         result = cudf::binary_operation(result->view(), col, op, result_data_type);
       });
     }

diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp
@@ -14,30 +14,25 @@
  * limitations under the License.
  */
 
-#include <fixture/benchmark_fixture.hpp>
-#include <fixture/templated_benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
-
-#include <cudf_test/column_wrapper.hpp>
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/binaryop.hpp>
 
-#include <thrust/iterator/counting_iterator.h>
-
 class COMPILED_BINARYOP : public cudf::benchmark {
 };
 
 template <typename TypeLhs, typename TypeRhs, typename TypeOut>
 void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
 {
-  const cudf::size_type column_size{(cudf::size_type)state.range(0)};
+  auto const column_size{static_cast<cudf::size_type>(state.range(0))};
 
-  auto data_it = thrust::make_counting_iterator(0);
-  cudf::test::fixed_width_column_wrapper<TypeLhs> input1(data_it, data_it + column_size);
-  cudf::test::fixed_width_column_wrapper<TypeRhs> input2(data_it, data_it + column_size);
+  auto const source_table = create_sequence_table(
+    {cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{column_size});
 
-  auto lhs          = cudf::column_view(input1);
-  auto rhs          = cudf::column_view(input2);
+  auto lhs          = cudf::column_view(source_table->get_column(0));
+  auto rhs          = cudf::column_view(source_table->get_column(1));
   auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());
 
   // Call once for hot cache.
-Original file line number
+Diff line change
@@ -168,6 +168,10 @@ if hasArg clean; then
             rmdir ${bd} || true
         fi
         done
+        # Cleaning up python artifacts
+        find ${REPODIR}/python/ | grep -E "(__pycache__|\.pyc|\.pyo|\.so$)"  | xargs rm -rf
     fi
@@ Expand Down @@