Merge remote-tracking branch 'origin/branch-22.06' into devtools-buil…

…d-in-docker-for-native
rapidsai · Apr 7, 2022 · acc42a8 · acc42a8
2 parents 3ad98bf + fb03c8b
commit acc42a8
Show file tree

Hide file tree

Showing 152 changed files with 1,870 additions and 2,864 deletions.
diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 # Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 set -e
 

diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 ########################
 # cuDF Version Updater #
 ########################

diff --git a/ci/utils/nbtestlog2junitxml.py b/ci/utils/nbtestlog2junitxml.py
@@ -1,3 +1,4 @@
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 # Generate a junit-xml file from parsing a nbtest log
 
 import re

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -156,6 +156,21 @@ include(cmake/Modules/JitifyPreprocessKernels.cmake)
 # find cuFile
 include(cmake/Modules/FindcuFile.cmake)
 
+# Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved
+if(NOT BUILD_SHARED_LIBS)
+  include("${rapids-cmake-dir}/export/find_package_file.cmake")
+  list(APPEND METADATA_KINDS BUILD INSTALL)
+  foreach(METADATA_KIND IN LISTS METADATA_KINDS)
+    rapids_export_find_package_file(
+      ${METADATA_KIND} "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports
+    )
+    rapids_export_package(${METADATA_KIND} cuco cudf-exports)
+    rapids_export_package(${METADATA_KIND} ZLIB cudf-exports)
+    rapids_export_package(${METADATA_KIND} cuFile cudf-exports)
+    rapids_export_package(${METADATA_KIND} nvcomp cudf-exports)
+  endforeach()
+endif()
+
 # ##################################################################################################
 # * library targets -------------------------------------------------------------------------------
 

diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp
@@ -45,7 +45,7 @@ static void BM_concatenate(benchmark::State& state)
   auto input_columns = input->view();
   std::vector<cudf::column_view> column_views(input_columns.begin(), input_columns.end());
 
-  CHECK_CUDA(0);
+  CUDF_CHECK_CUDA(0);
 
   for (auto _ : state) {
     cuda_event_timer raii(state, true, rmm::cuda_stream_default);
@@ -87,7 +87,7 @@ static void BM_concatenate_tables(benchmark::State& state)
     return table->view();
   });
 
-  CHECK_CUDA(0);
+  CUDF_CHECK_CUDA(0);
 
   for (auto _ : state) {
     cuda_event_timer raii(state, true, rmm::cuda_stream_default);
@@ -146,7 +146,7 @@ static void BM_concatenate_strings(benchmark::State& state)
       return static_cast<cudf::column_view>(col);
     });
 
-  CHECK_CUDA(0);
+  CUDF_CHECK_CUDA(0);
 
   for (auto _ : state) {
     cuda_event_timer raii(state, true, rmm::cuda_stream_default);

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
@@ -87,6 +87,15 @@ T get_distribution_mean(distribution_params<T> const& dist)
   }
 }
 
+/**
+ * @brief Computes the average element size in a column, given the data profile.
+ *
+ * Random distribution parameters like average string length and maximum list nesting level affect
+ * the element size of non-fixed-width columns. For lists and structs, `avg_element_size` is called
+ * recursively to determine the size of nested columns.
+ */
+size_t avg_element_size(data_profile const& profile, cudf::data_type dtype);
+
 // Utilities to determine the mean size of an element, given the data profile
 template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_width<T>())>
 size_t non_fixed_width_size(data_profile const& profile)
@@ -112,10 +121,22 @@ size_t non_fixed_width_size<cudf::list_view>(data_profile const& profile)
 {
   auto const dist_params       = profile.get_distribution_params<cudf::list_view>();
   auto const single_level_mean = get_distribution_mean(dist_params.length_params);
-  auto const element_size      = cudf::size_of(cudf::data_type{dist_params.element_type});
+  auto const element_size = avg_element_size(profile, cudf::data_type{dist_params.element_type});
   return element_size * pow(single_level_mean, dist_params.max_depth);
 }
 
+template <>
+size_t non_fixed_width_size<cudf::struct_view>(data_profile const& profile)
+{
+  auto const dist_params = profile.get_distribution_params<cudf::struct_view>();
+  return std::accumulate(dist_params.leaf_types.cbegin(),
+                         dist_params.leaf_types.cend(),
+                         0ul,
+                         [&](auto& sum, auto type_id) {
+                           return sum + avg_element_size(profile, cudf::data_type{type_id});
+                         });
+}
+
 struct non_fixed_width_size_fn {
   template <typename T>
   size_t operator()(data_profile const& profile)
@@ -527,14 +548,6 @@ std::unique_ptr<cudf::column> create_random_column<cudf::dictionary32>(data_prof
   CUDF_FAIL("not implemented yet");
 }
 
-template <>
-std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profile const& profile,
-                                                                      thrust::minstd_rand& engine,
-                                                                      cudf::size_type num_rows)
-{
-  CUDF_FAIL("not implemented yet");
-}
-
 /**
  * @brief Functor to dispatch create_random_column calls.
  */
@@ -549,6 +562,93 @@ struct create_rand_col_fn {
   }
 };
 
+/**
+ * @brief Calculates the number of direct parents needed to generate a struct column hierarchy with
+ * lowest maximum number of children in any nested column.
+ *
+ * Used to generate an "evenly distributed" struct column hierarchy with the given number of leaf
+ * columns and nesting levels. The column tree is considered evenly distributed if all columns have
+ * nearly the same number of child columns (difference not larger than one).
+ */
+int num_direct_parents(int num_lvls, int num_leaf_columns)
+{
+  // Estimated average number of children in the hierarchy;
+  auto const num_children_avg = std::pow(num_leaf_columns, 1. / num_lvls);
+  // Minimum number of children columns for any column in the hierarchy
+  int const num_children_min = std::floor(num_children_avg);
+  // Maximum number of children columns for any column in the hierarchy
+  int const num_children_max = num_children_min + 1;
+
+  // Minimum number of columns needed so that their number of children does not exceed the maximum
+  int const min_for_current_nesting = std::ceil((double)num_leaf_columns / num_children_max);
+  // Minimum number of columns needed so that columns at the higher levels have at least the minimum
+  // number of children
+  int const min_for_upper_nesting = std::pow(num_children_min, num_lvls - 1);
+  // Both conditions need to be satisfied
+  return std::max(min_for_current_nesting, min_for_upper_nesting);
+}
+
+template <>
+std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profile const& profile,
+                                                                      thrust::minstd_rand& engine,
+                                                                      cudf::size_type num_rows)
+{
+  auto const dist_params = profile.get_distribution_params<cudf::struct_view>();
+
+  // Generate leaf columns
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.reserve(dist_params.leaf_types.size());
+  std::transform(dist_params.leaf_types.cbegin(),
+                 dist_params.leaf_types.cend(),
+                 std::back_inserter(children),
+                 [&](auto& type_id) {
+                   return cudf::type_dispatcher(
+                     cudf::data_type(type_id), create_rand_col_fn{}, profile, engine, num_rows);
+                 });
+
+  auto valid_dist =
+    random_value_fn<bool>(distribution_params<bool>{1. - profile.get_null_frequency().value_or(0)});
+
+  // Generate the column bottom-up
+  for (int lvl = dist_params.max_depth; lvl > 0; --lvl) {
+    // Generating the next level
+    std::vector<std::unique_ptr<cudf::column>> parents;
+    parents.resize(num_direct_parents(lvl, children.size()));
+
+    auto current_child = children.begin();
+    for (auto current_parent = parents.begin(); current_parent != parents.end(); ++current_parent) {
+      auto [null_mask, null_count] = [&]() {
+        if (profile.get_null_frequency().has_value()) {
+          auto valids = valid_dist(engine, num_rows);
+          return cudf::detail::valid_if(valids.begin(), valids.end(), thrust::identity<bool>{});
+        }
+        return std::pair<rmm::device_buffer, cudf::size_type>{};
+      }();
+
+      // Adopt remaining children as evenly as possible
+      auto const num_to_adopt = cudf::util::div_rounding_up_unsafe(
+        std::distance(current_child, children.end()), std::distance(current_parent, parents.end()));
+      CUDF_EXPECTS(num_to_adopt > 0, "No children columns left to adopt");
+
+      std::vector<std::unique_ptr<cudf::column>> children_to_adopt;
+      children_to_adopt.insert(children_to_adopt.end(),
+                               std::make_move_iterator(current_child),
+                               std::make_move_iterator(current_child + num_to_adopt));
+      current_child += children_to_adopt.size();
+
+      *current_parent = cudf::make_structs_column(
+        num_rows, std::move(children_to_adopt), null_count, std::move(null_mask));
+    }
+
+    if (lvl == 1) {
+      CUDF_EXPECTS(parents.size() == 1, "There should be one top-level column");
+      return std::move(parents.front());
+    }
+    children = std::move(parents);
+  }
+  CUDF_FAIL("Reached unreachable code in struct column creation");
+}
+
 template <typename T>
 struct clamp_down : public thrust::unary_function<T, T> {
   T max;

diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp
@@ -171,6 +171,15 @@ struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::list_view
   cudf::size_type max_depth;
 };
 
+/**
+ * @brief Structs are parameterized by the maximal nesting level, and the leaf column types.
+ */
+template <typename T>
+struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::struct_view>>> {
+  std::vector<cudf::type_id> leaf_types;
+  cudf::size_type max_depth;
+};
+
 // Present for compilation only. To be implemented once reader/writers support the fixed width type.
 template <typename T>
 struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
@@ -214,6 +223,8 @@ class data_profile {
   distribution_params<cudf::string_view> string_dist_desc{{distribution_id::NORMAL, 0, 32}};
   distribution_params<cudf::list_view> list_dist_desc{
     cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 100}, 2};
+  distribution_params<cudf::struct_view> struct_dist_desc{
+    {cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2};
   std::map<cudf::type_id, distribution_params<__uint128_t>> decimal_params;
 
   double bool_probability              = 0.5;
@@ -281,6 +292,12 @@ class data_profile {
     return list_dist_desc;
   }
 
+  template <typename T, std::enable_if_t<std::is_same_v<T, cudf::struct_view>>* = nullptr>
+  distribution_params<T> get_distribution_params() const
+  {
+    return struct_dist_desc;
+  }
+
   template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
   distribution_params<typename T::rep> get_distribution_params() const
   {
@@ -357,8 +374,28 @@ class data_profile {
   void set_cardinality(cudf::size_type c) { cardinality = c; }
   void set_avg_run_length(cudf::size_type avg_rl) { avg_run_length = avg_rl; }
 
-  void set_list_depth(cudf::size_type max_depth) { list_dist_desc.max_depth = max_depth; }
+  void set_list_depth(cudf::size_type max_depth)
+  {
+    CUDF_EXPECTS(max_depth > 0, "List depth must be positive");
+    list_dist_desc.max_depth = max_depth;
+  }
+
   void set_list_type(cudf::type_id type) { list_dist_desc.element_type = type; }
+
+  void set_struct_depth(cudf::size_type max_depth)
+  {
+    CUDF_EXPECTS(max_depth > 0, "Struct depth must be positive");
+    struct_dist_desc.max_depth = max_depth;
+  }
+
+  void set_struct_types(std::vector<cudf::type_id> const& types)
+  {
+    CUDF_EXPECTS(
+      std::none_of(
+        types.cbegin(), types.cend(), [](auto& type) { return type == cudf::type_id::STRUCT; }),
+      "Cannot include STRUCT as its own subtype");
+    struct_dist_desc.leaf_types = types;
+  }
 };
 
 /**

diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp
@@ -166,6 +166,7 @@ RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, decimal, type_group_id:
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST);
+RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, struct, cudf::type_id::STRUCT);
 
 BENCHMARK_DEFINE_F(OrcRead, column_selection)
 (::benchmark::State& state) { BM_orc_read_varying_options(state); }

diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -116,6 +116,7 @@ WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, decimal, type_group_id::F
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);
+WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT);
 
 BENCHMARK_DEFINE_F(OrcWrite, writer_options)
 (::benchmark::State& state) { BM_orc_write_varying_options(state); }

diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp
@@ -166,6 +166,7 @@ RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, decimal, type_group_id
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST);
+RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, struct, cudf::type_id::STRUCT);
 
 BENCHMARK_DEFINE_F(ParquetRead, column_selection)
 (::benchmark::State& state) { BM_parq_read_varying_options(state); }

diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp
@@ -111,6 +111,7 @@ WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, decimal, type_group_id::
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);
+WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT);
 
 BENCHMARK_DEFINE_F(ParquetWrite, writer_options)
 (::benchmark::State& state) { BM_parq_write_varying_options(state); }

diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -139,31 +139,31 @@ void generate_input_tables(key_type* const build_tbl,
 
   // Maximize exposed parallelism while minimizing storage for curand state
   int num_blocks_init_build_tbl{-1};
-  CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+  CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
     &num_blocks_init_build_tbl, init_build_tbl<key_type, size_type>, block_size, 0));
 
   int num_blocks_init_probe_tbl{-1};
-  CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+  CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
     &num_blocks_init_probe_tbl, init_probe_tbl<key_type, size_type>, block_size, 0));
 
   int dev_id{-1};
-  CUDA_TRY(cudaGetDevice(&dev_id));
+  CUDF_CUDA_TRY(cudaGetDevice(&dev_id));
 
   int num_sms{-1};
-  CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
+  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
 
   const int num_states =
     num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
   rmm::device_uvector<curandState> devStates(num_states, rmm::cuda_stream_default);
 
   init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states);
 
-  CHECK_CUDA(0);
+  CUDF_CHECK_CUDA(0);
 
   init_build_tbl<key_type, size_type><<<num_sms * num_blocks_init_build_tbl, block_size>>>(
     build_tbl, build_tbl_size, multiplicity, devStates.data(), num_states);
 
-  CHECK_CUDA(0);
+  CUDF_CHECK_CUDA(0);
 
   auto const rand_max = std::numeric_limits<key_type>::max();
 
@@ -177,5 +177,5 @@ void generate_input_tables(key_type* const build_tbl,
                                                           devStates.data(),
                                                           num_states);
 
-  CHECK_CUDA(0);
+  CUDF_CHECK_CUDA(0);
 }