Merge remote-tracking branch 'upstream/branch-24.06' into ref/rangein…

…dex/attrs
rapidsai · Apr 23, 2024 · 6a6d30e · 6a6d30e
2 parents 0458b1c + 8db1851
commit 6a6d30e
Show file tree

Hide file tree

Showing 23 changed files with 1,739 additions and 228 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -359,6 +359,7 @@ add_library(
   src/interop/from_arrow.cu
   src/interop/to_arrow.cu
   src/interop/to_arrow_device.cu
+  src/interop/from_arrow_device.cu
   src/interop/to_arrow_schema.cpp
   src/interop/to_arrow_utilities.cpp
   src/interop/detail/arrow_allocator.cpp

diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -45,6 +45,8 @@ static std::string cuio_host_mem_param{
  * Initializes the default memory resource to use the RMM pool device resource.
  */
 struct nvbench_base_fixture {
+  using host_pooled_mr_t = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
+
   inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
 
   inline auto make_pool()
@@ -90,12 +92,14 @@ struct nvbench_base_fixture {
 
   inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
   {
-    using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
-    static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
-      std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
-      size_t{1} * 1024 * 1024 * 1024);
+    if (!this->host_pooled_mr) {
+      // Don't store in static, as the CUDA context may be destroyed before static destruction
+      this->host_pooled_mr = std::make_shared<host_pooled_mr_t>(
+        std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
+        size_t{1} * 1024 * 1024 * 1024);
+    }
 
-    return *mr;
+    return *this->host_pooled_mr;
   }
 
   inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
@@ -126,9 +130,16 @@ struct nvbench_base_fixture {
     std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
   }
 
+  ~nvbench_base_fixture()
+  {
+    // Ensure the the pool is freed before the CUDA context is destroyed:
+    cudf::io::set_host_memory_resource(this->make_cuio_host_pinned());
+  }
+
   std::shared_ptr<rmm::mr::device_memory_resource> mr;
   std::string rmm_mode{"pool"};
 
+  std::shared_ptr<host_pooled_mr_t> host_pooled_mr;
   std::string cuio_host_mode{"pinned"};
 };
 

diff --git a/cpp/benchmarks/fixture/nvbench_main.cpp b/cpp/benchmarks/fixture/nvbench_main.cpp
@@ -15,29 +15,44 @@
  */
 
 #include <benchmarks/fixture/nvbench_fixture.hpp>
-#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture
 
 #include <nvbench/main.cuh>
 
+#include <string>
 #include <vector>
 
+namespace cudf {
+
 // strip off the rmm_mode and cuio_host_mem parameters before passing the
 // remaining arguments to nvbench::option_parser
-#undef NVBENCH_MAIN_PARSE
-#define NVBENCH_MAIN_PARSE(argc, argv)                     \
-  nvbench::option_parser parser;                           \
-  std::vector<std::string> m_args;                         \
-  for (int i = 0; i < argc; ++i) {                         \
-    std::string arg = argv[i];                             \
-    if (arg == cudf::detail::rmm_mode_param) {             \
-      i += 2;                                              \
-    } else if (arg == cudf::detail::cuio_host_mem_param) { \
-      i += 2;                                              \
-    } else {                                               \
-      m_args.push_back(arg);                               \
-    }                                                      \
-  }                                                        \
-  parser.parse(m_args)
+void benchmark_arg_handler(std::vector<std::string>& args)
+{
+  std::vector<std::string> _cudf_tmp_args;
+
+  for (std::size_t i = 0; i < args.size(); ++i) {
+    std::string arg = args[i];
+    if (arg == cudf::detail::rmm_mode_param) {
+      i++;  // skip the next argument
+    } else if (arg == cudf::detail::cuio_host_mem_param) {
+      i++;  // skip the next argument
+    } else {
+      _cudf_tmp_args.push_back(arg);
+    }
+  }
+
+  args = _cudf_tmp_args;
+}
+
+}  // namespace cudf
+
+// Install arg handler
+#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER
+#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) cudf::benchmark_arg_handler(args)
+
+// Global fixture setup:
+#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST
+#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) \
+  [[maybe_unused]] auto env_state = cudf::nvbench_base_fixture(argc, argv);
 
 // this declares/defines the main() function using the definitions above
 NVBENCH_MAIN
diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff
diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json
@@ -2,13 +2,8 @@
 {
   "packages" : {
     "nvbench" : {
-      "patches" : [
-        {
-          "file" : "${current_json_dir}/nvbench_global_setup.diff",
-          "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]",
-          "fixed_in" : ""
-        }
-      ]
+      "git_url": "https://github.com/NVIDIA/nvbench.git",
+      "git_tag": "555d628e9b250868c9da003e4407087ff1982e8e"
     }
   }
 }
diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
@@ -348,5 +348,129 @@ std::unique_ptr<cudf::scalar> from_arrow(
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray
+ *
+ */
+using owned_columns_t = std::vector<std::unique_ptr<cudf::column>>;
+
+/**
+ * @brief functor for a custom deleter to a unique_ptr of table_view
+ *
+ * When converting from an ArrowDeviceArray, there are cases where data can't
+ * be zero-copy (i.e. bools or non-UINT32 dictionary indices). This custom deleter
+ * is used to maintain ownership over the data allocated since a `cudf::table_view`
+ * doesn't hold ownership.
+ */
+template <typename ViewType>
+struct custom_view_deleter {
+  /**
+   * @brief Construct a new custom view deleter object
+   *
+   * @param owned Vector of owning columns
+   */
+  explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {}
+
+  /**
+   * @brief operator to delete the unique_ptr
+   *
+   * @param ptr Pointer to the object to be deleted
+   */
+  void operator()(ViewType* ptr) const { delete ptr; }
+
+  owned_columns_t owned_mem_;  ///< Owned columns that must be deleted.
+};
+
+/**
+ * @brief typedef for a unique_ptr to a `cudf::table_view` with custom deleter
+ *
+ */
+using unique_table_view_t =
+  std::unique_ptr<cudf::table_view, custom_view_deleter<cudf::table_view>>;
+
+/**
+ * @brief Create `cudf::table_view` from given `ArrowDeviceArray` and `ArrowSchema`
+ *
+ * Constructs a non-owning `cudf::table_view` using `ArrowDeviceArray` and `ArrowSchema`,
+ * data must be accessible to the CUDA device. Because the resulting `cudf::table_view` will
+ * not own the data, the `ArrowDeviceArray` must be kept alive for the lifetime of the result.
+ * It is the responsibility of callers to ensure they call the release callback on the
+ * `ArrowDeviceArray` after it is no longer needed, and that the `cudf::table_view` is not
+ * accessed after this happens.
+ *
+ * @throws cudf::logic_error if device_type is not `ARROW_DEVICE_CUDA`, `ARROW_DEVICE_CUDA_HOST`
+ * or `ARROW_DEVICE_CUDA_MANAGED`
+ *
+ * @throws cudf::data_type_error if the input array is not a struct array, non-struct
+ * arrays should be passed to `from_arrow_device_column` instead.
+ *
+ * @throws cudf::data_type_error if the input arrow data type is not supported.
+ *
+ * Each child of the input struct will be the columns of the resulting table_view.
+ *
+ * @note The custom deleter used for the unique_ptr to the table_view maintains ownership
+ * over any memory which is allocated, such as converting boolean columns from the bitmap
+ * used by Arrow to the 1-byte per value for cudf.
+ *
+ * @note If the input `ArrowDeviceArray` contained a non-null sync_event it is assumed
+ * to be a `cudaEvent_t*` and the passed in stream will have `cudaStreamWaitEvent` called
+ * on it with the event. This function, however, will not explicitly synchronize on the
+ * stream.
+ *
+ * @param schema `ArrowSchema` pointer to object describing the type of the device array
+ * @param input `ArrowDeviceArray` pointer to object owning the Arrow data
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to perform any allocations
+ * @return `cudf::table_view` generated from given Arrow data
+ */
+unique_table_view_t from_arrow_device(
+  ArrowSchema const* schema,
+  ArrowDeviceArray const* input,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter
+ *
+ */
+using unique_column_view_t =
+  std::unique_ptr<cudf::column_view, custom_view_deleter<cudf::column_view>>;
+
+/**
+ * @brief Create `cudf::column_view` from given `ArrowDeviceArray` and `ArrowSchema`
+ *
+ * Constructs a non-owning `cudf::column_view` using `ArrowDeviceArray` and `ArrowSchema`,
+ * data must be accessible to the CUDA device. Because the resulting `cudf::column_view` will
+ * not own the data, the `ArrowDeviceArray` must be kept alive for the lifetime of the result.
+ * It is the responsibility of callers to ensure they call the release callback on the
+ * `ArrowDeviceArray` after it is no longer needed, and that the `cudf::column_view` is not
+ * accessed after this happens.
+ *
+ * @throws cudf::logic_error if device_type is not `ARROW_DEVICE_CUDA`, `ARROW_DEVICE_CUDA_HOST`
+ * or `ARROW_DEVICE_CUDA_MANAGED`
+ *
+ * @throws cudf::data_type_error input arrow data type is not supported.
+ *
+ * @note The custom deleter used for the unique_ptr to the table_view maintains ownership
+ * over any memory which is allocated, such as converting boolean columns from the bitmap
+ * used by Arrow to the 1-byte per value for cudf.
+ *
+ * @note If the input `ArrowDeviceArray` contained a non-null sync_event it is assumed
+ * to be a `cudaEvent_t*` and the passed in stream will have `cudaStreamWaitEvent` called
+ * on it with the event. This function, however, will not explicitly synchronize on the
+ * stream.
+ *
+ * @param schema `ArrowSchema` pointer to object describing the type of the device array
+ * @param input `ArrowDeviceArray` pointer to object owning the Arrow data
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to perform any allocations
+ * @return `cudf::column_view` generated from given Arrow data
+ */
+unique_column_view_t from_arrow_device_column(
+  ArrowSchema const* schema,
+  ArrowDeviceArray const* input,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of group
 }  // namespace cudf