Skip to content

Commit

Permalink
Fix merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Jul 24, 2024
2 parents 70b6b2f + 8bba6df commit a9c20bc
Show file tree
Hide file tree
Showing 95 changed files with 3,755 additions and 763 deletions.
5 changes: 3 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ add_library(
src/interop/arrow_utilities.cpp
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/to_arrow_host.cu
src/interop/from_arrow_device.cu
src/interop/from_arrow_host.cu
src/interop/from_arrow_stream.cu
Expand Down Expand Up @@ -671,9 +672,9 @@ add_library(
src/unary/null_ops.cu
src/utilities/cuda_memcpy.cu
src/utilities/default_stream.cpp
src/utilities/host_memory.cpp
src/utilities/linked_column.cpp
src/utilities/logger.cpp
src/utilities/pinned_memory.cpp
src/utilities/prefetch.cpp
src/utilities/stacktrace.cpp
src/utilities/stream_pool.cpp
Expand Down Expand Up @@ -807,7 +808,7 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS_thread_pool>
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
)
Expand Down
16 changes: 12 additions & 4 deletions cpp/benchmarks/io/fst.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ void BM_FST_JSON(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down Expand Up @@ -134,7 +136,9 @@ void BM_FST_JSON_no_outidx(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down Expand Up @@ -171,7 +175,9 @@ void BM_FST_JSON_no_out(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down Expand Up @@ -209,7 +215,9 @@ void BM_FST_JSON_no_str(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ function(find_and_configure_nanoarrow)
# Currently we need to always build nanoarrow so we don't pick up a previously installed version
set(CPM_DOWNLOAD_nanoarrow ON)
rapids_cpm_find(
nanoarrow 0.5.0
nanoarrow 0.6.0.dev
GLOBAL_TARGETS nanoarrow
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG 11e73a8c85b45e3d49c8c541b4e1497a649fe03c
GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
GIT_SHALLOW FALSE
OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
)
Expand Down
20 changes: 7 additions & 13 deletions cpp/cmake/thirdparty/get_thread_pool.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,14 @@
# the License.
# =============================================================================

# This function finds rmm and sets any additional necessary environment variables.
# We need to call rapids_cpm_bs_thread_pool so that we can use an already-installed
# thread-pool, and also support installing it ourselves
function(find_and_configure_thread_pool)
rapids_cpm_find(
BS_thread_pool 4.1.0
CPM_ARGS
GIT_REPOSITORY https://github.com/bshoshany/thread-pool.git
GIT_TAG 097aa718f25d44315cadb80b407144ad455ee4f9
GIT_SHALLOW TRUE
)
if(NOT TARGET BS_thread_pool)
add_library(BS_thread_pool INTERFACE)
target_include_directories(BS_thread_pool INTERFACE ${BS_thread_pool_SOURCE_DIR}/include)
target_compile_definitions(BS_thread_pool INTERFACE "BS_THREAD_POOL_ENABLE_PAUSE=1")
endif()
include(${rapids-cmake-dir}/cpm/bs_thread_pool.cmake)

# Find or install thread-pool
rapids_cpm_bs_thread_pool(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)

endfunction()

find_and_configure_thread_pool()
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ void gather_bitmask(table_view const& source,
}

// Make device array of target bitmask pointers
std::vector<bitmask_type*> target_masks(target.size());
auto target_masks = make_host_vector<bitmask_type*>(target.size(), stream);
std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) {
return col->mutable_view().null_mask();
});
Expand Down
4 changes: 3 additions & 1 deletion cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ std::vector<size_type> segmented_count_bits(bitmask_type const* bitmask,
if (num_segments == 0) { return std::vector<size_type>{}; }

// Construct a contiguous host buffer of indices and copy to device.
auto const h_indices = std::vector<size_type>(indices_begin, indices_end);
auto h_indices = make_empty_host_vector<typename std::iterator_traits<IndexIterator>::value_type>(
std::distance(indices_begin, indices_end), stream);
std::copy(indices_begin, indices_end, std::back_inserter(h_indices));
auto const d_indices =
make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource());

Expand Down
51 changes: 51 additions & 0 deletions cpp/include/cudf/detail/utilities/host_memory.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/detail/utilities/host_vector.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/pinned_memory.hpp>

#include <rmm/resource_ref.hpp>

#include <cstddef>

namespace cudf::detail {
/**
* @brief Get the memory resource to be used for pageable memory allocations.
*
* @return Reference to the pageable memory resource
*/
CUDF_EXPORT rmm::host_async_resource_ref get_pageable_memory_resource();

/**
 * @brief Get the allocator to be used for the host memory allocation.
 *
 * @param size The number of elements of type T to allocate
 * @param stream The stream to use for the allocation
 * @return The allocator to be used for the host memory allocation
 */
template <typename T>
rmm_host_allocator<T> get_host_allocator(std::size_t size, rmm::cuda_stream_view stream)
{
  // Allocations at or below the configured threshold are served from the pinned
  // resource; larger requests fall back to the pageable host resource.
  auto const alloc_bytes = size * sizeof(T);
  if (alloc_bytes > get_allocate_host_as_pinned_threshold()) {
    return {get_pageable_memory_resource(), stream};
  }
  return {get_pinned_memory_resource(), stream};
}

} // namespace cudf::detail
24 changes: 21 additions & 3 deletions cpp/include/cudf/detail/utilities/host_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ class rmm_host_allocator<void> {
};
};

template <class DesiredProperty, class... Properties>
inline constexpr bool contains_property =
(cuda::std::is_same_v<DesiredProperty, Properties> || ... || false);

/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c `rmm::host_async_resource_ref` for allocation.
*
Expand Down Expand Up @@ -100,8 +104,12 @@ class rmm_host_allocator {
/**
* @brief Construct from a `cudf::host_async_resource_ref`
*/
rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream)
: mr(_mr), stream(_stream)
template <class... Properties>
rmm_host_allocator(cuda::mr::async_resource_ref<cuda::mr::host_accessible, Properties...> _mr,
rmm::cuda_stream_view _stream)
: mr(_mr),
stream(_stream),
_is_device_accessible{contains_property<cuda::mr::device_accessible, Properties...>}
{
}

Expand Down Expand Up @@ -173,15 +181,25 @@ class rmm_host_allocator {
*/
inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); }

bool is_device_accessible() const { return _is_device_accessible; }

private:
rmm::host_async_resource_ref mr;
rmm::cuda_stream_view stream;
bool _is_device_accessible;
};

/**
* @brief A vector class with rmm host memory allocator
*/
template <typename T>
using host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;
class host_vector : public thrust::host_vector<T, rmm_host_allocator<T>> {
public:
using base = thrust::host_vector<T, rmm_host_allocator<T>>;

host_vector(rmm_host_allocator<T> const& alloc) : base(alloc) {}

host_vector(size_t size, rmm_host_allocator<T> const& alloc) : base(size, alloc) {}
};

} // namespace cudf::detail
Loading

0 comments on commit a9c20bc

Please sign in to comment.