Commit e28c9c5
Adding hostdevice_span that is a span creatable from `hostdevice_vector` (#12981)

I ran into a need for a span-like view into a `hostdevice_vector`. I was chopping one up into pieces to pass to a function that processes portions at a time, but that function still needed to perform operations like host-to-device copies on the spans it was given. This class is the result of that need.
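For illustration, a minimal sketch of the pattern this enables, assuming a hypothetical `process_piece()` that consumes one chunk; the chunk size and include path are likewise illustrative:

#include "hostdevice_vector.hpp"

#include <rmm/cuda_stream_view.hpp>

#include <algorithm>
#include <cstddef>

// Hypothetical worker that launches device-side processing of one chunk.
void process_piece(hostdevice_span<int> piece, rmm::cuda_stream_view stream);

void process_in_chunks(hostdevice_vector<int>& vec, rmm::cuda_stream_view stream)
{
  constexpr std::size_t chunk_elements = 1024;  // illustrative chunk size
  for (std::size_t offset = 0; offset < vec.size(); offset += chunk_elements) {
    auto const count = std::min(chunk_elements, vec.size() - offset);
    auto piece       = vec.subspan(offset, count);
    piece.host_to_device(stream);  // copies only this chunk, not the whole vector
    process_piece(piece, stream);
  }
}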

Authors:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #12981
hyperbolic2346 authored Apr 7, 2023
1 parent d82f97c commit e28c9c5
Showing 4 changed files with 340 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cpp/src/io/orc/reader_impl.cu
@@ -1015,7 +1015,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
const auto num_columns = columns_level.size();
cudf::detail::hostdevice_2dvector<gpu::ColumnDesc> chunks(
total_num_stripes, num_columns, stream);
- memset(chunks.base_host_ptr(), 0, chunks.memory_size());
+ memset(chunks.base_host_ptr(), 0, chunks.size_bytes());

const bool use_index =
_use_index &&
165 changes: 165 additions & 0 deletions cpp/src/io/utilities/hostdevice_span.hpp
@@ -0,0 +1,165 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

/**
 * @brief A non-owning view into paired host and device buffers of `size()` elements each.
 */
template <typename T>
class hostdevice_span {
public:
using value_type = T;

hostdevice_span() = default;
~hostdevice_span() = default;
hostdevice_span(hostdevice_span const&) = default; ///< Copy constructor
hostdevice_span(hostdevice_span&&) = default; ///< Move constructor

/**
 * @brief Construct a hostdevice_span from host and device pointers.
 *
 * @param cpu_data Pointer to the host data
 * @param gpu_data Pointer to the device data
 * @param size The number of elements
 */
hostdevice_span(T* cpu_data, T* gpu_data, size_t size)
: _size(size), _host_data(cpu_data), _device_data(gpu_data)
{
}

/**
* @brief Copy assignment operator.
*
* @return Reference to this hostdevice_span.
*/
constexpr hostdevice_span& operator=(hostdevice_span const&) noexcept = default;

/**
 * @brief Converts a hostdevice_span into a device span.
 *
 * @return A typed device span of the hostdevice_span's data
 */
[[nodiscard]] operator cudf::device_span<T>() const noexcept
{
return cudf::device_span<T>(_device_data, size());
}

/**
 * @brief Returns a pointer to the underlying device data.
 *
 * @param offset The element offset into the device data
 * @return T* Typed pointer to the underlying device data
 */
[[nodiscard]] T* device_ptr(size_t offset = 0) const noexcept { return _device_data + offset; }

/**
 * @brief Returns a pointer to the first element of the device data.
 *
 * @return T* Pointer to the first element
 */
[[nodiscard]] T* device_begin() const noexcept { return device_ptr(); }

/**
 * @brief Returns a pointer to one past the last element of the device data.
 *
 * @return T* Pointer to one past the last element
 */
[[nodiscard]] T* device_end() const noexcept { return device_begin() + size(); }

/**
 * @brief Converts a hostdevice_span into a host span.
 *
 * @return A typed host span of the hostdevice_span's data
 */
[[nodiscard]] operator cudf::host_span<T>() const noexcept
{
return cudf::host_span<T>(_host_data, size());
}

/**
 * @brief Returns a pointer to the underlying host data.
 *
 * @param offset The element offset into the host data
 * @return T* Typed pointer to the underlying host data
 */
[[nodiscard]] T* host_ptr(size_t offset = 0) const noexcept { return _host_data + offset; }

/**
 * @brief Returns a pointer to the first element of the host data.
 *
 * @return T* Pointer to the first element
 */
[[nodiscard]] T* host_begin() const noexcept { return host_ptr(); }

/**
 * @brief Returns a pointer to one past the last element of the host data.
 *
 * @return T* Pointer to one past the last element
 */
[[nodiscard]] T* host_end() const noexcept { return host_begin() + size(); }

/**
* @brief Returns the number of elements in the view
*
* @return The number of elements in the view
*/
[[nodiscard]] std::size_t size() const noexcept { return _size; }

/**
* @brief Returns true if `size()` returns zero, or false otherwise
*
* @return True if `size()` returns zero, or false otherwise
*/
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }

/**
 * @brief Returns the size of the span's data in bytes.
 *
 * @return The size of the span's data in bytes
 */
[[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }

[[nodiscard]] T& operator[](size_t i) { return _host_data[i]; }
[[nodiscard]] T const& operator[](size_t i) const { return _host_data[i]; }

/**
* @brief Obtains a hostdevice_span that is a view over the `count` elements of this
* hostdevice_span starting at offset
*
* @param offset The offset of the first element in the subspan
* @param count The number of elements in the subspan
* @return A subspan of the sequence, of requested count and offset
*/
constexpr hostdevice_span<T> subspan(size_t offset, size_t count) const noexcept
{
return hostdevice_span<T>(_host_data + offset, _device_data + offset, count);
}

/**
 * @brief Asynchronously copies the span's data from host to device memory.
 *
 * @param stream CUDA stream on which the copy is enqueued
 * @param synchronize If true, synchronizes `stream` after the copy
 */
void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

/**
 * @brief Asynchronously copies the span's data from device to host memory.
 *
 * @param stream CUDA stream on which the copy is enqueued
 * @param synchronize If true, synchronizes `stream` after the copy
 */
void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

private:
size_t _size{}; ///< Number of elements
T* _device_data{}; ///< Pointer to device memory containing elements
T* _host_data{}; ///< Pointer to host memory containing elements
};
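For reference, a short sketch of how the implicit span conversions are meant to compose; `fill_on_host` and `launch_kernel` are hypothetical consumers, not part of this change:

#include "hostdevice_span.hpp"

#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

// Hypothetical host-side consumer.
void fill_on_host(cudf::host_span<int> s)
{
  for (auto& v : s) { v = 0; }
}

// Hypothetical device-side consumer.
void launch_kernel(cudf::device_span<int> s, rmm::cuda_stream_view stream);

void example(hostdevice_span<int> hd, rmm::cuda_stream_view stream)
{
  fill_on_host(hd);           // implicit conversion to cudf::host_span<int>
  hd.host_to_device(stream);  // make the host writes visible on the device
  launch_kernel(hd, stream);  // implicit conversion to cudf::device_span<int>
}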
31 changes: 27 additions & 4 deletions cpp/src/io/utilities/hostdevice_vector.hpp
@@ -17,6 +17,7 @@
#pragma once

#include "config_utils.hpp"
#include "hostdevice_span.hpp"

#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/utilities/default_stream.hpp>
@@ -90,7 +91,7 @@ class hostdevice_vector {

[[nodiscard]] size_t capacity() const noexcept { return d_data.size(); }
[[nodiscard]] size_t size() const noexcept { return current_size; }
- [[nodiscard]] size_t memory_size() const noexcept { return sizeof(T) * size(); }
+ [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }

[[nodiscard]] T& operator[](size_t i) { return host_data[i]; }
[[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; }
@@ -139,17 +140,39 @@ class hostdevice_vector {
void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
- cudaMemcpyAsync(device_ptr(), host_ptr(), memory_size(), cudaMemcpyDefault, stream.value()));
+ cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
- cudaMemcpyAsync(host_ptr(), device_ptr(), memory_size(), cudaMemcpyDefault, stream.value()));
+ cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

/**
* @brief Converts a hostdevice_vector into a hostdevice_span.
*
* @return A typed hostdevice_span of the hostdevice_vector's data.
*/
[[nodiscard]] operator hostdevice_span<T>()
{
return hostdevice_span<T>{host_data, d_data.data(), size()};
}

/**
 * @brief Converts a part of a hostdevice_vector into a hostdevice_span.
 *
 * @param offset The offset of the first element in the subspan
 * @param count The number of elements in the subspan
 * @return A typed hostdevice_span of the hostdevice_vector's data
 */
[[nodiscard]] hostdevice_span<T> subspan(size_t offset, size_t count)
{
CUDF_EXPECTS(offset <= d_data.size(), "Offset is out of bounds.");
CUDF_EXPECTS(count <= d_data.size() - offset, "Slice range out of bounds.");
return hostdevice_span<T>{host_data + offset, d_data.data() + offset, count};
}

private:
std::variant<thrust::host_vector<T>, thrust::host_vector<T, cudf::detail::pinned_allocator<T>>>
h_data_owner;
@@ -207,7 +230,7 @@ class hostdevice_2dvector {

T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); }

- size_t memory_size() const noexcept { return _data.memory_size(); }
+ size_t size_bytes() const noexcept { return _data.size_bytes(); }

void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false)
{
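Tying the two files together, a hedged sketch of a helper that accepts either a whole hostdevice_vector (via the new implicit conversion) or a slice of one; `zero_and_sync` is illustrative only:

#include "hostdevice_vector.hpp"

#include <rmm/cuda_stream_view.hpp>

#include <cstring>

// Hypothetical helper: zero a hostdevice-backed range on the host, then mirror it.
void zero_and_sync(hostdevice_span<char> s, rmm::cuda_stream_view stream)
{
  std::memset(s.host_ptr(), 0, s.size_bytes());  // size_bytes(), formerly memory_size()
  s.host_to_device(stream, /*synchronize=*/true);
}

void example(hostdevice_vector<char>& vec, rmm::cuda_stream_view stream)
{
  zero_and_sync(vec, stream);                             // whole vector, implicit conversion
  zero_and_sync(vec.subspan(0, vec.size() / 2), stream);  // just the first half
}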