Commit e28c9c5
Adding hostdevice_span that is a span creatable from `hostdevice_vector` (#12981)

I ran into a need for a span-like view into a `hostdevice_vector`. I was chopping one up into pieces to pass to a function that processes portions at a time, but that function still needed to perform operations like host-to-device copies on the spans it was given. This class is the result of that need.
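For illustration, a minimal sketch of the pattern this enables, assuming a hypothetical `process_piece()` that consumes one chunk; the chunk size and include path are likewise illustrative:

#include "hostdevice_vector.hpp"

#include <rmm/cuda_stream_view.hpp>

#include <algorithm>
#include <cstddef>

// Hypothetical worker that launches device-side processing of one chunk.
void process_piece(hostdevice_span<int> piece, rmm::cuda_stream_view stream);

void process_in_chunks(hostdevice_vector<int>& vec, rmm::cuda_stream_view stream)
{
  constexpr std::size_t chunk_elements = 1024;  // illustrative chunk size
  for (std::size_t offset = 0; offset < vec.size(); offset += chunk_elements) {
    auto const count = std::min(chunk_elements, vec.size() - offset);
    auto piece       = vec.subspan(offset, count);
    piece.host_to_device(stream);  // copies only this chunk, not the whole vector
    process_piece(piece, stream);
  }
}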

Authors:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #12981
hyperbolic2346 authored Apr 7, 2023
1 parent d82f97c commit e28c9c5
Showing 4 changed files with 340 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cpp/src/io/orc/reader_impl.cu
@@ -1015,7 +1015,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
const auto num_columns = columns_level.size();
cudf::detail::hostdevice_2dvector<gpu::ColumnDesc> chunks(
total_num_stripes, num_columns, stream);
- memset(chunks.base_host_ptr(), 0, chunks.memory_size());
+ memset(chunks.base_host_ptr(), 0, chunks.size_bytes());

const bool use_index =
_use_index &&
165 changes: 165 additions & 0 deletions cpp/src/io/utilities/hostdevice_span.hpp
@@ -0,0 +1,165 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

/**
 * @brief A non-owning view into paired host and device buffers of `size()` elements each.
 */
template <typename T>
class hostdevice_span {
public:
using value_type = T;

hostdevice_span() = default;
~hostdevice_span() = default;
hostdevice_span(hostdevice_span const&) = default; ///< Copy constructor
hostdevice_span(hostdevice_span&&) = default; ///< Move constructor

/**
 * @brief Construct a hostdevice_span from host and device pointers.
 *
 * @param cpu_data Pointer to the host data
 * @param gpu_data Pointer to the device data
 * @param size The number of elements
 */
hostdevice_span(T* cpu_data, T* gpu_data, size_t size)
: _size(size), _host_data(cpu_data), _device_data(gpu_data)
{
}

/**
* @brief Copy assignment operator.
*
* @return Reference to this hostdevice_span.
*/
constexpr hostdevice_span& operator=(hostdevice_span const&) noexcept = default;

/**
 * @brief Converts a hostdevice_span into a device span.
 *
 * @return A typed device span of the hostdevice_span's data
 */
[[nodiscard]] operator cudf::device_span<T>() const noexcept
{
return cudf::device_span<T>(_device_data, size());
}

/**
 * @brief Returns a pointer to the underlying device data.
 *
 * @param offset The element offset into the device data
 * @return T* Typed pointer to the underlying device data
 */
[[nodiscard]] T* device_ptr(size_t offset = 0) const noexcept { return _device_data + offset; }

/**
 * @brief Returns a pointer to the first element of the device data.
 *
 * @return T* Pointer to the first element
 */
[[nodiscard]] T* device_begin() const noexcept { return device_ptr(); }

/**
 * @brief Returns a pointer to one past the last element of the device data.
 *
 * @return T* Pointer to one past the last element
 */
[[nodiscard]] T* device_end() const noexcept { return device_begin() + size(); }

/**
 * @brief Converts a hostdevice_span into a host span.
 *
 * @return A typed host span of the hostdevice_span's data
 */
[[nodiscard]] operator cudf::host_span<T>() const noexcept
{
return cudf::host_span<T>(_host_data, size());
}

/**
 * @brief Returns a pointer to the underlying host data.
 *
 * @param offset The element offset into the host data
 * @return T* Typed pointer to the underlying host data
 */
[[nodiscard]] T* host_ptr(size_t offset = 0) const noexcept { return _host_data + offset; }

/**
 * @brief Returns a pointer to the first element of the host data.
 *
 * @return T* Pointer to the first element
 */
[[nodiscard]] T* host_begin() const noexcept { return host_ptr(); }

/**
 * @brief Returns a pointer to one past the last element of the host data.
 *
 * @return T* Pointer to one past the last element
 */
[[nodiscard]] T* host_end() const noexcept { return host_begin() + size(); }

/**
* @brief Returns the number of elements in the view
*
* @return The number of elements in the view
*/
[[nodiscard]] std::size_t size() const noexcept { return _size; }

/**
* @brief Returns true if `size()` returns zero, or false otherwise
*
* @return True if `size()` returns zero, or false otherwise
*/
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }

/**
 * @brief Returns the size of the span's data in bytes.
 *
 * @return The size of the span's data in bytes
 */
[[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }

[[nodiscard]] T& operator[](size_t i) { return _host_data[i]; }
[[nodiscard]] T const& operator[](size_t i) const { return _host_data[i]; }

/**
* @brief Obtains a hostdevice_span that is a view over the `count` elements of this
* hostdevice_span starting at offset
*
* @param offset The offset of the first element in the subspan
* @param count The number of elements in the subspan
* @return A subspan of the sequence, of requested count and offset
*/
constexpr hostdevice_span<T> subspan(size_t offset, size_t count) const noexcept
{
return hostdevice_span<T>(_host_data + offset, _device_data + offset, count);
}

/**
 * @brief Asynchronously copies the span's data from host to device memory.
 *
 * @param stream CUDA stream on which the copy is enqueued
 * @param synchronize If true, synchronizes `stream` after the copy
 */
void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

/**
 * @brief Asynchronously copies the span's data from device to host memory.
 *
 * @param stream CUDA stream on which the copy is enqueued
 * @param synchronize If true, synchronizes `stream` after the copy
 */
void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

private:
size_t _size{}; ///< Number of elements
T* _device_data{}; ///< Pointer to device memory containing elements
T* _host_data{}; ///< Pointer to host memory containing elements
};
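For reference, a short sketch of how the implicit span conversions are meant to compose; `fill_on_host` and `launch_kernel` are hypothetical consumers, not part of this change:

#include "hostdevice_span.hpp"

#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

// Hypothetical host-side consumer.
void fill_on_host(cudf::host_span<int> s)
{
  for (auto& v : s) { v = 0; }
}

// Hypothetical device-side consumer.
void launch_kernel(cudf::device_span<int> s, rmm::cuda_stream_view stream);

void example(hostdevice_span<int> hd, rmm::cuda_stream_view stream)
{
  fill_on_host(hd);           // implicit conversion to cudf::host_span<int>
  hd.host_to_device(stream);  // make the host writes visible on the device
  launch_kernel(hd, stream);  // implicit conversion to cudf::device_span<int>
}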
31 changes: 27 additions & 4 deletions cpp/src/io/utilities/hostdevice_vector.hpp
@@ -17,6 +17,7 @@
#pragma once

#include "config_utils.hpp"
#include "hostdevice_span.hpp"

#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/utilities/default_stream.hpp>
@@ -90,7 +91,7 @@ class hostdevice_vector {

[[nodiscard]] size_t capacity() const noexcept { return d_data.size(); }
[[nodiscard]] size_t size() const noexcept { return current_size; }
- [[nodiscard]] size_t memory_size() const noexcept { return sizeof(T) * size(); }
+ [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }

[[nodiscard]] T& operator[](size_t i) { return host_data[i]; }
[[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; }
@@ -139,17 +140,39 @@ class hostdevice_vector {
void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
- cudaMemcpyAsync(device_ptr(), host_ptr(), memory_size(), cudaMemcpyDefault, stream.value()));
+ cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false)
{
CUDF_CUDA_TRY(
- cudaMemcpyAsync(host_ptr(), device_ptr(), memory_size(), cudaMemcpyDefault, stream.value()));
+ cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value()));
if (synchronize) { stream.synchronize(); }
}

/**
* @brief Converts a hostdevice_vector into a hostdevice_span.
*
* @return A typed hostdevice_span of the hostdevice_vector's data.
*/
[[nodiscard]] operator hostdevice_span<T>()
{
return hostdevice_span<T>{host_data, d_data.data(), size()};
}

/**
 * @brief Converts a part of a hostdevice_vector into a hostdevice_span.
 *
 * @param offset The offset of the first element in the subspan
 * @param count The number of elements in the subspan
 * @return A typed hostdevice_span of the hostdevice_vector's data
 */
[[nodiscard]] hostdevice_span<T> subspan(size_t offset, size_t count)
{
CUDF_EXPECTS(offset <= d_data.size(), "Offset is out of bounds.");
CUDF_EXPECTS(count <= d_data.size() - offset, "Slice range out of bounds.");
return hostdevice_span<T>{host_data + offset, d_data.data() + offset, count};
}

private:
std::variant<thrust::host_vector<T>, thrust::host_vector<T, cudf::detail::pinned_allocator<T>>>
h_data_owner;
@@ -207,7 +230,7 @@ class hostdevice_2dvector {

T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); }

- size_t memory_size() const noexcept { return _data.memory_size(); }
+ size_t size_bytes() const noexcept { return _data.size_bytes(); }

void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false)
{
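Tying the two files together, a hedged sketch of a helper that accepts either a whole hostdevice_vector (via the new implicit conversion) or a slice of one; `zero_and_sync` is illustrative only:

#include "hostdevice_vector.hpp"

#include <rmm/cuda_stream_view.hpp>

#include <cstring>

// Hypothetical helper: zero a hostdevice-backed range on the host, then mirror it.
void zero_and_sync(hostdevice_span<char> s, rmm::cuda_stream_view stream)
{
  std::memset(s.host_ptr(), 0, s.size_bytes());  // size_bytes(), formerly memory_size()
  s.host_to_device(stream, /*synchronize=*/true);
}

void example(hostdevice_vector<char>& vec, rmm::cuda_stream_view stream)
{
  zero_and_sync(vec, stream);                             // whole vector, implicit conversion
  zero_and_sync(vec.subspan(0, vec.size() / 2), stream);  // just the first half
}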