From e28c9c55e7fa05bd6492aea0d01e8c6c3582e986 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Thu, 6 Apr 2023 21:18:09 -0400 Subject: [PATCH] Adding `hostdevice_span` that is a span createable from `hostdevice_vector` (#12981) I ran into a need for a span-like view into a `hostdevice_vector`. I was chopping it up into pieces to pass into a function to process portions at a time, but it still wanted to do things like host to device on the spans. This class is a result of that need. Authors: - Mike Wilson (https://github.com/hyperbolic2346) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/12981 --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/utilities/hostdevice_span.hpp | 165 +++++++++++++++++++++ cpp/src/io/utilities/hostdevice_vector.hpp | 31 +++- cpp/tests/utilities_tests/span_tests.cu | 151 ++++++++++++++++++- 4 files changed, 340 insertions(+), 9 deletions(-) create mode 100644 cpp/src/io/utilities/hostdevice_span.hpp diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index bcf53159676..fd3fdc74978 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1015,7 +1015,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, const auto num_columns = columns_level.size(); cudf::detail::hostdevice_2dvector chunks( total_num_stripes, num_columns, stream); - memset(chunks.base_host_ptr(), 0, chunks.memory_size()); + memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); const bool use_index = _use_index && diff --git a/cpp/src/io/utilities/hostdevice_span.hpp b/cpp/src/io/utilities/hostdevice_span.hpp new file mode 100644 index 00000000000..4b0dc7f672a --- /dev/null +++ b/cpp/src/io/utilities/hostdevice_span.hpp @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +template +class hostdevice_span { + public: + using value_type = T; + + hostdevice_span() = default; + ~hostdevice_span() = default; + hostdevice_span(hostdevice_span const&) = default; ///< Copy constructor + hostdevice_span(hostdevice_span&&) = default; ///< Move constructor + + hostdevice_span(T* cpu_data, T* gpu_data, size_t size) + : _size(size), _host_data(cpu_data), _device_data(gpu_data) + { + } + + /** + * @brief Copy assignment operator. + * + * @return Reference to this hostdevice_span. + */ + constexpr hostdevice_span& operator=(hostdevice_span const&) noexcept = default; + + /** + * @brief Converts a hostdevice view into a device span. + * + * @tparam T The device span type. + * @return A typed device span of the hostdevice view's data. + */ + [[nodiscard]] operator cudf::device_span() const + { + return cudf::device_span(_device_data, size()); + } + + /** + * @brief Returns the underlying device data. + * + * @tparam T The type to cast to + * @return T const* Typed pointer to underlying data + */ + [[nodiscard]] T* device_ptr(size_t offset = 0) const noexcept { return _device_data + offset; } + + /** + * @brief Return first element in device data. + * + * @tparam T The desired type + * @return T const* Pointer to the first element + */ + [[nodiscard]] T* device_begin() const noexcept { return device_ptr(); } + + /** + * @brief Return one past the last element in device_data. + * + * @tparam T The desired type + * @return T const* Pointer to one past the last element + */ + [[nodiscard]] T* device_end() const noexcept { return device_begin() + size(); } + + /** + * @brief Converts a hostdevice_span into a host span. + * + * @tparam T The host span type. + * @return A typed host span of the hostdevice_span's data. + */ + [[nodiscard]] operator cudf::host_span() const noexcept + { + return cudf::host_span(_host_data, size()); + } + + /** + * @brief Returns the underlying host data. + * + * @tparam T The type to cast to + * @return T* Typed pointer to underlying data + */ + [[nodiscard]] T* host_ptr(size_t offset = 0) const noexcept { return _host_data + offset; } + + /** + * @brief Return first element in host data. + * + * @tparam T The desired type + * @return T const* Pointer to the first element + */ + [[nodiscard]] T* host_begin() const noexcept { return host_ptr(); } + + /** + * @brief Return one past the last elementin host data. + * + * @tparam T The desired type + * @return T const* Pointer to one past the last element + */ + [[nodiscard]] T* host_end() const noexcept { return host_begin() + size(); } + + /** + * @brief Returns the number of elements in the view + * + * @return The number of elements in the view + */ + [[nodiscard]] std::size_t size() const noexcept { return _size; } + + /** + * @brief Returns true if `size()` returns zero, or false otherwise + * + * @return True if `size()` returns zero, or false otherwise + */ + [[nodiscard]] bool is_empty() const noexcept { return size() == 0; } + + [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); } + + [[nodiscard]] T& operator[](size_t i) { return _host_data[i]; } + [[nodiscard]] T const& operator[](size_t i) const { return _host_data[i]; } + + /** + * @brief Obtains a hostdevice_span that is a view over the `count` elements of this + * hostdevice_span starting at offset + * + * @param offset The offset of the first element in the subspan + * @param count The number of elements in the subspan + * @return A subspan of the sequence, of requested count and offset + */ + constexpr hostdevice_span subspan(size_t offset, size_t count) const noexcept + { + return hostdevice_span(_host_data + offset, _device_data + offset, count); + } + + void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false) + { + CUDF_CUDA_TRY( + cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value())); + if (synchronize) { stream.synchronize(); } + } + + void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false) + { + CUDF_CUDA_TRY( + cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value())); + if (synchronize) { stream.synchronize(); } + } + + private: + size_t _size{}; ///< Number of elements + T* _device_data{}; ///< Pointer to device memory containing elements + T* _host_data{}; ///< Pointer to host memory containing elements +}; diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 5a551998d21..1591abe4064 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -17,6 +17,7 @@ #pragma once #include "config_utils.hpp" +#include "hostdevice_span.hpp" #include #include @@ -90,7 +91,7 @@ class hostdevice_vector { [[nodiscard]] size_t capacity() const noexcept { return d_data.size(); } [[nodiscard]] size_t size() const noexcept { return current_size; } - [[nodiscard]] size_t memory_size() const noexcept { return sizeof(T) * size(); } + [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); } [[nodiscard]] T& operator[](size_t i) { return host_data[i]; } [[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; } @@ -139,17 +140,39 @@ class hostdevice_vector { void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false) { CUDF_CUDA_TRY( - cudaMemcpyAsync(device_ptr(), host_ptr(), memory_size(), cudaMemcpyDefault, stream.value())); + cudaMemcpyAsync(device_ptr(), host_ptr(), size_bytes(), cudaMemcpyDefault, stream.value())); if (synchronize) { stream.synchronize(); } } void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false) { CUDF_CUDA_TRY( - cudaMemcpyAsync(host_ptr(), device_ptr(), memory_size(), cudaMemcpyDefault, stream.value())); + cudaMemcpyAsync(host_ptr(), device_ptr(), size_bytes(), cudaMemcpyDefault, stream.value())); if (synchronize) { stream.synchronize(); } } + /** + * @brief Converts a hostdevice_vector into a hostdevice_span. + * + * @return A typed hostdevice_span of the hostdevice_vector's data. + */ + [[nodiscard]] operator hostdevice_span() + { + return hostdevice_span{host_data, d_data.data(), size()}; + } + + /** + * @brief Converts a part of a hostdevice_vector into a hostdevice_span. + * + * @return A typed hostdevice_span of the hostdevice_vector's data. + */ + [[nodiscard]] hostdevice_span subspan(size_t offset, size_t count) + { + CUDF_EXPECTS(count >= offset, "End index cannot be smaller than the starting index."); + CUDF_EXPECTS(count <= d_data.size(), "Slice range out of bounds."); + return hostdevice_span{host_data + offset, d_data.data() + offset, count - offset}; + } + private: std::variant, thrust::host_vector>> h_data_owner; @@ -207,7 +230,7 @@ class hostdevice_2dvector { T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); } - size_t memory_size() const noexcept { return _data.memory_size(); } + size_t size_bytes() const noexcept { return _data.size_bytes(); } void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false) { diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 66f9fbfc0d6..43ca05644ff 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -40,12 +40,19 @@ using cudf::detail::host_2dspan; using cudf::detail::hostdevice_2dvector; template -void expect_equivolent(host_span a, host_span b) +void expect_equivalent(host_span a, host_span b) { EXPECT_EQ(a.size(), b.size()); EXPECT_EQ(a.data(), b.data()); } +template +void expect_equivalent(hostdevice_span a, hostdevice_span b) +{ + EXPECT_EQ(a.size(), b.size()); + EXPECT_EQ(a.host_ptr(), b.host_ptr()); +} + template void expect_match(Iterator1 expected, size_t expected_size, host_span input) { @@ -61,10 +68,16 @@ void expect_match(std::string expected, host_span input) return expect_match(expected.begin(), expected.size(), input); } -std::string const hello_wold_message = "hello world"; +template +void expect_match(std::string expected, hostdevice_span input) +{ + return expect_match(expected.begin(), expected.size(), host_span(input)); +} + +std::string const hello_world_message = "hello world"; std::vector create_hello_world_message() { - return std::vector(hello_wold_message.begin(), hello_wold_message.end()); + return std::vector(hello_world_message.begin(), hello_world_message.end()); } class SpanTest : public cudf::test::BaseFixture { @@ -75,7 +88,7 @@ TEST(SpanTest, CanCreateFullSubspan) auto message = create_hello_world_message(); auto const message_span = host_span(message.data(), message.size()); - expect_equivolent(message_span, message_span.subspan(0, message_span.size())); + expect_equivalent(message_span, message_span.subspan(0, message_span.size())); } TEST(SpanTest, CanTakeFirst) @@ -310,4 +323,134 @@ TEST(MdSpanTest, CanGetCount) EXPECT_EQ(device_2dspan{vector}.count(), 11ul * 23); } +auto get_test_hostdevice_vector() +{ + auto v = hostdevice_vector(0, 11, cudf::get_default_stream()); + for (auto c : create_hello_world_message()) { + v.push_back(c); + } + + return v; +} + +TEST(HostDeviceSpanTest, CanCreateFullSubspan) +{ + auto message = get_test_hostdevice_vector(); + auto const message_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + + expect_equivalent(message_span, message.subspan(0, message_span.size())); +} + +TEST(HostDeviceSpanTest, CanCreateHostSpan) +{ + auto message = get_test_hostdevice_vector(); + auto const message_span = host_span(message.host_ptr(), message.size()); + auto const hd_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + + expect_equivalent(message_span, cudf::host_span(hd_span)); +} + +TEST(HostDeviceSpanTest, CanTakeSubspanFull) +{ + auto message = get_test_hostdevice_vector(); + auto const message_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + + expect_match("hello world", message_span.subspan(0, 11)); +} + +TEST(HostDeviceSpanTest, CanTakeSubspanPartial) +{ + auto message = get_test_hostdevice_vector(); + auto const message_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + + expect_match("lo w", message_span.subspan(3, 4)); +} + +TEST(HostDeviceSpanTest, CanGetData) +{ + auto message = get_test_hostdevice_vector(); + auto const message_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + + EXPECT_EQ(message.host_ptr(), message_span.host_ptr()); +} + +TEST(HostDeviceSpanTest, CanGetSize) +{ + auto message = get_test_hostdevice_vector(); + auto const message_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + auto const empty_span = hostdevice_span(); + + EXPECT_EQ(static_cast(11), message_span.size()); + EXPECT_EQ(static_cast(0), empty_span.size()); +} + +TEST(HostDeviceSpanTest, CanGetSizeBytes) +{ + auto doubles = std::vector({6, 3, 2}); + auto doubles_hdv = hostdevice_vector(0, 3, cudf::get_default_stream()); + for (auto d : doubles) { + doubles_hdv.push_back(d); + } + auto const doubles_span = hostdevice_span(doubles_hdv); + auto const empty_span = hostdevice_span(); + + EXPECT_EQ(static_cast(24), doubles_span.size_bytes()); + EXPECT_EQ(static_cast(0), empty_span.size_bytes()); +} + +TEST(HostDeviceSpanTest, CanCopySpan) +{ + auto message = get_test_hostdevice_vector(); + hostdevice_span message_span_copy; + + { + auto const message_span = + hostdevice_span(message.host_ptr(), message.device_ptr(), message.size()); + + message_span_copy = message_span; + } + + EXPECT_EQ(message.host_ptr(), message_span_copy.host_ptr()); + EXPECT_EQ(message.device_ptr(), message_span_copy.device_ptr()); + EXPECT_EQ(message.size(), message_span_copy.size()); +} + +TEST(HostDeviceSpanTest, CanSendToDevice) +{ + auto message = get_test_hostdevice_vector(); + + message.host_to_device(cudf::get_default_stream(), true); + + char d_message[12]; + cudaMemcpy(d_message, message.device_ptr(), 11, cudaMemcpyDefault); + d_message[11] = '\0'; + + EXPECT_EQ(11, strlen(d_message)); + EXPECT_EQ(std::string(d_message), hello_world_message); +} + +__global__ void simple_device_char_kernel(device_span result) +{ + const char* str = "world hello"; + for (int offset = 0; offset < result.size(); ++offset) { + result.data()[offset] = str[offset]; + } +} + +TEST(HostDeviceSpanTest, CanGetFromDevice) +{ + auto message = get_test_hostdevice_vector(); + message.host_to_device(cudf::get_default_stream(), true); + simple_device_char_kernel<<<1, 1, 0, cudf::get_default_stream()>>>(message); + + message.device_to_host(cudf::get_default_stream(), true); + expect_match("world hello", hostdevice_span(message)); +} + CUDF_TEST_PROGRAM_MAIN()